⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kmeans.java

📁 一个数据挖掘系统的源码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
   * Returns the number of clusters.
   *
   * @return the number of clusters generated for a training dataset.
   * @exception Exception if number of clusters could not be returned
   * successfully
   */
  public int numberOfClusters() throws Exception {
    // Reports the cluster count currently held in m_NumClusters.
    final int clusterCount = m_NumClusters;
    return clusterCount;
  }

  /**
   * Returns an enumeration describing the available options. <p>
   *
   * Valid options are:<p>
   *
   * -N &lt;number of clusters&gt; <br>
   * Specify the number of clusters to generate. The option help text
   * lists a default of 2. <p>
   *
   * -S &lt;seed&gt; <br>
   * Specify random number seed. The option help text lists a default
   * of 10. <p>
   *
   * @return an enumeration of all the available options.
   *
   **/
  public Enumeration listOptions () {
    // One Option per supported flag: -N (cluster count) and -S (seed).
    Option clusterCountOption =
      new Option("\tnumber of clusters. (default = 2).", "N", 1, "-N <num>");
    Option seedOption =
      new Option("\trandom number seed.\n (default 10)", "S", 1, "-S <num>");

    Vector optionList = new Vector(2);
    optionList.addElement(clusterCountOption);
    optionList.addElement(seedOption);
    return optionList.elements();
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String numClustersTipText() {
    // Short hint text for the numClusters property.
    final String tip = "set number of clusters";
    return tip;
  }

  /**
   * set the number of clusters to generate
   *
   * @param n the number of clusters to generate
   */
  public void setNumClusters(int n) {
    // Stored as given; no range check is applied here.
    this.m_NumClusters = n;
  }

  /**
   * gets the number of clusters to generate
   *
   * @return the number of clusters to generate
   */
  public int getNumClusters() {
    // Plain accessor for the configured cluster count.
    final int configured = this.m_NumClusters;
    return configured;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String seedTipText() {
    // Short hint text for the seed property.
    final String tip = "random number seed";
    return tip;
  }


  /**
   * Set the random number seed
   *
   * @param s the seed
   */
  public void setSeed (int s) {
    // Stored as given; any int is accepted.
    this.m_Seed = s;
  }


  /**
   * Get the random number seed
   *
   * @return the seed
   */
  public int getSeed () {
    // Plain accessor for the configured random seed.
    final int configured = this.m_Seed;
    return configured;
  }

  /**
   * Parses a given list of options.
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   *
   **/
  public void setOptions (String[] options)
    throws Exception {

    // -N: cluster count. The current value is kept when Utils.getOption
    // returns an empty string for the flag.
    String clusterArg = Utils.getOption('N', options);
    if (clusterArg.length() > 0) {
      int clusterCount = Integer.parseInt(clusterArg);
      setNumClusters(clusterCount);
    }

    // -S: random seed, handled the same way as -N.
    String seedArg = Utils.getOption('S', options);
    if (seedArg.length() > 0) {
      int seed = Integer.parseInt(seedArg);
      setSeed(seed);
    }
  }

  /**
   * Gets the current settings of KMeans
   *
   * @return an array of strings suitable for passing to setOptions()
   */
  public String[] getOptions () {
    // Flag/value pairs in the same order that setOptions() reads them.
    String clusterCount = String.valueOf(getNumClusters());
    String seed = String.valueOf(getSeed());
    return new String[] { "-N", clusterCount, "-S", seed };
  }

  /**
   * return a string describing this clusterer
   *
   * @return a description of the clusterer as a string
   */
  public String toString() {
    // Guard: without centroids there is nothing to print, and the loops
    // below would dereference a null reference.
    if (m_ClusterCentroids == null) {
      return "No clusterer built yet!";
    }

    StringBuffer temp = new StringBuffer();

    temp.append("\nkMeans\n======\n");
    temp.append("\nNumber of iterations: ").append(m_Iterations).append("\n");

    temp.append("\nCluster centroids:\n");
    for (int i = 0; i < m_NumClusters; i++) {
      temp.append("\nCluster ").append(i).append("\n\t");
      for (int j = 0; j < m_ClusterCentroids.numAttributes(); j++) {
        temp.append(" ");
        if (m_ClusterCentroids.attribute(j).isNominal()) {
          // Nominal attributes store an index; print the label it maps to.
          temp.append(m_ClusterCentroids.attribute(j).
                      value((int)m_ClusterCentroids.instance(i).value(j)));
        } else {
          temp.append(m_ClusterCentroids.instance(i).value(j));
        }
      }
    }
    // Kept from the original: producing the description also runs the
    // PMML document construction.
    pmmlDocument (0);
    return temp.toString();
  }

  /**
   * Main method for testing this class.
   *
   * @param argv should contain the following arguments: <p>
   * -t training file [-N number of clusters]
   */
  public static void main (String[] argv) {
    try {
      System.out.println(ClusterEvaluation.
                         evaluateClusterer(new KMeans(), argv));
    }
    catch (Exception e) {
      log.error (e.getMessage());
      // Render the actual stack trace. Calling toString() on the
      // StackTraceElement[] only yields the array's identity string
      // (e.g. "[Ljava.lang.StackTraceElement;@1b2c3d"), not the frames.
      java.io.StringWriter trace = new java.io.StringWriter();
      java.io.PrintWriter traceWriter = new java.io.PrintWriter(trace);
      e.printStackTrace(traceWriter);
      traceWriter.flush();
      log.error(trace.toString());
    }
  }
/**
 * Creates the PMML document
 */
     private void pmmlDocument (int depth) {
       // Assembles the PMML tree: PMML -> Header, DataDictionary,
       // ClusteringModel -> MiningSchema, ClusteringField*, Cluster*.
       // Failures are logged, not propagated.
       // NOTE(review): 'depth' is never read, and the finished Document is
       // not stored or returned by this method.
       try {
         Element versionElement = pmmlIntro();
         Element headerElement = header ();
         Element dataDictionaryElement = dataDictionary () ;
         Element clusteringModelElement = clusteringModel ();
         Element miningSchemaElement = miningSchema ();

         versionElement.addContent(headerElement);
         versionElement.addContent(dataDictionaryElement);
         versionElement.addContent(clusteringModelElement);

         clusteringModelElement.addContent(miningSchemaElement);

         // One ClusteringField per attribute of the training header.
         FastVector miningSchemaVector = m_instances.getVector();
         for (int i = 0; i < miningSchemaVector.size(); i++) {
           org.agentacademy.modules.dataminer.core.Attribute a =
             (org.agentacademy.modules.dataminer.core.Attribute) miningSchemaVector.elementAt(i);
           clusteringModelElement.addContent(clusteringField (a.name()));
         }

         // One Cluster element per centroid; its text is the space-prefixed
         // list of centroid values (labels for nominal attributes).
         for (int cluster = 0; cluster < m_NumClusters; cluster++) {
           StringBuffer clusterText = new StringBuffer();
           for (int att = 0; att < m_ClusterCentroids.numAttributes(); att++) {
             clusterText.append(" ");
             if (m_ClusterCentroids.attribute(att).isNominal()) {
               clusterText.append(m_ClusterCentroids.attribute(att).
                   value((int)m_ClusterCentroids.instance(cluster).value(att)));
             }
             else {
               clusterText.append(m_ClusterCentroids.instance(cluster).value(att));
             }
           }
           Element clusterElement = clusterDescriptor (clusterText.toString());
           // clusterDescriptor() has no access to the loop index and labels
           // every cluster "1"; overwrite the name so clusters are numbered
           // 1..m_NumClusters.
           clusterElement.setAttribute("name", String.valueOf(cluster + 1));
           clusteringModelElement.addContent(clusterElement);
         }

         // The first DocType argument is the root element name; the DTD
         // file belongs in the system-ID argument.
         DocType dtd = new DocType("PMML", "pmml_2_0.dtd");
         Document document = new Document (versionElement, dtd);
       }
       catch (Exception e) {
         log.error("PMML Document Exception: " + e);
         // Render the actual stack trace. Calling toString() on the
         // StackTraceElement[] only yields the array's identity string.
         java.io.StringWriter trace = new java.io.StringWriter();
         java.io.PrintWriter traceWriter = new java.io.PrintWriter(trace);
         e.printStackTrace(traceWriter);
         traceWriter.flush();
         log.error(trace.toString());
       }
     }
     /**
    * Creates the pmml Element
    */
   private Element pmmlIntro () throws Exception {
      // Bare root element; no attributes or children are set here.
      return new Element ("PMML");
   }

  /**
   * Create the Header Element
   */
   private Element header () throws Exception {
      // Child element identifying the producing application.
      Element application = new Element ("Application");
      application.setAttribute("name", "Agent Academy Data Miner");
      application.setAttribute("version", "0.3");

      // Header carries fixed copyright/description text plus the child.
      Element headerElement = new Element ("Header");
      headerElement.setAttribute("copyright", "issel.ee.auth.gr");
      headerElement.setAttribute("description", "The Clustering Model of Data Mined Data");
      headerElement.addContent(application);
      return headerElement;
   }

   /**
    *  Create the DataDictionary for the pre - specified XML file
    */
    private Element dataDictionary () throws Exception {
      Element dictionary = new Element ("DataDictionary");
      FastVector attributes = m_instances.getVector();
      dictionary.setAttribute("numberOfFields",
                              String.valueOf(m_instances.numAttributes()));

      // One DataField per attribute: name, optype and, for nominal
      // attributes, one Value child per label.
      for (int idx = 0; idx < attributes.size(); idx++) {
        Element field = new Element ("DataField");
        org.agentacademy.modules.dataminer.core.Attribute attribute =
          (org.agentacademy.modules.dataminer.core.Attribute) attributes.elementAt(idx);
        field.setAttribute("name", attribute.name());

        // Map the attribute kind to its optype string; the first matching
        // test wins, in the order nominal, numeric, regular.
        boolean categorical = attribute.isNominal();
        String optype;
        if (categorical) {
          optype = "categorical";
        } else if (attribute.isNumeric()) {
          optype = "continuous";
        } else if (attribute.isRegular()) {
          optype = "ordinal";
        } else {
          optype = "string";
        }
        field.setAttribute("optype", optype);

        if (categorical) {
          for (Enumeration labels = attribute.enumerateValues();
               labels.hasMoreElements();) {
            String label = (String) labels.nextElement();
            Element valueElement = new Element ("Value");
            valueElement.setAttribute("value", label);
            field.addContent(valueElement);
          }
        }
        dictionary.addContent(field);
      }
      return dictionary;
    }
    /**
     *  Creates the Clustering Model Element
     */
     private Element clusteringModel () throws Exception {
        // Attribute values are gathered first, then applied in the same
        // order as before: modelName, modelClass, numberOfClusters.
        String relation = m_instances.relationName();
        String clusterCount = String.valueOf(m_NumClusters);

        Element model = new Element ("ClusteringModel");
        model.setAttribute("modelName", relation);
        model.setAttribute("modelClass", "centerBased");
        model.setAttribute("numberOfClusters", clusterCount);
        return model;
     }
   /**
    * Creates the Mining Schema Element
    */

   private Element miningSchema () throws Exception {
       Element schema = new Element ("MiningSchema");
       FastVector attributes = m_instances.getVector();

       // One MiningField per attribute, carrying only its name.
       for (int pos = 0; pos < attributes.size(); pos++) {
          Element field = new Element ("MiningField");
          org.agentacademy.modules.dataminer.core.Attribute attribute =
            (org.agentacademy.modules.dataminer.core.Attribute) attributes.elementAt(pos);
          field.setAttribute("name", attribute.name());
          schema.addContent(field);
       }
       return schema;
   }

   /**
    * Create the Clustering Field Element
    */
    private Element clusteringField (String fieldString) {
        // Every field is compared with the same fixed function name.
        final String compareFunction = "squaredEuclidean";

        Element field = new Element ("ClusteringField");
        field.setAttribute("field", fieldString);
        field.setAttribute("compareFunction", compareFunction);
        return field;
    }
/**
 * Create the Cluster Description Element
 */
    private Element clusterDescriptor (String string) throws Exception {
      // Array child: declared length is the attribute count of the training
      // header; its text is the caller-supplied value list.
      Element array = new Element ("Array");
      array.setAttribute("n", String.valueOf(m_instances.numAttributes()));
      array.setText(string);

      // NOTE(review): the name is always "1" because this method receives
      // no cluster index; behaviour is kept as-is here.
      Element cluster = new Element ("Cluster");
      cluster.setAttribute("name", String.valueOf(1));
      cluster.addContent(array);
      return cluster;
    }


    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -