⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 simplekmeans.java

📁 数据挖掘中聚类的算法
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
      }       if (secondI == m_ClusterCentroids.classIndex()) {	p2++; continue;        } */      double diff;      if (firstI == secondI) {	diff = difference(firstI, 			  first.valueSparse(p1),			  second.valueSparse(p2));	p1++; p2++;      } else if (firstI > secondI) {	diff = difference(secondI, 			  0, second.valueSparse(p2));	p2++;      } else {	diff = difference(firstI, 			  first.valueSparse(p1), 0);	p1++;      }      distance += diff * diff;    }        //return Math.sqrt(distance / m_ClusterCentroids.numAttributes());    return distance;  }  /**   * Computes the difference between two given attribute   * values.   *    * @param index the attribute index   * @param val1 the first value   * @param val2 the second value   * @return the difference   */  private double difference(int index, double val1, double val2) {    switch (m_ClusterCentroids.attribute(index).type()) {    case Attribute.NOMINAL:            // If attribute is nominal      if (Instance.isMissingValue(val1) || 	  Instance.isMissingValue(val2) ||	  ((int)val1 != (int)val2)) {	return 1;      } else {	return 0;      }    case Attribute.NUMERIC:      // If attribute is numeric      if (Instance.isMissingValue(val1) || 	  Instance.isMissingValue(val2)) {	if (Instance.isMissingValue(val1) && 	    Instance.isMissingValue(val2)) {	  return 1;	} else {	  double diff;	  if (Instance.isMissingValue(val2)) {	    diff = norm(val1, index);	  } else {	    diff = norm(val2, index);	  }	  if (diff < 0.5) {	    diff = 1.0 - diff;	  }	  return diff;	}      } else {	return norm(val1, index) - norm(val2, index);      }    default:      return 0;    }  }  /**   * Normalizes a given value of a numeric attribute.   
*   * @param x the value to be normalized   * @param i the attribute's index   * @return the normalized value   */  private double norm(double x, int i) {    if (Double.isNaN(m_Min[i]) || Utils.eq(m_Max[i],m_Min[i])) {      return 0;    } else {      return (x - m_Min[i]) / (m_Max[i] - m_Min[i]);    }  }  /**   * Updates the minimum and maximum values for all the attributes   * based on a new instance.   *   * @param instance the new instance   */  private void updateMinMax(Instance instance) {      for (int j = 0;j < m_ClusterCentroids.numAttributes(); j++) {      if (!instance.isMissing(j)) {	if (Double.isNaN(m_Min[j])) {	  m_Min[j] = instance.value(j);	  m_Max[j] = instance.value(j);	} else {	  if (instance.value(j) < m_Min[j]) {	    m_Min[j] = instance.value(j);	  } else {	    if (instance.value(j) > m_Max[j]) {	      m_Max[j] = instance.value(j);	    }	  }	}      }    }  }    /**   * Returns the number of clusters.   *   * @return the number of clusters generated for a training dataset.   * @throws Exception if number of clusters could not be returned   * successfully   */  public int numberOfClusters() throws Exception {    return m_NumClusters;  }  /**   * Returns an enumeration describing the available options.   *   * @return an enumeration of all the available options.   
*/
  public Enumeration listOptions() {
    Vector collected = new Vector();

    // This class's own option goes first ...
    collected.addElement(new Option(
        "\tnumber of clusters.\n"
        + "\t(default 2).",
        "N", 1, "-N <num>"));

    // ... followed by everything the superclass lists.
    for (Enumeration inherited = super.listOptions(); inherited.hasMoreElements();) {
      collected.addElement(inherited.nextElement());
    }

    return collected.elements();
  }

  /**
   * Supplies the tip text for the numClusters property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String numClustersTipText() {
    return "set number of clusters";
  }

  /**
   * Sets the number of clusters to generate.
   *
   * @param n the number of clusters to generate
   * @throws Exception if n is zero or negative
   */
  public void setNumClusters(int n) throws Exception {
    boolean positive = n > 0;
    if (!positive) {
      throw new Exception("Number of clusters must be > 0");
    }
    m_NumClusters = n;
  }

  /**
   * Gets the number of clusters to generate.
   *
   * @return the number of clusters to generate
   */
  public int getNumClusters() {
    return m_NumClusters;
  }

  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -N &lt;num&gt;
   *  number of clusters.
   *  (default 2).</pre>
   *
   * <pre> -S &lt;num&gt;
   *  Random number seed.
*  (default 10)</pre>
   *
   <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions (String[] options)
    throws Exception {

    // -N is consumed here; the remaining options are handed to the superclass.
    String optionString = Utils.getOption('N', options);

    if (optionString.length() != 0) {
      setNumClusters(Integer.parseInt(optionString));
    }

    super.setOptions(options);
  }

  /**
   * Gets the current settings of SimpleKMeans
   *
   * @return an array of strings suitable for passing to setOptions()
   */
  public String[] getOptions () {
    int         i;
    Vector      result;
    String[]    options;

    result = new Vector();

    result.add("-N");
    result.add("" + getNumClusters());

    // Append whatever the superclass reports after this class's own option.
    options = super.getOptions();
    for (i = 0; i < options.length; i++)
      result.add(options[i]);

    return (String[]) result.toArray(new String[result.size()]);
  }

  /**
   * return a string describing this clusterer
   * <p>
   * If no centroids are available yet, a short message is returned
   * instead of the report, so that printing an unbuilt clusterer does not
   * fail with a NullPointerException.
   *
   * @return a description of the clusterer as a string
   */
  public String toString() {
    if (m_ClusterCentroids == null) {
      return "No clusterer built yet!";
    }

    // Widest integer part among the numeric centroid values, estimated as
    // log10(|value|) + 1 and truncated to int. A value of 0 gives -Infinity,
    // which casts to Integer.MIN_VALUE and therefore never raises maxWidth.
    int maxWidth = 0;
    for (int i = 0; i < m_NumClusters; i++) {
      for (int j = 0 ;j < m_ClusterCentroids.numAttributes(); j++) {
        if (m_ClusterCentroids.attribute(j).isNumeric()) {
          double width = Math.log(Math.abs(m_ClusterCentroids.instance(i).value(j))) /
            Math.log(10.0);
          width += 1.0;
          if ((int)width > maxWidth) {
            maxWidth = (int)width;
          }
        }
      }
    }

    StringBuffer temp = new StringBuffer();

    // Placeholder printed in the std-dev row for non-numeric attributes,
    // padded with maxWidth + 2 trailing spaces.
    String naString = "N/A";
    for (int i = 0; i < maxWidth+2; i++) {
      naString += " ";
    }

    temp.append("\nkMeans\n======\n");
    temp.append("\nNumber of iterations: " + m_Iterations+"\n");
    temp.append("Within cluster sum of squared errors: " + Utils.sum(m_squaredErrors));

    temp.append("\n\nCluster centroids:\n");
    for (int i = 0; i < m_NumClusters; i++) {
      temp.append("\nCluster "+i+"\n\t");
      temp.append("Mean/Mode: ");
      for (int j = 0; j < m_ClusterCentroids.numAttributes(); j++) {
        if (m_ClusterCentroids.attribute(j).isNominal()) {
          // Nominal centroid values are indices into the attribute's labels.
          temp.append(" "+m_ClusterCentroids.attribute(j).
                      value((int)m_ClusterCentroids.instance(i).value(j)));
        } else {
          temp.append(" "+Utils.doubleToString(m_ClusterCentroids.instance(i).value(j),
                                               maxWidth+5, 4));
        }
      }
      temp.append("\n\tStd Devs:  ");
      for (int j = 0; j < m_ClusterStdDevs.numAttributes(); j++) {
        if (m_ClusterStdDevs.attribute(j).isNumeric()) {
          temp.append(" "+Utils.doubleToString(m_ClusterStdDevs.instance(i).value(j),
                                               maxWidth+5, 4));
        } else {
          temp.append(" "+naString);
        }
      }
    }
    temp.append("\n\n");
    return temp.toString();
  }

  /**
   * Gets the cluster centroids
   *
   * @return            the cluster centroids (the internal object, not a copy)
   */
  public Instances getClusterCentroids() {
    return m_ClusterCentroids;
  }

  /**
   * Gets the standard deviations of the numeric attributes in each cluster
   *
   * @return            the standard deviations of the numeric attributes
   *                    in each cluster (the internal object, not a copy)
   */
  public Instances getClusterStandardDevs() {
    return m_ClusterStdDevs;
  }

  /**
   * Returns for each cluster the frequency counts for the values of each
   * nominal attribute
   *
   * @return            the counts (the internal array, not a copy)
   */
  public int [][][] getClusterNominalCounts() {
    return m_ClusterNominalCounts;
  }

  /**
   * Gets the squared error for all clusters
   *
   * @return            the sum of the per-cluster squared errors
   */
  public double getSquaredError() {
    return Utils.sum(m_squaredErrors);
  }

  /**
   * Gets the number of instances in each cluster
   *
   * @return            The number of instances in each cluster
   *                    (the internal array, not a copy)
   */
  public int [] getClusterSizes() {
    return m_ClusterSizes;
  }

  /**
   * Main method for testing this class.
   *
   * @param argv should contain the following arguments: <p>
   * -t training file [-N number of clusters]
   */
  public static void main (String[] argv) {
    runClusterer(new SimpleKMeans(), argv);
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -