⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 farthestfirst.java

📁 数据挖掘中聚类的算法
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
	  }	}      }    }  }

  /**
   * Assigns an instance that has already been through the filters to the
   * cluster whose centroid is nearest according to
   * {@link #distance(Instance, Instance)}.
   *
   * If no centroid is strictly closer than {@code Double.MAX_VALUE} (for
   * example when there are no clusters to iterate over) the result is 0.
   *
   * @param instance the instance to assign a cluster to
   * @return a cluster number
   */
  protected int clusterProcessedInstance(Instance instance) {
    double minDist = Double.MAX_VALUE;
    int bestCluster = 0;
    for (int i = 0; i < m_NumClusters; i++) {
      double dist = distance(instance, m_ClusterCentroids.instance(i));
      // Strict comparison: on ties the lowest-numbered cluster wins.
      if (dist < minDist) {
        minDist = dist;
        bestCluster = i;
      }
    }
    return bestCluster;
  }

  /**
   * Assigns a given instance to a cluster.
   *
   * The instance is first pushed through the replace-missing-values filter;
   * the filter's output is then handed to
   * {@link #clusterProcessedInstance(Instance)}.
   *
   * @param instance the instance to be assigned to a cluster
   * @return the number of the assigned cluster as an integer
   * @throws Exception if instance could not be clustered successfully
   */
  public int clusterInstance(Instance instance) throws Exception {
    m_ReplaceMissingFilter.input(instance);
    m_ReplaceMissingFilter.batchFinished();
    Instance inst = m_ReplaceMissingFilter.output();

    return clusterProcessedInstance(inst);
  }

  /**
   * Calculates the distance between two instances.
   *
   * Both instances are walked in parallel through their stored (sparse)
   * values. For every attribute other than the class attribute the
   * per-attribute {@link #difference(int, double, double)} is squared and
   * summed; a value that is not stored in one of the instances is treated
   * as 0. The result is the square root of the sum divided by the number of
   * attributes of the training data.
   *
   * NOTE(review): the original documentation states the result lies between
   * 0 and 1; that relies on {@link #norm(double, int)} returning values in
   * [0, 1], which is not enforced here -- confirm for unseen data.
   *
   * @param first the first instance
   * @param second the second instance
   * @return the distance between the two given instances
   */
  protected double distance(Instance first, Instance second) {

    double distance = 0;
    int firstI, secondI;

    for (int p1 = 0, p2 = 0;
         p1 < first.numValues() || p2 < second.numValues();) {
      // Once one instance is exhausted, use numAttributes() as a sentinel
      // index so the other instance's remaining values are consumed.
      if (p1 >= first.numValues()) {
        firstI = m_instances.numAttributes();
      } else {
        firstI = first.index(p1);
      }
      if (p2 >= second.numValues()) {
        secondI = m_instances.numAttributes();
      } else {
        secondI = second.index(p2);
      }
      // The class attribute never contributes to the distance.
      if (firstI == m_instances.classIndex()) {
        p1++;
        continue;
      }
      if (secondI == m_instances.classIndex()) {
        p2++;
        continue;
      }
      double diff;
      if (firstI == secondI) {
        // Both instances store a value for this attribute.
        diff = difference(firstI,
                          first.valueSparse(p1),
                          second.valueSparse(p2));
        p1++;
        p2++;
      } else if (firstI > secondI) {
        // Only the second instance stores this attribute; first counts as 0.
        diff = difference(secondI,
                          0, second.valueSparse(p2));
        p2++;
      } else {
        // Only the first instance stores this attribute; second counts as 0.
        diff = difference(firstI,
                          first.valueSparse(p1), 0);
        p1++;
      }
      distance += diff * diff;
    }

    return Math.sqrt(distance / m_instances.numAttributes());
  }

  /**
   * Computes the difference between two given attribute values.
   *
   * Nominal attributes yield 1 when either value is missing or the two
   * values differ, otherwise 0. Numeric attributes yield the difference of
   * the normalized values; when both are missing the result is 1, and when
   * exactly one is missing the result is the larger of {@code d} and
   * {@code 1 - d}, where {@code d} is the normalized known value. Any other
   * attribute type yields 0.
   *
   * @param index the index of the attribute the values belong to
   * @param val1 the first value
   * @param val2 the second value
   * @return the (possibly negative, for numeric attributes) difference
   */
  protected double difference(int index, double val1, double val2) {

    switch (m_instances.attribute(index).type()) {
    case Attribute.NOMINAL:

      // If attribute is nominal
      if (Instance.isMissingValue(val1) ||
          Instance.isMissingValue(val2) ||
          ((int)val1 != (int)val2)) {
        return 1;
      } else {
        return 0;
      }
    case Attribute.NUMERIC:

      // If attribute is numeric
      if (Instance.isMissingValue(val1) ||
          Instance.isMissingValue(val2)) {
        if (Instance.isMissingValue(val1) &&
            Instance.isMissingValue(val2)) {
          return 1;
        } else {
          double diff;
          if (Instance.isMissingValue(val2)) {
            diff = norm(val1, index);
          } else {
            diff = norm(val2, index);
          }
          // Take the larger of diff and 1 - diff as the difference.
          if (diff < 0.5) {
            diff = 1.0 - diff;
          }
          return diff;
        }
      } else {
        return norm(val1, index) - norm(val2, index);
      }
    default:
      return 0;
    }
  }

  /**
   * Normalizes a given value of a numeric attribute using the recorded
   * minimum and maximum for that attribute.
   *
   * Returns 0 when the recorded minimum is NaN or when minimum and maximum
   * are equal (as judged by {@code Utils.eq}), which avoids dividing by a
   * zero range.
   *
   * @param x the value to be normalized
   * @param i the attribute's index
   * @return the normalized value
   */
  protected double norm(double x, int i) {

    if (Double.isNaN(m_Min[i]) || Utils.eq(m_Max[i],m_Min[i])) {
      return 0;
    } else {
      return (x - m_Min[i]) / (m_Max[i] - m_Min[i]);
    }
  }

  /**
   * Returns the number of clusters.
   *
   * @return the number of clusters generated for a training dataset.
   * @throws Exception if number of clusters could not be returned
   * successfully
   */
  public int numberOfClusters() throws Exception {
    return m_NumClusters;
  }

  /**
   * Returns an enumeration describing the available options: the -N option
   * of this class followed by every option reported by the superclass.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions () {
    Vector result = new Vector();

    result.addElement(new Option(
        "\tnumber of clusters. (default = 2).",
        "N", 1, "-N <num>"));

    Enumeration en = super.listOptions();
    while (en.hasMoreElements())
      result.addElement(en.nextElement());

    return  result.elements();
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String numClustersTipText() {
    return "set number of clusters";
  }

  /**
   * Sets the number of clusters to generate.
   *
   * Zero is rejected along with negative values: the error message has
   * always demanded a value greater than 0, and with zero clusters there is
   * no centroid for an instance to be assigned to.
   *
   * @param n the number of clusters to generate
   * @throws Exception if number of clusters is not positive
   */
  public void setNumClusters(int n) throws Exception {
    if (n <= 0) {
      throw new Exception("Number of clusters must be > 0");
    }
    m_NumClusters = n;
  }

  /**
   * gets the number of clusters to generate
   *
   * @return the number of clusters to generate
   */
  public int getNumClusters() {
    return m_NumClusters;
  }

  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -N &lt;num&gt;
   *  number of clusters. (default = 2).</pre>
   *
   * <pre> -S &lt;num&gt;
   *  Random number seed.
   *  (default 1)</pre>
   *
   <!-- options-end -->
   *
   * Only -N is handled here; the remaining options are passed on to the
   * superclass.
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions (String[] options)
    throws Exception {

    String optionString = Utils.getOption('N', options);

    // An absent -N leaves the current number of clusters untouched.
    if (optionString.length() != 0) {
      setNumClusters(Integer.parseInt(optionString));
    }

    super.setOptions(options);
  }

  /**
   * Gets the current settings of FarthestFirst: "-N" with the current
   * number of clusters, followed by the options reported by the superclass.
   *
   * @return an array of strings suitable for passing to setOptions()
   */
  public String[] getOptions () {
    int       	i;
    Vector    	result;
    String[]  	options;

    result = new Vector();

    result.add("-N");
    result.add("" + getNumClusters());

    options = super.getOptions();
    for (i = 0; i < options.length; i++)
      result.add(options[i]);

    return (String[]) result.toArray(new String[result.size()]);
  }

  /**
   * Returns a string describing this clusterer: a header followed by one
   * entry per cluster listing the centroid's attribute values. Nominal
   * attributes are printed as their label, all others as the raw value.
   *
   * @return a description of the clusterer as a string
   */
  public String toString() {
    StringBuffer temp = new StringBuffer();

    temp.append("\n FarthestFirst\n==============\n");

    temp.append("\nCluster centroids:\n");
    for (int i = 0; i < m_NumClusters; i++) {
      temp.append("\nCluster "+i+"\n\t");
      for (int j = 0; j < m_ClusterCentroids.numAttributes(); j++) {
        if (m_ClusterCentroids.attribute(j).isNominal()) {
          temp.append(" "+m_ClusterCentroids.attribute(j).
                      value((int)m_ClusterCentroids.instance(i).value(j)));
        } else {
          temp.append(" "+m_ClusterCentroids.instance(i).value(j));
        }
      }
    }
    temp.append("\n\n");
    return temp.toString();
  }

  /**
   * Main method for testing this class.
   *
   * @param argv should contain the following arguments: <p>
   * -t training file [-N number of clusters]
   */
  public static void main (String[] argv) {
    runClusterer(new FarthestFirst(), argv);
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -