⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cobweb.java

📁 数据挖掘中聚类的算法
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
	  //	  tempChildren.removeElementAt(tempChildren.size()-1);	} else {	  // restore the status quo	  m_children = saveStatusQuo;	}      }      if (finalBestHost != this) {	// can commit the instance to the set of instances at this node	m_clusterInstances.add(newInstance);      } else {	m_numberSplits++;      }      if (finalBestHost == merged) {	m_numberMerges++;	m_children.removeElementAt(m_children.indexOf(a));	m_children.removeElementAt(m_children.indexOf(b));		m_children.addElement(merged);      }      if (finalBestHost == newLeaf) {	finalBestHost = new CNode(m_numAttributes);	m_children.addElement(finalBestHost);      }      if (bestHostCU < m_cutoff) {	if (finalBestHost == this) {	  // splitting was the best, but since we are cutting all children	  // recursion is aborted and we still need to add the instance	  // to the set of instances at this node	  m_clusterInstances.add(newInstance);	}	m_children = null;	finalBestHost = null;      }      if (finalBestHost == this) {	// splitting is still the best, so downdate the stats as 	// we'll be recursively calling on this node	updateStats(newInstance, true);      }      return finalBestHost;    }        /**     * Adds the supplied node as a child of this node. All of the child's     * instances are added to this nodes instances     *     * @param child the child to add     */    protected void addChildNode(CNode child) {      for (int i = 0; i < child.m_clusterInstances.numInstances(); i++) {	Instance temp = child.m_clusterInstances.instance(i);	m_clusterInstances.add(temp);	updateStats(temp, false);      }      if (m_children == null) {	m_children = new FastVector();      }      m_children.addElement(child);    }    /**     * Computes the utility of all children with respect to this node     *     * @return the category utility of the children with respect to this node.     * @throws Exception if there are no children     */    protected double categoryUtility() throws Exception {            if (m_children == null) {	throw new Exception("categoryUtility: No children!");      }      double totalCU = 0;           for (int i = 0; i < m_children.size(); i++) {	CNode child = (CNode) m_children.elementAt(i);	totalCU += categoryUtilityChild(child);      }      totalCU /= (double)m_children.size();      return totalCU;    }    /**     * Computes the utility of a single child with respect to this node     *     * @param child the child for which to compute the utility     * @return the utility of the child with respect to this node     * @throws Exception if something goes wrong     */    protected double categoryUtilityChild(CNode child) throws Exception {            double sum = 0;      for (int i = 0; i < m_numAttributes; i++) {	if (m_clusterInstances.attribute(i).isNominal()) {	  for (int j = 0; 	       j < m_clusterInstances.attribute(i).numValues(); j++) {	    double x = child.getProbability(i, j);	    double y = getProbability(i, j);	    sum += (x * x) - (y * y);	  }	} else {	  // numeric attribute	  sum += ((m_normal / child.getStandardDev(i)) - 		  (m_normal / getStandardDev(i)));	  	}      }      return (child.m_totalInstances / m_totalInstances) * sum;    }    /**     * Returns the probability of a value of a nominal attribute in this node     *     * @param attIndex the index of the attribute     * @param valueIndex the index of the value of the attribute     * @return the probability     * @throws Exception if the requested attribute is not nominal     */    protected double getProbability(int attIndex, int valueIndex)       throws Exception {            if (!m_clusterInstances.attribute(attIndex).isNominal()) {	throw new Exception("getProbability: attribute is not nominal");      }      if (m_attStats[attIndex].totalCount <= 0) {	return 0;      }      return (double) m_attStats[attIndex].nominalCounts[valueIndex] / 	(double) m_attStats[attIndex].totalCount;    }    /**     * Returns the standard deviation of a numeric attribute     *     * @param attIndex the index of the attribute     * @return the standard deviation     * @throws Exception if an error occurs     */    protected double getStandardDev(int attIndex) throws Exception {      if (!m_clusterInstances.attribute(attIndex).isNumeric()) {	throw new Exception("getStandardDev: attribute is not numeric");      }      m_attStats[attIndex].numericStats.calculateDerived();      double stdDev = m_attStats[attIndex].numericStats.stdDev;      if (Double.isNaN(stdDev) || Double.isInfinite(stdDev)) {	return m_acuity;      }      return Math.max(m_acuity, stdDev);    }    /**     * Update attribute stats using the supplied instance.      *     * @param updateInstance the instance for updating     * @param delete true if the values of the supplied instance are     * to be removed from the statistics     */    protected void updateStats(Instance updateInstance, 			       boolean delete) {      if (m_attStats == null) {	m_attStats = new AttributeStats[m_numAttributes];	for (int i = 0; i < m_numAttributes; i++) {	  m_attStats[i] = new AttributeStats();	  if (m_clusterInstances.attribute(i).isNominal()) {	    m_attStats[i].nominalCounts = 	      new int [m_clusterInstances.attribute(i).numValues()];	  } else {	    m_attStats[i].numericStats = new Stats();	  }	}      }      for (int i = 0; i < m_numAttributes; i++) {	if (!updateInstance.isMissing(i)) {	  double value = updateInstance.value(i);	  if (m_clusterInstances.attribute(i).isNominal()) {	    m_attStats[i].nominalCounts[(int)value] += (delete) ? 	      (-1.0 * updateInstance.weight()) : 	      updateInstance.weight();	    m_attStats[i].totalCount += (delete) ?	      (-1.0 * updateInstance.weight()) :	      updateInstance.weight();	  } else {	    if (delete) {	      m_attStats[i].numericStats.subtract(value, 						  updateInstance.weight());	    } else {	      m_attStats[i].numericStats.add(value, updateInstance.weight());	    }	  }	}      }      m_totalInstances += (delete) 	? (-1.0 * updateInstance.weight()) 	: (updateInstance.weight());    }    /**     * Recursively assigns numbers to the nodes in the tree.     *     * @param cl_num an <code>int[]</code> value     * @throws Exception if an error occurs     */    private void assignClusterNums(int[] cl_num) throws Exception {      if (m_children != null && m_children.size() < 2) {	throw new Exception("assignClusterNums: tree not built correctly!");      }            m_clusterNum = cl_num[0];      cl_num[0]++;      if (m_children != null) {	for (int i = 0; i < m_children.size(); i++) {	  CNode child = (CNode) m_children.elementAt(i);	  child.assignClusterNums(cl_num);	}      }    }    /**     * Recursively build a string representation of the Cobweb tree     *     * @param depth depth of this node in the tree     * @param text holds the string representation     */    protected void dumpTree(int depth, StringBuffer text) {      if (depth == 0)	determineNumberOfClusters();            if (m_children == null) {	text.append("\n");	for (int j = 0; j < depth; j++) {	  text.append("|   ");	}	text.append("leaf "+m_clusterNum+" ["		    +m_clusterInstances.numInstances()+"]");      } else {	for (int i = 0; i < m_children.size(); i++) {	  text.append("\n");	  for (int j = 0; j < depth; j++) {	    text.append("|   ");	  }	  text.append("node "+m_clusterNum+" ["		      +m_clusterInstances.numInstances()		      +"]");	  ((CNode) m_children.elementAt(i)).dumpTree(depth+1, text);	}      }    }    /**     * Returns the instances at this node as a string. Appends the cluster     * number of the child that each instance belongs to.     *     * @return a <code>String</code> value     * @throws Exception if an error occurs     */    protected String dumpData() throws Exception {      if (m_children == null) {	return m_clusterInstances.toString();      }      // construct instances string with cluster numbers attached      CNode tempNode = new CNode(m_numAttributes);      tempNode.m_clusterInstances = new Instances(m_clusterInstances, 1);      for (int i = 0; i < m_children.size(); i++) {	tempNode.addChildNode((CNode)m_children.elementAt(i));      }      Instances tempInst = tempNode.m_clusterInstances;      tempNode = null;      Add af = new Add();      af.setAttributeName("Cluster");      String labels = "";      for (int i = 0; i < m_children.size(); i++) {	CNode temp = (CNode)m_children.elementAt(i);	labels += ("C"+temp.m_clusterNum);	if (i < m_children.size()-1) {	  labels+=",";	}      }      af.setNominalLabels(labels);      af.setInputFormat(tempInst);      tempInst = Filter.useFilter(tempInst, af);      tempInst.setRelationName("Cluster "+m_clusterNum);            int z = 0;      for (int i = 0; i < m_children.size(); i++) {	CNode temp = (CNode)m_children.elementAt(i);	for (int j = 0; j < temp.m_clusterInstances.numInstances(); j++) {	  tempInst.instance(z).setValue(m_numAttributes, (double)i);	  z++;	}      }      return tempInst.toString();    }    /**     * Recursively generate the graph string for the Cobweb tree.     *     * @param text holds the graph string     * @throws Exception if generation fails     */    protected void graphTree(StringBuffer text) throws Exception {            text.append("N"+m_clusterNum		  + " [label=\""+((m_children == null) 				  ? "leaf " : "node ")		  +m_clusterNum+" "		  +" ("+m_clusterInstances.numInstances()		  +")\" "		  +((m_children == null) 		    ? "shape=box style=filled " : "")		  +(m_saveInstances 		    ? "data =\n"+dumpData() +"\n,\n"		    : "")		  + "]\n");      if (m_children != null) {	for (int i = 0; i < m_children.size(); i++) {	  CNode temp = (CNode)m_children.elementAt(i);	  text.append("N"+m_clusterNum		      +"->"		      +"N" + temp.m_clusterNum		      + "\n");	}	for (int i = 0; i < m_children.size(); i++) {	  CNode temp = (CNode)m_children.elementAt(i);	  temp.graphTree(text);	}      }    }  }  /**   * Normal constant.   */  protected static final double m_normal = 1.0/(2 * Math.sqrt(Math.PI));  /**   * Acuity (minimum standard deviation).   */  protected double m_acuity = 1.0;  /**   * Cutoff (minimum category utility).   */  protected double m_cutoff = 0.01 * Cobweb.m_normal;  /**   * Holds the root of the Cobweb tree.   */  protected CNode m_cobwebTree = null;  /**   * Number of clusters (nodes in the tree). Must never be queried directly,    * only via the method numberOfClusters(). Otherwise it's not guaranteed that    * it contains the correct value.   *    * @see #numberOfClusters()   * @see #m_numberOfClustersDetermined   */  protected int m_numberOfClusters = -1;    /** whether the number of clusters was already determined */  protected boolean m_numberOfClustersDetermined = false;    /** the number of splits that happened */  protected int m_numberSplits;    /** the number of merges that happened */  protected int m_numberMerges;  /**   * Output instances in graph representation of Cobweb tree (Allows   * instances at nodes in the tree to be visualized in the Explorer).   */  protected boolean m_saveInstances = false;  /**   * default constructor   */  public Cobweb() {    super();        m_SeedDefault = 42;    setSeed(m_SeedDefault);  }    /**   * Returns a string describing this clusterer   * @return a description of the evaluator suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return         "Class implementing the Cobweb and Classit clustering algorithms.\n\n"      + "Note: the application of node operators (merging, splitting etc.) in "      + "terms of ordering and priority differs (and is somewhat ambiguous) "      + "between the original Cobweb and Classit papers. This algorithm always "      + "compares the best host, adding a new leaf, merging the two best hosts, "      + "and splitting the best host when considering where to place a new "      + "instance.\n\n"

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -