📄 cobweb.java
字号:
// tempChildren.removeElementAt(tempChildren.size()-1); } else { // restore the status quo m_children = saveStatusQuo; } } if (finalBestHost != this) { // can commit the instance to the set of instances at this node m_clusterInstances.add(newInstance); } else { m_numberSplits++; } if (finalBestHost == merged) { m_numberMerges++; m_children.removeElementAt(m_children.indexOf(a)); m_children.removeElementAt(m_children.indexOf(b)); m_children.addElement(merged); } if (finalBestHost == newLeaf) { finalBestHost = new CNode(m_numAttributes); m_children.addElement(finalBestHost); } if (bestHostCU < m_cutoff) { if (finalBestHost == this) { // splitting was the best, but since we are cutting all children // recursion is aborted and we still need to add the instance // to the set of instances at this node m_clusterInstances.add(newInstance); } m_children = null; finalBestHost = null; } if (finalBestHost == this) { // splitting is still the best, so downdate the stats as // we'll be recursively calling on this node updateStats(newInstance, true); } return finalBestHost; } /** * Adds the supplied node as a child of this node. All of the child's * instances are added to this nodes instances * * @param child the child to add */ protected void addChildNode(CNode child) { for (int i = 0; i < child.m_clusterInstances.numInstances(); i++) { Instance temp = child.m_clusterInstances.instance(i); m_clusterInstances.add(temp); updateStats(temp, false); } if (m_children == null) { m_children = new FastVector(); } m_children.addElement(child); } /** * Computes the utility of all children with respect to this node * * @return the category utility of the children with respect to this node. * @throws Exception if there are no children */ protected double categoryUtility() throws Exception { if (m_children == null) { throw new Exception("categoryUtility: No children!"); } double totalCU = 0; for (int i = 0; i < m_children.size(); i++) { CNode child = (CNode) m_children.elementAt(i); totalCU += categoryUtilityChild(child); } totalCU /= (double)m_children.size(); return totalCU; } /** * Computes the utility of a single child with respect to this node * * @param child the child for which to compute the utility * @return the utility of the child with respect to this node * @throws Exception if something goes wrong */ protected double categoryUtilityChild(CNode child) throws Exception { double sum = 0; for (int i = 0; i < m_numAttributes; i++) { if (m_clusterInstances.attribute(i).isNominal()) { for (int j = 0; j < m_clusterInstances.attribute(i).numValues(); j++) { double x = child.getProbability(i, j); double y = getProbability(i, j); sum += (x * x) - (y * y); } } else { // numeric attribute sum += ((m_normal / child.getStandardDev(i)) - (m_normal / getStandardDev(i))); } } return (child.m_totalInstances / m_totalInstances) * sum; } /** * Returns the probability of a value of a nominal attribute in this node * * @param attIndex the index of the attribute * @param valueIndex the index of the value of the attribute * @return the probability * @throws Exception if the requested attribute is not nominal */ protected double getProbability(int attIndex, int valueIndex) throws Exception { if (!m_clusterInstances.attribute(attIndex).isNominal()) { throw new Exception("getProbability: attribute is not nominal"); } if (m_attStats[attIndex].totalCount <= 0) { return 0; } return (double) m_attStats[attIndex].nominalCounts[valueIndex] / (double) m_attStats[attIndex].totalCount; } /** * Returns the standard deviation of a numeric attribute * * @param attIndex the index of the attribute * @return the standard deviation * @throws Exception if an error occurs */ protected double getStandardDev(int attIndex) throws Exception { if (!m_clusterInstances.attribute(attIndex).isNumeric()) { throw new Exception("getStandardDev: attribute is not numeric"); } m_attStats[attIndex].numericStats.calculateDerived(); double stdDev = m_attStats[attIndex].numericStats.stdDev; if (Double.isNaN(stdDev) || Double.isInfinite(stdDev)) { return m_acuity; } return Math.max(m_acuity, stdDev); } /** * Update attribute stats using the supplied instance. * * @param updateInstance the instance for updating * @param delete true if the values of the supplied instance are * to be removed from the statistics */ protected void updateStats(Instance updateInstance, boolean delete) { if (m_attStats == null) { m_attStats = new AttributeStats[m_numAttributes]; for (int i = 0; i < m_numAttributes; i++) { m_attStats[i] = new AttributeStats(); if (m_clusterInstances.attribute(i).isNominal()) { m_attStats[i].nominalCounts = new int [m_clusterInstances.attribute(i).numValues()]; } else { m_attStats[i].numericStats = new Stats(); } } } for (int i = 0; i < m_numAttributes; i++) { if (!updateInstance.isMissing(i)) { double value = updateInstance.value(i); if (m_clusterInstances.attribute(i).isNominal()) { m_attStats[i].nominalCounts[(int)value] += (delete) ? (-1.0 * updateInstance.weight()) : updateInstance.weight(); m_attStats[i].totalCount += (delete) ? (-1.0 * updateInstance.weight()) : updateInstance.weight(); } else { if (delete) { m_attStats[i].numericStats.subtract(value, updateInstance.weight()); } else { m_attStats[i].numericStats.add(value, updateInstance.weight()); } } } } m_totalInstances += (delete) ? (-1.0 * updateInstance.weight()) : (updateInstance.weight()); } /** * Recursively assigns numbers to the nodes in the tree. * * @param cl_num an <code>int[]</code> value * @throws Exception if an error occurs */ private void assignClusterNums(int[] cl_num) throws Exception { if (m_children != null && m_children.size() < 2) { throw new Exception("assignClusterNums: tree not built correctly!"); } m_clusterNum = cl_num[0]; cl_num[0]++; if (m_children != null) { for (int i = 0; i < m_children.size(); i++) { CNode child = (CNode) m_children.elementAt(i); child.assignClusterNums(cl_num); } } } /** * Recursively build a string representation of the Cobweb tree * * @param depth depth of this node in the tree * @param text holds the string representation */ protected void dumpTree(int depth, StringBuffer text) { if (depth == 0) determineNumberOfClusters(); if (m_children == null) { text.append("\n"); for (int j = 0; j < depth; j++) { text.append("| "); } text.append("leaf "+m_clusterNum+" [" +m_clusterInstances.numInstances()+"]"); } else { for (int i = 0; i < m_children.size(); i++) { text.append("\n"); for (int j = 0; j < depth; j++) { text.append("| "); } text.append("node "+m_clusterNum+" [" +m_clusterInstances.numInstances() +"]"); ((CNode) m_children.elementAt(i)).dumpTree(depth+1, text); } } } /** * Returns the instances at this node as a string. Appends the cluster * number of the child that each instance belongs to. * * @return a <code>String</code> value * @throws Exception if an error occurs */ protected String dumpData() throws Exception { if (m_children == null) { return m_clusterInstances.toString(); } // construct instances string with cluster numbers attached CNode tempNode = new CNode(m_numAttributes); tempNode.m_clusterInstances = new Instances(m_clusterInstances, 1); for (int i = 0; i < m_children.size(); i++) { tempNode.addChildNode((CNode)m_children.elementAt(i)); } Instances tempInst = tempNode.m_clusterInstances; tempNode = null; Add af = new Add(); af.setAttributeName("Cluster"); String labels = ""; for (int i = 0; i < m_children.size(); i++) { CNode temp = (CNode)m_children.elementAt(i); labels += ("C"+temp.m_clusterNum); if (i < m_children.size()-1) { labels+=","; } } af.setNominalLabels(labels); af.setInputFormat(tempInst); tempInst = Filter.useFilter(tempInst, af); tempInst.setRelationName("Cluster "+m_clusterNum); int z = 0; for (int i = 0; i < m_children.size(); i++) { CNode temp = (CNode)m_children.elementAt(i); for (int j = 0; j < temp.m_clusterInstances.numInstances(); j++) { tempInst.instance(z).setValue(m_numAttributes, (double)i); z++; } } return tempInst.toString(); } /** * Recursively generate the graph string for the Cobweb tree. * * @param text holds the graph string * @throws Exception if generation fails */ protected void graphTree(StringBuffer text) throws Exception { text.append("N"+m_clusterNum + " [label=\""+((m_children == null) ? "leaf " : "node ") +m_clusterNum+" " +" ("+m_clusterInstances.numInstances() +")\" " +((m_children == null) ? "shape=box style=filled " : "") +(m_saveInstances ? "data =\n"+dumpData() +"\n,\n" : "") + "]\n"); if (m_children != null) { for (int i = 0; i < m_children.size(); i++) { CNode temp = (CNode)m_children.elementAt(i); text.append("N"+m_clusterNum +"->" +"N" + temp.m_clusterNum + "\n"); } for (int i = 0; i < m_children.size(); i++) { CNode temp = (CNode)m_children.elementAt(i); temp.graphTree(text); } } } } /** * Normal constant. */ protected static final double m_normal = 1.0/(2 * Math.sqrt(Math.PI)); /** * Acuity (minimum standard deviation). */ protected double m_acuity = 1.0; /** * Cutoff (minimum category utility). */ protected double m_cutoff = 0.01 * Cobweb.m_normal; /** * Holds the root of the Cobweb tree. */ protected CNode m_cobwebTree = null; /** * Number of clusters (nodes in the tree). Must never be queried directly, * only via the method numberOfClusters(). Otherwise it's not guaranteed that * it contains the correct value. * * @see #numberOfClusters() * @see #m_numberOfClustersDetermined */ protected int m_numberOfClusters = -1; /** whether the number of clusters was already determined */ protected boolean m_numberOfClustersDetermined = false; /** the number of splits that happened */ protected int m_numberSplits; /** the number of merges that happened */ protected int m_numberMerges; /** * Output instances in graph representation of Cobweb tree (Allows * instances at nodes in the tree to be visualized in the Explorer). */ protected boolean m_saveInstances = false; /** * default constructor */ public Cobweb() { super(); m_SeedDefault = 42; setSeed(m_SeedDefault); } /** * Returns a string describing this clusterer * @return a description of the evaluator suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Class implementing the Cobweb and Classit clustering algorithms.\n\n" + "Note: the application of node operators (merging, splitting etc.) in " + "terms of ordering and priority differs (and is somewhat ambiguous) " + "between the original Cobweb and Classit papers. This algorithm always " + "compares the best host, adding a new leaf, merging the two best hosts, " + "and splitting the best host when considering where to place a new " + "instance.\n\n"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -