📄 cobweb.java
字号:
} return Math.max(m_acuity, stdDev); } /** * Update attribute stats using the supplied instance. * * @param updateInstance the instance for updating * @param delete true if the values of the supplied instance are * to be removed from the statistics */ protected void updateStats(Instance updateInstance, boolean delete) { if (m_attStats == null) { m_attStats = new AttributeStats[m_numAttributes]; for (int i = 0; i < m_numAttributes; i++) { m_attStats[i] = new AttributeStats(); if (m_clusterInstances.attribute(i).isNominal()) { m_attStats[i].nominalCounts = new int [m_clusterInstances.attribute(i).numValues()]; } else { m_attStats[i].numericStats = new Stats(); } } } for (int i = 0; i < m_numAttributes; i++) { if (!updateInstance.isMissing(i)) { double value = updateInstance.value(i); if (m_clusterInstances.attribute(i).isNominal()) { m_attStats[i].nominalCounts[(int)value] += (delete) ? (-1.0 * updateInstance.weight()) : updateInstance.weight(); m_attStats[i].totalCount += (delete) ? (-1.0 * updateInstance.weight()) : updateInstance.weight(); } else { if (delete) { m_attStats[i].numericStats.subtract(value, updateInstance.weight()); } else { m_attStats[i].numericStats.add(value, updateInstance.weight()); } } } } m_totalInstances += (delete) ? (-1.0 * updateInstance.weight()) : (updateInstance.weight()); } /** * Recursively assigns numbers to the nodes in the tree. 
* * @param cl_num an <code>int[]</code> value * @exception Exception if an error occurs */ private void assignClusterNums(int [] cl_num) throws Exception { if (m_children != null && m_children.size() < 2) { throw new Exception("assignClusterNums: tree not built correctly!"); } m_clusterNum = cl_num[0]; cl_num[0]++; if (m_children != null) { for (int i = 0; i < m_children.size(); i++) { CNode child = (CNode) m_children.elementAt(i); child.assignClusterNums(cl_num); } } } /** * Recursively build a string representation of the Cobweb tree * * @param depth depth of this node in the tree * @param text holds the string representation */ protected void dumpTree(int depth, StringBuffer text) { if (m_children == null) { text.append("\n"); for (int j = 0; j < depth; j++) { text.append("| "); } text.append("leaf "+m_clusterNum+" [" +m_clusterInstances.numInstances()+"]"); } else { for (int i = 0; i < m_children.size(); i++) { text.append("\n"); for (int j = 0; j < depth; j++) { text.append("| "); } text.append("node "+m_clusterNum+" [" +m_clusterInstances.numInstances() +"]"); ((CNode) m_children.elementAt(i)).dumpTree(depth+1, text); } } } /** * Returns the instances at this node as a string. Appends the cluster * number of the child that each instance belongs to. 
* * @return a <code>String</code> value * @exception Exception if an error occurs */ protected String dumpData() throws Exception { if (m_children == null) { return m_clusterInstances.toString(); } // construct instances string with cluster numbers attached CNode tempNode = new CNode(m_numAttributes); tempNode.m_clusterInstances = new Instances(m_clusterInstances, 1); for (int i = 0; i < m_children.size(); i++) { tempNode.addChildNode((CNode)m_children.elementAt(i)); } Instances tempInst = tempNode.m_clusterInstances; tempNode = null; StringBuffer instBuff = new StringBuffer(); Add af = new Add(); af.setAttributeName("Cluster"); String labels = ""; for (int i = 0; i < m_children.size(); i++) { CNode temp = (CNode)m_children.elementAt(i); labels += ("C"+temp.m_clusterNum); if (i < m_children.size()-1) { labels+=","; } } af.setNominalLabels(labels); af.setInputFormat(tempInst); tempInst = Filter.useFilter(tempInst, af); tempInst.setRelationName("Cluster "+m_clusterNum); int z = 0; for (int i = 0; i < m_children.size(); i++) { CNode temp = (CNode)m_children.elementAt(i); for (int j = 0; j < temp.m_clusterInstances.numInstances(); j++) { tempInst.instance(z).setValue(m_numAttributes, (double)i); z++; } } return tempInst.toString(); } /** * Recursively generate the graph string for the Cobweb tree. * * @param text holds the graph string */ protected void graphTree(StringBuffer text) throws Exception { text.append("N"+m_clusterNum + " [label=\""+((m_children == null) ? "leaf " : "node ") +m_clusterNum+" " +" ("+m_clusterInstances.numInstances() +")\" " +((m_children == null) ? "shape=box style=filled " : "") +(m_saveInstances ? 
"data =\n"+dumpData() +"\n,\n" : "") + "]\n"); if (m_children != null) { for (int i = 0; i < m_children.size(); i++) { CNode temp = (CNode)m_children.elementAt(i); text.append("N"+m_clusterNum +"->" +"N" + temp.m_clusterNum + "\n"); } for (int i = 0; i < m_children.size(); i++) { CNode temp = (CNode)m_children.elementAt(i); temp.graphTree(text); } } } } /** * Normal constant. */ protected static final double m_normal = 1.0/(2 * Math.sqrt(Math.PI)); /** * Acuity (minimum standard deviation). */ protected double m_acuity = 1.0; /** * Cutoff (minimum category utility). */ protected double m_cutoff = 0.01 * Cobweb.m_normal; /** * Holds the root of the Cobweb tree. */ protected CNode m_cobwebTree = null; /** * Number of clusters (nodes in the tree). */ protected int m_numberOfClusters = -1; protected int m_numberSplits; protected int m_numberMerges; /** * Output instances in graph representation of Cobweb tree (Allows * instances at nodes in the tree to be visualized in the Explorer). */ protected boolean m_saveInstances = false; /** * Builds the clusterer. * * @param data the training instances. * @exception Exception if something goes wrong. */ public void buildClusterer(Instances data) throws Exception { m_numberOfClusters = -1; m_cobwebTree = null; m_numberSplits = 0; m_numberMerges = 0; if (data.checkForStringAttributes()) { throw new Exception("Can't handle string attributes!"); } // randomize the instances data = new Instances(data); data.randomize(new Random(42)); for (int i = 0; i < data.numInstances(); i++) { addInstance(data.instance(i)); } int [] numClusts = new int [1]; numClusts[0] = 0; m_cobwebTree.assignClusterNums(numClusts); m_numberOfClusters = numClusts[0]; } /** * Classifies a given instance. 
* * @param instance the instance to be assigned to a cluster * @return the number of the assigned cluster as an interger * if the class is enumerated, otherwise the predicted value * @exception Exception if instance could not be classified * successfully */ public int clusterInstance(Instance instance) throws Exception { CNode host = m_cobwebTree; CNode temp = null; do { if (host.m_children == null) { temp = null; break; } host.updateStats(instance, false); temp = host.findHost(instance, true); host.updateStats(instance, true); if (temp != null) { host = temp; } } while (temp != null); return host.m_clusterNum; } /** * Returns the number of clusters. * * @exception Exception if something goes wrong. */ public int numberOfClusters() throws Exception { return m_numberOfClusters; } /** * Adds an instance to the Cobweb tree. * * @param newInstance the instance to be added * @exception Exception if something goes wrong */ public void addInstance(Instance newInstance) throws Exception { if (m_cobwebTree == null) { m_cobwebTree = new CNode(newInstance.numAttributes(), newInstance); } else { m_cobwebTree.addInstance(newInstance); } } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. **/ public Enumeration listOptions() { Vector newVector = new Vector(2); newVector.addElement(new Option("\tAcuity.\n" +"\t(default=1.0)", "A", 1,"-A <acuity>")); newVector.addElement(new Option("\tCutoff.\n" +"a\t(default=0.002)", "C", 1,"-C <cutoff>")); return newVector.elements(); } /** * Parses a given list of options. * * Valid options are:<p> * * -A <acuity> <br> * Acuity. <p> * * -C <cutoff> <br> * Cutoff. 
<p> * * @param options the list of options as an array of strings * @exception Exception if an option is not supported * **/ public void setOptions(String[] options) throws Exception { String optionString; optionString = Utils.getOption('A', options); if (optionString.length() != 0) { Double temp = new Double(optionString); setAcuity(temp.doubleValue()); } else { m_acuity = 1.0; } optionString = Utils.getOption('C', options); if (optionString.length() != 0) { Double temp = new Double(optionString); setCutoff(temp.doubleValue()); } else { m_cutoff = 0.01 * Cobweb.m_normal; } } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String acuityTipText() { return "set the minimum standard deviation for numeric attributes"; } /** * set the acuity. * @param a the acuity value */ public void setAcuity(double a) { m_acuity = a; } /** * get the acuity value * @return the acuity */ public double getAcuity() { return m_acuity; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String cutoffTipText() { return "set the category utility threshold by which to prune nodes"; } /** * set the cutoff * @param c the cutof */ public void setCutoff(double c) { m_cutoff = c; } /** * get the cutoff * @return the cutoff */ public double getCutoff() { return m_cutoff; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String saveInstanceDataTipText() { return "save instance information for visualization purposes"; } /** * Get the value of saveInstances. * * @return Value of saveInstances. */ public boolean getSaveInstanceData() { return m_saveInstances; } /** * Set the value of saveInstances. * * @param newsaveInstances Value to assign to saveInstances. 
*/
  public void setSaveInstanceData(boolean newsaveInstances) {
    m_saveInstances = newsaveInstances;
  }

  /**
   * Gets the current settings of Cobweb.
   *
   * @return an array of strings suitable for passing to setOptions()
   */
  public String [] getOptions() {
    // Exactly four entries are produced, so no padding with empty strings
    // is required.
    return new String [] {
      "-A", "" + m_acuity,
      "-C", "" + m_cutoff
    };
  }

  /**
   * Returns a description of the clusterer as a string: the merge, split
   * and cluster counts followed by a dump of the tree.
   *
   * @return a string describing the clusterer.
   */
  public String toString() {
    if (m_cobwebTree == null) {
      return "Cobweb hasn't been built yet!";
    }

    StringBuffer text = new StringBuffer();
    m_cobwebTree.dumpTree(0, text);
    return "Number of merges: "
      + m_numberMerges+"\nNumber of splits: "
      + m_numberSplits+"\nNumber of clusters: "
      + m_numberOfClusters+"\n"+text.toString()+"\n\n";
  }

  /**
   * Generates the graph string of the Cobweb tree
   *
   * @return a <code>String</code> value
   * @exception Exception if the clusterer has not been built or an error
   * occurs while generating the graph
   */
  public String graph() throws Exception {
    // Same "not built" condition that toString() checks; fail with a clear
    // message instead of dereferencing a null root.
    if (m_cobwebTree == null) {
      throw new Exception("Cobweb hasn't been built yet!");
    }

    StringBuffer text = new StringBuffer();
    text.append("digraph CobwebTree {\n");
    m_cobwebTree.graphTree(text);
    text.append("}\n");
    return text.toString();
  }

  /**
   * Main method for testing this class. Passes a new Cobweb instance and
   * the command line arguments to ClusterEvaluation.evaluateClusterer and
   * prints the result; any exception is reported on standard output
   * together with its stack trace.
   *
   * @param argv the command line arguments
   */
  public static void main(String [] argv) {
    try {
      System.out.println(ClusterEvaluation.evaluateClusterer(new Cobweb(),
                                                             argv));
    } catch (Exception e) {
      System.out.println(e.getMessage());
      e.printStackTrace();
    }
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -