📄 cobweb.java
字号:
}
return Math.max(m_acuity, stdDev);
}
/**
 * Adds the supplied instance's values to, or removes them from, this
 * node's per-attribute statistics and its running instance total.
 * The statistics array is allocated on first use; attributes whose
 * value is missing in the instance are skipped.
 *
 * @param updateInstance the instance whose values are applied
 * @param delete true to remove the instance's contribution from the
 * statistics, false to add it
 */
protected void updateStats(Instance updateInstance,
                           boolean delete) {
  if (m_attStats == null) {
    // First use: create one statistics holder per attribute.
    m_attStats = new AttributeStats[m_numAttributes];
    for (int a = 0; a < m_numAttributes; a++) {
      AttributeStats fresh = new AttributeStats();
      m_attStats[a] = fresh;
      if (m_clusterInstances.attribute(a).isNominal()) {
        fresh.nominalCounts =
          new int [m_clusterInstances.attribute(a).numValues()];
      } else {
        fresh.numericStats = new Stats();
      }
    }
  }

  final double weight = updateInstance.weight();
  final double signedWeight = delete ? (-1.0 * weight) : weight;

  for (int a = 0; a < m_numAttributes; a++) {
    if (updateInstance.isMissing(a)) {
      continue;
    }
    double value = updateInstance.value(a);
    AttributeStats stats = m_attStats[a];
    if (m_clusterInstances.attribute(a).isNominal()) {
      // nominalCounts holds ints, so the compound assignment truncates
      // any fractional part of the weighted sum.
      stats.nominalCounts[(int) value] += signedWeight;
      stats.totalCount += signedWeight;
    } else if (delete) {
      stats.numericStats.subtract(value, weight);
    } else {
      stats.numericStats.add(value, weight);
    }
  }

  m_totalInstances += signedWeight;
}
/**
 * Numbers this node and then, recursively, each of its children in
 * order, drawing consecutive numbers from a shared counter.
 *
 * @param cl_num array whose first element holds the next free cluster
 * number; it is incremented once for every node visited
 * @exception Exception if a node that has children has fewer than two
 */
private void assignClusterNums(int [] cl_num) throws Exception {
  final boolean internal = (m_children != null);
  if (internal && m_children.size() < 2) {
    throw new Exception("assignClusterNums: tree not built correctly!");
  }

  // Take the current number for this node and advance the counter.
  m_clusterNum = cl_num[0]++;

  if (!internal) {
    return;
  }
  int idx = 0;
  while (idx < m_children.size()) {
    CNode child = (CNode) m_children.elementAt(idx);
    child.assignClusterNums(cl_num);
    idx++;
  }
}
/**
 * Recursively appends a textual outline of the subtree rooted at this
 * node. A leaf contributes a single "leaf" line. An internal node
 * contributes one "node" line in front of each child's outline, so its
 * header is repeated once per child.
 *
 * @param depth depth of this node in the tree; one "| " marker is
 * emitted per level
 * @param text buffer that receives the outline
 */
protected void dumpTree(int depth, StringBuffer text) {
  // Every line starts with a newline followed by one marker per level.
  StringBuffer lead = new StringBuffer("\n");
  for (int level = 0; level < depth; level++) {
    lead.append("| ");
  }
  final String linePrefix = lead.toString();

  if (m_children == null) {
    text.append(linePrefix);
    text.append("leaf "+m_clusterNum+" ["
                +m_clusterInstances.numInstances()+"]");
    return;
  }

  for (int c = 0; c < m_children.size(); c++) {
    text.append(linePrefix);
    text.append("node "+m_clusterNum+" ["
                +m_clusterInstances.numInstances()
                +"]");
    CNode child = (CNode) m_children.elementAt(c);
    child.dumpTree(depth + 1, text);
  }
}
/**
 * Returns the instances at this node as a string. For a node with
 * children, an extra nominal attribute named "Cluster" is added whose
 * labels are "C" followed by each child's cluster number, and every
 * instance is tagged with the child it came from.
 *
 * @return the string form of this node's instances
 * @exception Exception if an error occurs
 */
protected String dumpData() throws Exception {
  if (m_children == null) {
    return m_clusterInstances.toString();
  }

  // Collect the children's instances through a scratch node.
  // NOTE(review): the tagging loop below relies on addChildNode placing
  // each child's instances in child order - confirm against addChildNode.
  CNode pool = new CNode(m_numAttributes);
  pool.m_clusterInstances = new Instances(m_clusterInstances, 1);
  for (int c = 0; c < m_children.size(); c++) {
    pool.addChildNode((CNode) m_children.elementAt(c));
  }
  Instances labelled = pool.m_clusterInstances;

  // Comma-separated label list: one "C<clusterNum>" per child.
  StringBuffer labels = new StringBuffer();
  for (int c = 0; c < m_children.size(); c++) {
    if (c > 0) {
      labels.append(",");
    }
    CNode child = (CNode) m_children.elementAt(c);
    labels.append("C"+child.m_clusterNum);
  }

  Add addCluster = new Add();
  addCluster.setAttributeName("Cluster");
  addCluster.setNominalLabels(labels.toString());
  addCluster.setInputFormat(labelled);
  labelled = Filter.useFilter(labelled, addCluster);
  labelled.setRelationName("Cluster "+m_clusterNum);

  // Walk the pooled rows child by child, storing the child's index as
  // the value at attribute index m_numAttributes (presumably the
  // position of the newly added attribute).
  int row = 0;
  for (int c = 0; c < m_children.size(); c++) {
    CNode child = (CNode) m_children.elementAt(c);
    for (int k = 0; k < child.m_clusterInstances.numInstances(); k++) {
      labelled.instance(row).setValue(m_numAttributes, (double) c);
      row++;
    }
  }
  return labelled.toString();
}
/**
 * Recursively generates the graph string for the subtree rooted at
 * this node: first this node's declaration, then one edge per child,
 * then each child's own subtree.
 *
 * @param text buffer that receives the graph string
 * @exception Exception if the instance data for a node cannot be
 * dumped
 */
protected void graphTree(StringBuffer text) throws Exception {
  final boolean leaf = (m_children == null);

  // Assemble the declaration separately so that nothing reaches the
  // output buffer if dumpData() fails.
  StringBuffer decl = new StringBuffer();
  decl.append("N").append(m_clusterNum);
  decl.append(" [label=\"").append(leaf ? "leaf " : "node ");
  decl.append(m_clusterNum).append(" ");
  decl.append(" (").append(m_clusterInstances.numInstances());
  decl.append(")\" ");
  if (leaf) {
    decl.append("shape=box style=filled ");
  }
  if (m_saveInstances) {
    decl.append("data =\n").append(dumpData()).append("\n,\n");
  }
  decl.append("]\n");
  text.append(decl.toString());

  if (m_children == null) {
    return;
  }

  // All edges from this node are written before any child subtree.
  for (int c = 0; c < m_children.size(); c++) {
    CNode child = (CNode) m_children.elementAt(c);
    text.append("N"+m_clusterNum
                +"->"
                +"N" + child.m_clusterNum
                + "\n");
  }
  for (int c = 0; c < m_children.size(); c++) {
    ((CNode) m_children.elementAt(c)).graphTree(text);
  }
}
}
/**
 * Normal constant, 1/(2*sqrt(pi)). The default cutoff is expressed as
 * a multiple of it.
 */
protected static final double m_normal = 1.0/(2 * Math.sqrt(Math.PI));
/**
 * Acuity (minimum standard deviation). Defaults to 1.0.
 */
protected double m_acuity = 1.0;
/**
 * Cutoff (minimum category utility). Defaults to 0.01 * m_normal.
 */
protected double m_cutoff = 0.01 * Cobweb.m_normal;
/**
 * Holds the root of the Cobweb tree; null until the first instance has
 * been added.
 */
protected CNode m_cobwebTree = null;
/**
 * Number of clusters (nodes in the tree); -1 until buildClusterer has
 * numbered the tree.
 */
protected int m_numberOfClusters = -1;
/**
 * Number of splits; reset to zero by buildClusterer and reported by
 * toString.
 */
protected int m_numberSplits;
/**
 * Number of merges; reset to zero by buildClusterer and reported by
 * toString.
 */
protected int m_numberMerges;
/**
 * Output instances in graph representation of Cobweb tree (Allows
 * instances at nodes in the tree to be visualized in the Explorer).
 */
protected boolean m_saveInstances = false;
/**
 * Builds the clusterer. Any previously built tree and its counters are
 * discarded, the training instances are added one at a time in a
 * shuffled order, and finally the nodes of the tree are numbered.
 * If the dataset contains no instances, no tree is built and the number
 * of clusters is set to zero.
 *
 * @param data the training instances.
 * @exception Exception if the data contains string attributes or
 * something else goes wrong.
 */
public void buildClusterer(Instances data) throws Exception {
  m_numberOfClusters = -1;
  m_cobwebTree = null;
  m_numberSplits = 0;
  m_numberMerges = 0;

  if (data.checkForStringAttributes()) {
    throw new Exception("Can't handle string attributes!");
  }

  // Shuffle a copy of the data; the fixed seed makes the order (and
  // therefore the resulting tree) the same on every run.
  data = new Instances(data);
  data.randomize(new Random(42));
  for (int i = 0; i < data.numInstances(); i++) {
    addInstance(data.instance(i));
  }

  // addInstance creates the root on its first call, so a null root here
  // means the dataset was empty and there is nothing to number.
  if (m_cobwebTree == null) {
    m_numberOfClusters = 0;
    return;
  }

  // One-element array used as a counter shared across the recursion.
  int [] numClusts = new int [1];
  numClusts[0] = 0;
  m_cobwebTree.assignClusterNums(numClusts);
  m_numberOfClusters = numClusts[0];
}
/**
 * Assigns an instance to a cluster by descending the Cobweb tree from
 * the root. At each node that has children, the instance is
 * temporarily added to the node's statistics while the best host among
 * the children is looked up, and is removed again afterwards. The
 * descent stops at a node without children or when no host is found.
 *
 * @param instance the instance to be assigned to a cluster
 * @return the number of the cluster the instance was assigned to
 * @exception Exception if the clusterer has not been built or the
 * instance could not be clustered successfully
 */
public int clusterInstance(Instance instance) throws Exception {
  if (m_cobwebTree == null) {
    throw new Exception("Cobweb hasn't been built yet!");
  }

  CNode host = m_cobwebTree;
  while (host.m_children != null) {
    CNode next;
    host.updateStats(instance, false);
    try {
      next = host.findHost(instance, true);
    } finally {
      // Always take the instance back out, even if findHost fails, so
      // that clustering never leaves the tree's statistics modified.
      host.updateStats(instance, true);
    }
    if (next == null) {
      break;
    }
    host = next;
  }
  return host.m_clusterNum;
}
/**
 * Returns the number of clusters.
 *
 * @return the stored number of clusters; -1 until buildClusterer has
 * completed
 * @exception Exception if something goes wrong.
 */
public int numberOfClusters() throws Exception {
  return this.m_numberOfClusters;
}
/**
 * Adds an instance to the Cobweb tree. The first instance added
 * becomes the root node; every later one is handed to the root.
 *
 * @param newInstance the instance to be added
 * @exception Exception if something goes wrong
 */
public void addInstance(Instance newInstance) throws Exception {
  if (m_cobwebTree != null) {
    m_cobwebTree.addInstance(newInstance);
    return;
  }
  m_cobwebTree = new CNode(newInstance.numAttributes(), newInstance);
}
/**
 * Returns an enumeration describing the available options
 * (-A acuity and -C cutoff).
 *
 * @return an enumeration of all the available options.
 **/
public Enumeration listOptions() {
  Vector newVector = new Vector(2);
  newVector.addElement(new Option("\tAcuity.\n"
                                  +"\t(default=1.0)", "A", 1,"-A <acuity>"));
  // The description previously began with a stray "a" before the tab.
  // The quoted default is approximate: the actual value is
  // 0.01 * Cobweb.m_normal.
  newVector.addElement(new Option("\tCutoff.\n"
                                  +"\t(default=0.002)", "C", 1,"-C <cutoff>"));
  return newVector.elements();
}
/**
 * Parses a given list of options. An option that is absent resets the
 * corresponding setting to its default.
 *
 * Valid options are:<p>
 *
 * -A &lt;acuity&gt; <br>
 * Acuity. <p>
 *
 * -C &lt;cutoff&gt; <br>
 * Cutoff. <p>
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option is not supported
 *
 **/
public void setOptions(String[] options) throws Exception {
  final String acuityArg = Utils.getOption('A', options);
  if (acuityArg.length() == 0) {
    m_acuity = 1.0;
  } else {
    setAcuity(Double.parseDouble(acuityArg));
  }

  final String cutoffArg = Utils.getOption('C', options);
  if (cutoffArg.length() == 0) {
    m_cutoff = 0.01 * Cobweb.m_normal;
  } else {
    setCutoff(Double.parseDouble(cutoffArg));
  }
}
/**
 * Returns the tip text for the acuity property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String acuityTipText() {
  final String tip =
    "set the minimum standard deviation for numeric attributes";
  return tip;
}
/**
 * Sets the acuity (the minimum standard deviation).
 *
 * @param a the acuity value
 */
public void setAcuity(double a) {
  this.m_acuity = a;
}
/**
 * Gets the acuity (the minimum standard deviation).
 *
 * @return the acuity
 */
public double getAcuity() {
  return this.m_acuity;
}
/**
 * Returns the tip text for the cutoff property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String cutoffTipText() {
  final String tip =
    "set the category utility threshold by which to prune nodes";
  return tip;
}
/**
 * Sets the cutoff (the category utility threshold).
 *
 * @param c the cutoff
 */
public void setCutoff(double c) {
  this.m_cutoff = c;
}
/**
 * Gets the cutoff (the category utility threshold).
 *
 * @return the cutoff
 */
public double getCutoff() {
  return this.m_cutoff;
}
/**
 * Returns the tip text for the saveInstanceData property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String saveInstanceDataTipText() {
  final String tip =
    "save instance information for visualization purposes";
  return tip;
}
/**
 * Reports whether instance data is included in the graph output.
 *
 * @return Value of saveInstances.
 */
public boolean getSaveInstanceData() {
  return this.m_saveInstances;
}
/**
 * Chooses whether instance data is included in the graph output.
 *
 * @param newsaveInstances Value to assign to saveInstances.
 */
public void setSaveInstanceData(boolean newsaveInstances) {
  this.m_saveInstances = newsaveInstances;
}
/**
 * Gets the current settings of Cobweb: the acuity (-A) followed by the
 * cutoff (-C).
 *
 * @return an array of strings suitable for passing to setOptions()
 */
public String [] getOptions() {
  // Both settings are always reported, so the four slots are exactly
  // filled and no padding entries are needed.
  return new String [] {
    "-A", "" + m_acuity,
    "-C", "" + m_cutoff
  };
}
/**
 * Returns a description of the clusterer as a string: the merge, split
 * and cluster counts followed by an outline of the tree, or a notice
 * if no tree has been built.
 *
 * @return a string describing the clusterer.
 */
public String toString() {
  if (m_cobwebTree == null) {
    return "Cobweb hasn't been built yet!";
  }

  StringBuffer tree = new StringBuffer();
  m_cobwebTree.dumpTree(0, tree);

  StringBuffer summary = new StringBuffer();
  summary.append("Number of merges: ").append(m_numberMerges);
  summary.append("\nNumber of splits: ").append(m_numberSplits);
  summary.append("\nNumber of clusters: ").append(m_numberOfClusters);
  summary.append("\n").append(tree.toString()).append("\n\n");
  return summary.toString();
}
/**
 * Returns the type of graph this class represents.
 *
 * @return Drawable.TREE
 */
public int graphType() {
  final int kind = Drawable.TREE;
  return kind;
}
/**
 * Generates the graph string of the Cobweb tree: a
 * "digraph CobwebTree" block containing the node and edge lines
 * produced by the tree itself.
 *
 * @return a <code>String</code> value
 * @exception Exception if the clusterer has not been built or an
 * error occurs while generating the graph
 */
public String graph() throws Exception {
  // Fail with a clear message instead of a NullPointerException when
  // there is no tree to draw.
  if (m_cobwebTree == null) {
    throw new Exception("Cobweb hasn't been built yet!");
  }
  StringBuffer text = new StringBuffer();
  text.append("digraph CobwebTree {\n");
  m_cobwebTree.graphTree(text);
  text.append("}\n");
  return text.toString();
}
/**
 * Main method for testing this class. Evaluates a new Cobweb clusterer
 * with the given command-line arguments and prints the result; on
 * failure the exception message and stack trace are printed instead.
 *
 * @param argv the command-line arguments
 */
public static void main(String [] argv) {
  try {
    System.out.println(
      ClusterEvaluation.evaluateClusterer(new Cobweb(), argv));
  } catch (Exception ex) {
    System.out.println(ex.getMessage());
    ex.printStackTrace();
  }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -