📄 cobweb.java
字号:
0623: }
0624: m_totalInstances += (delete) ? (-1.0 * updateInstance
0625: .weight()) : (updateInstance.weight());
0626: }
0627:
0628: /**
0629: * Recursively assigns numbers to the nodes in the tree.
0630: *
0631: * @param cl_num an <code>int[]</code> value
0632: * @throws Exception if an error occurs
0633: */
0634: private void assignClusterNums(int[] cl_num) throws Exception {
0635: if (m_children != null && m_children.size() < 2) {
0636: throw new Exception(
0637: "assignClusterNums: tree not built correctly!");
0638: }
0639:
0640: m_clusterNum = cl_num[0];
0641: cl_num[0]++;
0642: if (m_children != null) {
0643: for (int i = 0; i < m_children.size(); i++) {
0644: CNode child = (CNode) m_children.elementAt(i);
0645: child.assignClusterNums(cl_num);
0646: }
0647: }
0648: }
0649:
0650: /**
0651: * Recursively build a string representation of the Cobweb tree
0652: *
0653: * @param depth depth of this node in the tree
0654: * @param text holds the string representation
0655: */
0656: protected void dumpTree(int depth, StringBuffer text) {
0657:
0658: if (depth == 0)
0659: determineNumberOfClusters();
0660:
0661: if (m_children == null) {
0662: text.append("\n");
0663: for (int j = 0; j < depth; j++) {
0664: text.append("| ");
0665: }
0666: text.append("leaf " + m_clusterNum + " ["
0667: + m_clusterInstances.numInstances() + "]");
0668: } else {
0669: for (int i = 0; i < m_children.size(); i++) {
0670: text.append("\n");
0671: for (int j = 0; j < depth; j++) {
0672: text.append("| ");
0673: }
0674: text.append("node " + m_clusterNum + " ["
0675: + m_clusterInstances.numInstances() + "]");
0676: ((CNode) m_children.elementAt(i)).dumpTree(
0677: depth + 1, text);
0678: }
0679: }
0680: }
0681:
0682: /**
0683: * Returns the instances at this node as a string. Appends the cluster
0684: * number of the child that each instance belongs to.
0685: *
0686: * @return a <code>String</code> value
0687: * @throws Exception if an error occurs
0688: */
0689: protected String dumpData() throws Exception {
0690: if (m_children == null) {
0691: return m_clusterInstances.toString();
0692: }
0693:
0694: // construct instances string with cluster numbers attached
0695: CNode tempNode = new CNode(m_numAttributes);
0696: tempNode.m_clusterInstances = new Instances(
0697: m_clusterInstances, 1);
0698: for (int i = 0; i < m_children.size(); i++) {
0699: tempNode.addChildNode((CNode) m_children.elementAt(i));
0700: }
0701: Instances tempInst = tempNode.m_clusterInstances;
0702: tempNode = null;
0703:
0704: Add af = new Add();
0705: af.setAttributeName("Cluster");
0706: String labels = "";
0707: for (int i = 0; i < m_children.size(); i++) {
0708: CNode temp = (CNode) m_children.elementAt(i);
0709: labels += ("C" + temp.m_clusterNum);
0710: if (i < m_children.size() - 1) {
0711: labels += ",";
0712: }
0713: }
0714: af.setNominalLabels(labels);
0715: af.setInputFormat(tempInst);
0716: tempInst = Filter.useFilter(tempInst, af);
0717: tempInst.setRelationName("Cluster " + m_clusterNum);
0718:
0719: int z = 0;
0720: for (int i = 0; i < m_children.size(); i++) {
0721: CNode temp = (CNode) m_children.elementAt(i);
0722: for (int j = 0; j < temp.m_clusterInstances
0723: .numInstances(); j++) {
0724: tempInst.instance(z).setValue(m_numAttributes,
0725: (double) i);
0726: z++;
0727: }
0728: }
0729: return tempInst.toString();
0730: }
0731:
0732: /**
0733: * Recursively generate the graph string for the Cobweb tree.
0734: *
0735: * @param text holds the graph string
0736: * @throws Exception if generation fails
0737: */
0738: protected void graphTree(StringBuffer text) throws Exception {
0739:
0740: text.append("N"
0741: + m_clusterNum
0742: + " [label=\""
0743: + ((m_children == null) ? "leaf " : "node ")
0744: + m_clusterNum
0745: + " "
0746: + " ("
0747: + m_clusterInstances.numInstances()
0748: + ")\" "
0749: + ((m_children == null) ? "shape=box style=filled "
0750: : "")
0751: + (m_saveInstances ? "data =\n" + dumpData()
0752: + "\n,\n" : "") + "]\n");
0753: if (m_children != null) {
0754: for (int i = 0; i < m_children.size(); i++) {
0755: CNode temp = (CNode) m_children.elementAt(i);
0756: text.append("N" + m_clusterNum + "->" + "N"
0757: + temp.m_clusterNum + "\n");
0758: }
0759:
0760: for (int i = 0; i < m_children.size(); i++) {
0761: CNode temp = (CNode) m_children.elementAt(i);
0762: temp.graphTree(text);
0763: }
0764: }
0765: }
0766: }
0767:
0768: /**
0769: * Normal constant.
0770: */
0771: protected static final double m_normal = 1.0 / (2 * Math
0772: .sqrt(Math.PI));
0773:
0774: /**
0775: * Acuity (minimum standard deviation).
0776: */
0777: protected double m_acuity = 1.0;
0778:
0779: /**
0780: * Cutoff (minimum category utility).
0781: */
0782: protected double m_cutoff = 0.01 * Cobweb.m_normal;
0783:
0784: /**
0785: * Holds the root of the Cobweb tree.
0786: */
0787: protected CNode m_cobwebTree = null;
0788:
0789: /**
0790: * Number of clusters (nodes in the tree). Must never be queried directly,
0791: * only via the method numberOfClusters(). Otherwise it's not guaranteed that
0792: * it contains the correct value.
0793: *
0794: * @see #numberOfClusters()
0795: * @see #m_numberOfClustersDetermined
0796: */
0797: protected int m_numberOfClusters = -1;
0798:
0799: /** whether the number of clusters was already determined */
0800: protected boolean m_numberOfClustersDetermined = false;
0801:
0802: /** the number of splits that happened */
0803: protected int m_numberSplits;
0804:
0805: /** the number of merges that happened */
0806: protected int m_numberMerges;
0807:
0808: /**
0809: * Output instances in graph representation of Cobweb tree (Allows
0810: * instances at nodes in the tree to be visualized in the Explorer).
0811: */
0812: protected boolean m_saveInstances = false;
0813:
/**
 * Default constructor. Sets the default seed to 42 and applies it as the
 * current seed.
 */
public Cobweb() {
    // the superclass no-argument constructor is invoked implicitly
    m_SeedDefault = 42;
    setSeed(m_SeedDefault);
}
0823:
0824: /**
0825: * Returns a string describing this clusterer
0826: * @return a description of the evaluator suitable for
0827: * displaying in the explorer/experimenter gui
0828: */
0829: public String globalInfo() {
0830: return "Class implementing the Cobweb and Classit clustering algorithms.\n\n"
0831: + "Note: the application of node operators (merging, splitting etc.) in "
0832: + "terms of ordering and priority differs (and is somewhat ambiguous) "
0833: + "between the original Cobweb and Classit papers. This algorithm always "
0834: + "compares the best host, adding a new leaf, merging the two best hosts, "
0835: + "and splitting the best host when considering where to place a new "
0836: + "instance.\n\n"
0837: + "For more information see:\n\n"
0838: + getTechnicalInformation().toString();
0839: }
0840:
0841: /**
0842: * Returns an instance of a TechnicalInformation object, containing
0843: * detailed information about the technical background of this class,
0844: * e.g., paper reference or book this class is based on.
0845: *
0846: * @return the technical information about this class
0847: */
0848: public TechnicalInformation getTechnicalInformation() {
0849: TechnicalInformation result;
0850: TechnicalInformation additional;
0851:
0852: result = new TechnicalInformation(Type.ARTICLE);
0853: result.setValue(Field.AUTHOR, "D. Fisher");
0854: result.setValue(Field.YEAR, "1987");
0855: result
0856: .setValue(Field.TITLE,
0857: "Knowledge acquisition via incremental conceptual clustering");
0858: result.setValue(Field.JOURNAL, "Machine Learning");
0859: result.setValue(Field.VOLUME, "2");
0860: result.setValue(Field.NUMBER, "2");
0861: result.setValue(Field.PAGES, "139-172");
0862:
0863: additional = result.add(Type.ARTICLE);
0864: additional.setValue(Field.AUTHOR,
0865: "J. H. Gennari and P. Langley and D. Fisher");
0866: additional.setValue(Field.YEAR, "1990");
0867: additional.setValue(Field.TITLE,
0868: "Models of incremental concept formation");
0869: additional.setValue(Field.JOURNAL, "Artificial Intelligence");
0870: additional.setValue(Field.VOLUME, "40");
0871: additional.setValue(Field.PAGES, "11-61");
0872:
0873: return result;
0874: }
0875:
0876: /**
0877: * Returns default capabilities of the clusterer.
0878: *
0879: * @return the capabilities of this clusterer
0880: */
0881: public Capabilities getCapabilities() {
0882: Capabilities result = super .getCapabilities();
0883:
0884: // attributes
0885: result.enable(Capability.NOMINAL_ATTRIBUTES);
0886: result.enable(Capability.NUMERIC_ATTRIBUTES);
0887: result.enable(Capability.DATE_ATTRIBUTES);
0888: result.enable(Capability.MISSING_VALUES);
0889:
0890: // other
0891: result.setMinimumNumberInstances(0);
0892:
0893: return result;
0894: }
0895:
0896: /**
0897: * Builds the clusterer.
0898: *
0899: * @param data the training instances.
0900: * @throws Exception if something goes wrong.
0901: */
0902: public void buildClusterer(Instances data) throws Exception {
0903: m_numberOfClusters = -1;
0904: m_cobwebTree = null;
0905: m_numberSplits = 0;
0906: m_numberMerges = 0;
0907:
0908: // can clusterer handle the data?
0909: getCapabilities().testWithFail(data);
0910:
0911: // randomize the instances
0912: data = new Instances(data);
0913: data.randomize(new Random(getSeed()));
0914:
0915: for (int i = 0; i < data.numInstances(); i++) {
0916: updateClusterer(data.instance(i));
0917: }
0918:
0919: updateFinished();
0920: }
0921:
0922: /**
0923: * Singals the end of the updating.
0924: */
0925: public void updateFinished() {
0926: determineNumberOfClusters();
0927: }
0928:
0929: /**
0930: * Classifies a given instance.
0931: *
0932: * @param instance the instance to be assigned to a cluster
0933: * @return the number of the assigned cluster as an integer
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -