📄 cobweb.java
字号:
0312: // consider for merging and splitting
0313: m_children.removeElementAt(m_children.size() - 1);
0314:
0315: // now determine the best host (and the second best)
0316: int best = 0;
0317: int secondBest = 0;
0318: for (int i = 0; i < categoryUtils.length; i++) {
0319: if (categoryUtils[i] > categoryUtils[secondBest]) {
0320: if (categoryUtils[i] > categoryUtils[best]) {
0321: secondBest = best;
0322: best = i;
0323: } else {
0324: secondBest = i;
0325: }
0326: }
0327: }
0328:
0329: CNode a = (CNode) m_children.elementAt(best);
0330: CNode b = (CNode) m_children.elementAt(secondBest);
0331: if (categoryUtils[best] > bestHostCU) {
0332: bestHostCU = categoryUtils[best];
0333: finalBestHost = a;
0334: // System.out.println("Node is best");
0335: }
0336:
0337: if (structureFrozen) {
0338: if (finalBestHost == newLeaf) {
0339: return null; // *this* node is the best host
0340: } else {
0341: return finalBestHost;
0342: }
0343: }
0344:
0345: double mergedCU = -Double.MAX_VALUE;
0346: CNode merged = new CNode(m_numAttributes);
0347: if (a != b) {
0348: mergedCU = cuScoreForBestTwoMerged(merged, a, b,
0349: newInstance);
0350:
0351: if (mergedCU > bestHostCU) {
0352: bestHostCU = mergedCU;
0353: finalBestHost = merged;
0354: }
0355: }
0356:
0357: // Consider splitting the best
0358: double splitCU = -Double.MAX_VALUE;
0359: double splitBestChildCU = -Double.MAX_VALUE;
0360: double splitPlusNewLeafCU = -Double.MAX_VALUE;
0361: double splitPlusMergeBestTwoCU = -Double.MAX_VALUE;
0362: if (a.m_children != null) {
0363: FastVector tempChildren = new FastVector();
0364:
0365: for (int i = 0; i < m_children.size(); i++) {
0366: CNode existingChild = (CNode) m_children
0367: .elementAt(i);
0368: if (existingChild != a) {
0369: tempChildren.addElement(existingChild);
0370: }
0371: }
0372: for (int i = 0; i < a.m_children.size(); i++) {
0373: CNode promotedChild = (CNode) a.m_children
0374: .elementAt(i);
0375: tempChildren.addElement(promotedChild);
0376: }
0377: // also add the new leaf
0378: tempChildren.addElement(newLeaf);
0379:
0380: FastVector saveStatusQuo = m_children;
0381: m_children = tempChildren;
0382: splitPlusNewLeafCU = categoryUtility(); // split + new leaf
0383: // remove the new leaf
0384: tempChildren.removeElementAt(tempChildren.size() - 1);
0385: // now look for best and second best
0386: categoryUtils = cuScoresForChildren(newInstance);
0387:
0388: // now determine the best host (and the second best)
0389: best = 0;
0390: secondBest = 0;
0391: for (int i = 0; i < categoryUtils.length; i++) {
0392: if (categoryUtils[i] > categoryUtils[secondBest]) {
0393: if (categoryUtils[i] > categoryUtils[best]) {
0394: secondBest = best;
0395: best = i;
0396: } else {
0397: secondBest = i;
0398: }
0399: }
0400: }
0401: CNode sa = (CNode) m_children.elementAt(best);
0402: CNode sb = (CNode) m_children.elementAt(secondBest);
0403: splitBestChildCU = categoryUtils[best];
0404:
0405: // now merge best and second best
0406: CNode mergedSplitChildren = new CNode(m_numAttributes);
0407: if (sa != sb) {
0408: splitPlusMergeBestTwoCU = cuScoreForBestTwoMerged(
0409: mergedSplitChildren, sa, sb, newInstance);
0410: }
0411: splitCU = (splitBestChildCU > splitPlusNewLeafCU) ? splitBestChildCU
0412: : splitPlusNewLeafCU;
0413: splitCU = (splitCU > splitPlusMergeBestTwoCU) ? splitCU
0414: : splitPlusMergeBestTwoCU;
0415:
0416: if (splitCU > bestHostCU) {
0417: bestHostCU = splitCU;
0418: finalBestHost = this ;
0419: // tempChildren.removeElementAt(tempChildren.size()-1);
0420: } else {
0421: // restore the status quo
0422: m_children = saveStatusQuo;
0423: }
0424: }
0425:
0426: if (finalBestHost != this ) {
0427: // can commit the instance to the set of instances at this node
0428: m_clusterInstances.add(newInstance);
0429: } else {
0430: m_numberSplits++;
0431: }
0432:
0433: if (finalBestHost == merged) {
0434: m_numberMerges++;
0435: m_children.removeElementAt(m_children.indexOf(a));
0436: m_children.removeElementAt(m_children.indexOf(b));
0437: m_children.addElement(merged);
0438: }
0439:
0440: if (finalBestHost == newLeaf) {
0441: finalBestHost = new CNode(m_numAttributes);
0442: m_children.addElement(finalBestHost);
0443: }
0444:
0445: if (bestHostCU < m_cutoff) {
0446: if (finalBestHost == this ) {
0447: // splitting was the best, but since we are cutting all children
0448: // recursion is aborted and we still need to add the instance
0449: // to the set of instances at this node
0450: m_clusterInstances.add(newInstance);
0451: }
0452: m_children = null;
0453: finalBestHost = null;
0454: }
0455:
0456: if (finalBestHost == this ) {
0457: // splitting is still the best, so downdate the stats as
0458: // we'll be recursively calling on this node
0459: updateStats(newInstance, true);
0460: }
0461:
0462: return finalBestHost;
0463: }
0464:
0465: /**
0466: * Adds the supplied node as a child of this node. All of the child's
0467: * instances are added to this nodes instances
0468: *
0469: * @param child the child to add
0470: */
0471: protected void addChildNode(CNode child) {
0472: for (int i = 0; i < child.m_clusterInstances.numInstances(); i++) {
0473: Instance temp = child.m_clusterInstances.instance(i);
0474: m_clusterInstances.add(temp);
0475: updateStats(temp, false);
0476: }
0477:
0478: if (m_children == null) {
0479: m_children = new FastVector();
0480: }
0481: m_children.addElement(child);
0482: }
0483:
0484: /**
0485: * Computes the utility of all children with respect to this node
0486: *
0487: * @return the category utility of the children with respect to this node.
0488: * @throws Exception if there are no children
0489: */
0490: protected double categoryUtility() throws Exception {
0491:
0492: if (m_children == null) {
0493: throw new Exception("categoryUtility: No children!");
0494: }
0495:
0496: double totalCU = 0;
0497:
0498: for (int i = 0; i < m_children.size(); i++) {
0499: CNode child = (CNode) m_children.elementAt(i);
0500: totalCU += categoryUtilityChild(child);
0501: }
0502:
0503: totalCU /= (double) m_children.size();
0504: return totalCU;
0505: }
0506:
0507: /**
0508: * Computes the utility of a single child with respect to this node
0509: *
0510: * @param child the child for which to compute the utility
0511: * @return the utility of the child with respect to this node
0512: * @throws Exception if something goes wrong
0513: */
0514: protected double categoryUtilityChild(CNode child)
0515: throws Exception {
0516:
0517: double sum = 0;
0518: for (int i = 0; i < m_numAttributes; i++) {
0519: if (m_clusterInstances.attribute(i).isNominal()) {
0520: for (int j = 0; j < m_clusterInstances.attribute(i)
0521: .numValues(); j++) {
0522: double x = child.getProbability(i, j);
0523: double y = getProbability(i, j);
0524: sum += (x * x) - (y * y);
0525: }
0526: } else {
0527: // numeric attribute
0528: sum += ((m_normal / child.getStandardDev(i)) - (m_normal / getStandardDev(i)));
0529:
0530: }
0531: }
0532: return (child.m_totalInstances / m_totalInstances) * sum;
0533: }
0534:
0535: /**
0536: * Returns the probability of a value of a nominal attribute in this node
0537: *
0538: * @param attIndex the index of the attribute
0539: * @param valueIndex the index of the value of the attribute
0540: * @return the probability
0541: * @throws Exception if the requested attribute is not nominal
0542: */
0543: protected double getProbability(int attIndex, int valueIndex)
0544: throws Exception {
0545:
0546: if (!m_clusterInstances.attribute(attIndex).isNominal()) {
0547: throw new Exception(
0548: "getProbability: attribute is not nominal");
0549: }
0550:
0551: if (m_attStats[attIndex].totalCount <= 0) {
0552: return 0;
0553: }
0554:
0555: return (double) m_attStats[attIndex].nominalCounts[valueIndex]
0556: / (double) m_attStats[attIndex].totalCount;
0557: }
0558:
0559: /**
0560: * Returns the standard deviation of a numeric attribute
0561: *
0562: * @param attIndex the index of the attribute
0563: * @return the standard deviation
0564: * @throws Exception if an error occurs
0565: */
0566: protected double getStandardDev(int attIndex) throws Exception {
0567: if (!m_clusterInstances.attribute(attIndex).isNumeric()) {
0568: throw new Exception(
0569: "getStandardDev: attribute is not numeric");
0570: }
0571:
0572: m_attStats[attIndex].numericStats.calculateDerived();
0573: double stdDev = m_attStats[attIndex].numericStats.stdDev;
0574: if (Double.isNaN(stdDev) || Double.isInfinite(stdDev)) {
0575: return m_acuity;
0576: }
0577:
0578: return Math.max(m_acuity, stdDev);
0579: }
0580:
0581: /**
0582: * Update attribute stats using the supplied instance.
0583: *
0584: * @param updateInstance the instance for updating
0585: * @param delete true if the values of the supplied instance are
0586: * to be removed from the statistics
0587: */
0588: protected void updateStats(Instance updateInstance,
0589: boolean delete) {
0590:
0591: if (m_attStats == null) {
0592: m_attStats = new AttributeStats[m_numAttributes];
0593: for (int i = 0; i < m_numAttributes; i++) {
0594: m_attStats[i] = new AttributeStats();
0595: if (m_clusterInstances.attribute(i).isNominal()) {
0596: m_attStats[i].nominalCounts = new int[m_clusterInstances
0597: .attribute(i).numValues()];
0598: } else {
0599: m_attStats[i].numericStats = new Stats();
0600: }
0601: }
0602: }
0603: for (int i = 0; i < m_numAttributes; i++) {
0604: if (!updateInstance.isMissing(i)) {
0605: double value = updateInstance.value(i);
0606: if (m_clusterInstances.attribute(i).isNominal()) {
0607: m_attStats[i].nominalCounts[(int) value] += (delete) ? (-1.0 * updateInstance
0608: .weight())
0609: : updateInstance.weight();
0610: m_attStats[i].totalCount += (delete) ? (-1.0 * updateInstance
0611: .weight())
0612: : updateInstance.weight();
0613: } else {
0614: if (delete) {
0615: m_attStats[i].numericStats.subtract(value,
0616: updateInstance.weight());
0617: } else {
0618: m_attStats[i].numericStats.add(value,
0619: updateInstance.weight());
0620: }
0621: }
0622: }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -