⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 adtree.java

📁 :<<数据挖掘--实用机器学习技术及java实现>>一书的配套源程序
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
      break;    case SEARCHPATH_RANDOM:       goDownRandomPathSingle(currentNode, posInstances, negInstances);      break;    }  }  /**   * Continues single (two-class optimized) search by investigating every node in the   * subtree under currentNode.   *   * @param currentNode the root of the subtree to be searched   * @param posInstances the positive-class instances that apply at this node   * @param negInstances the negative-class instances that apply at this node   * @exception Exception if search fails   */  private void goDownAllPathsSingle(PredictionNode currentNode,				    Instances posInstances, Instances negInstances)    throws Exception  {    for (Enumeration e = currentNode.children(); e.hasMoreElements(); ) {      Splitter split = (Splitter) e.nextElement();      for (int i=0; i<split.getNumOfBranches(); i++)	searchForBestTestSingle(split.getChildForBranch(i),				split.instancesDownBranch(i, posInstances),				split.instancesDownBranch(i, negInstances));    }  }  /**   * Continues single (two-class optimized) search by investigating only the path   * with the most heavily weighted instances.   *   * @param currentNode the root of the subtree to be searched   * @param posInstances the positive-class instances that apply at this node   * @param negInstances the negative-class instances that apply at this node   * @exception Exception if search fails   */  private void goDownHeaviestPathSingle(PredictionNode currentNode,					Instances posInstances, Instances negInstances)    throws Exception  {    Splitter heaviestSplit = null;    int heaviestBranch = 0;    double largestWeight = 0.0;    for (Enumeration e = currentNode.children(); e.hasMoreElements(); ) {      Splitter split = (Splitter) e.nextElement();      for (int i=0; i<split.getNumOfBranches(); i++) {	double weight =	  split.instancesDownBranch(i, posInstances).sumOfWeights() +	  split.instancesDownBranch(i, negInstances).sumOfWeights();	if (weight > largestWeight) {	  heaviestSplit = split;	  heaviestBranch = i;	  largestWeight = weight;	}      }    }    if (heaviestSplit != null)      searchForBestTestSingle(heaviestSplit.getChildForBranch(heaviestBranch),			      heaviestSplit.instancesDownBranch(heaviestBranch,								posInstances),			      heaviestSplit.instancesDownBranch(heaviestBranch,								negInstances));  }  /**   * Continues single (two-class optimized) search by investigating only the path   * with the best Z-pure value at each branch.   *   * @param currentNode the root of the subtree to be searched   * @param posInstances the positive-class instances that apply at this node   * @param negInstances the negative-class instances that apply at this node   * @exception Exception if search fails   */  private void goDownZpurePathSingle(PredictionNode currentNode,				     Instances posInstances, Instances negInstances)    throws Exception  {    double lowestZpure = m_search_smallestZ; // do z-pure cutoff    PredictionNode bestPath = null;    Instances bestPosSplit = null, bestNegSplit = null;    // search for branch with lowest Z-pure    for (Enumeration e = currentNode.children(); e.hasMoreElements(); ) {      Splitter split = (Splitter) e.nextElement();      for (int i=0; i<split.getNumOfBranches(); i++) {	Instances posSplit = split.instancesDownBranch(i, posInstances);	Instances negSplit = split.instancesDownBranch(i, negInstances);	double newZpure = calcZpure(posSplit, negSplit);	if (newZpure < lowestZpure) {	  lowestZpure = newZpure;	  bestPath = split.getChildForBranch(i);	  bestPosSplit = posSplit;	  bestNegSplit = negSplit;	}      }    }    if (bestPath != null)      searchForBestTestSingle(bestPath, bestPosSplit, bestNegSplit);  }  /**   * Continues single (two-class optimized) search by investigating a random path.   *   * @param currentNode the root of the subtree to be searched   * @param posInstances the positive-class instances that apply at this node   * @param negInstances the negative-class instances that apply at this node   * @exception Exception if search fails   */  private void goDownRandomPathSingle(PredictionNode currentNode,				      Instances posInstances, Instances negInstances)    throws Exception {    FastVector children = currentNode.getChildren();    Splitter split = (Splitter) children.elementAt(getRandom(children.size()));    int branch = getRandom(split.getNumOfBranches());    searchForBestTestSingle(split.getChildForBranch(branch),			    split.instancesDownBranch(branch, posInstances),			    split.instancesDownBranch(branch, negInstances));  }  /**   * Investigates the option of introducing a nominal split under currentNode. If it   * finds a split that has a Z-value lower than has already been found it will   * update the search information to record this as the best option so far.    *   * @param attIndex index of a nominal attribute to create a split from   * @param currentNode the parent under which a split is to be considered   * @param posInstances the positive-class instances that apply at this node   * @param negInstances the negative-class instances that apply at this node   */  private void evaluateNominalSplitSingle(int attIndex, PredictionNode currentNode,					  Instances posInstances, Instances negInstances)  {        double[] indexAndZ = findLowestZNominalSplit(posInstances, negInstances, attIndex);    if (indexAndZ[1] < m_search_smallestZ) {      m_search_smallestZ = indexAndZ[1];      m_search_bestInsertionNode = currentNode;      m_search_bestSplitter = new TwoWayNominalSplit(attIndex, (int) indexAndZ[0]);      m_search_bestPathPosInstances = posInstances;      m_search_bestPathNegInstances = negInstances;    }  }  /**   * Investigates the option of introducing a two-way numeric split under currentNode.   * If it finds a split that has a Z-value lower than has already been found it will   * update the search information to record this as the best option so far.    *   * @param attIndex index of a numeric attribute to create a split from   * @param currentNode the parent under which a split is to be considered   * @param posInstances the positive-class instances that apply at this node   * @param negInstances the negative-class instances that apply at this node   * @param allInstances all of the instances the apply at this node (pos+neg combined)   */  private void evaluateNumericSplitSingle(int attIndex, PredictionNode currentNode,					  Instances posInstances, Instances negInstances,					  Instances allInstances)    throws Exception  {        double[] splitAndZ = findLowestZNumericSplit(allInstances, attIndex);    if (splitAndZ[1] < m_search_smallestZ) {      m_search_smallestZ = splitAndZ[1];      m_search_bestInsertionNode = currentNode;      m_search_bestSplitter = new TwoWayNumericSplit(attIndex, splitAndZ[0]);      m_search_bestPathPosInstances = posInstances;      m_search_bestPathNegInstances = negInstances;    }  }  /**   * Calculates the prediction value used for a particular set of instances.   *   * @param posInstances the positive-class instances   * @param negInstances the negative-class instances   * @return the prediction value   */  private double calcPredictionValue(Instances posInstances, Instances negInstances) {        return 0.5 * Math.log( (posInstances.sumOfWeights() + 1.0)			  / (negInstances.sumOfWeights() + 1.0) );  }  /**   * Calculates the Z-pure value for a particular set of instances.   *   * @param posInstances the positive-class instances   * @param negInstances the negative-class instances   * @return the Z-pure value   */  private double calcZpure(Instances posInstances, Instances negInstances) {        double posWeight = posInstances.sumOfWeights();    double negWeight = negInstances.sumOfWeights();    return (2.0 * (Math.sqrt(posWeight+1.0) + Math.sqrt(negWeight+1.0))) +       (m_trainTotalWeight - (posWeight + negWeight));  }  /**   * Updates the weights of instances that are influenced by a new prediction value.   *   * @param posInstances positive-class instances to which the prediction value applies   * @param negInstances negative-class instances to which the prediction value applies   * @param predictionValue the new prediction value   */  private void updateWeights(Instances posInstances, Instances negInstances,			     double predictionValue) {        // do positives    double weightMultiplier = Math.pow(Math.E, -predictionValue);    for (Enumeration e = posInstances.enumerateInstances(); e.hasMoreElements(); ) {      Instance inst = (Instance) e.nextElement();      inst.setWeight(inst.weight() * weightMultiplier);    }    // do negatives    weightMultiplier = Math.pow(Math.E, predictionValue);    for (Enumeration e = negInstances.enumerateInstances(); e.hasMoreElements(); ) {      Instance inst = (Instance) e.nextElement();      inst.setWeight(inst.weight() * weightMultiplier);    }  }  /**   * Finds the nominal attribute value to split on that results in the lowest Z-value.   *   * @param posInstances the positive-class instances to split   * @param negInstances the negative-class instances to split   * @param attIndex the index of the nominal attribute to find a split for   * @return a double array, index[0] contains the value to split on, index[1] contains   * the Z-value of the split   */  private double[] findLowestZNominalSplit(Instances posInstances, Instances negInstances,					   int attIndex)  {        double lowestZ = Double.MAX_VALUE;    int bestIndex = 0;    // set up arrays    double[] posWeights = attributeValueWeights(posInstances, attIndex);    double[] negWeights = attributeValueWeights(negInstances, attIndex);    double posWeight = Utils.sum(posWeights);    double negWeight = Utils.sum(negWeights);    int maxIndex = posWeights.length;    if (maxIndex == 2) maxIndex = 1; // avoid repeating due to 2-way symmetry    for (int i = 0; i < maxIndex; i++) {      // calculate Z      double w1 = posWeights[i] + 1.0;      double w2 = negWeights[i] + 1.0;      double w3 = posWeight - w1 + 2.0;      double w4 = negWeight - w2 + 2.0;      double wRemainder = m_trainTotalWeight + 4.0 - (w1 + w2 + w3 + w4);      double newZ = (2.0 * (Math.sqrt(w1 * w2) + Math.sqrt(w3 * w4))) + wRemainder;      // record best option      if (newZ < lowestZ) { 	lowestZ = newZ;	bestIndex = i;      }    }    // return result    double[] indexAndZ = new double[2];    indexAndZ[0] = (double) bestIndex;    indexAndZ[1] = lowestZ;    return indexAndZ;   }  /**   * Simultanously sum the weights of all attribute values for all instances.   *   * @param instances the instances to get the weights from    * @param attIndex index of the attribute to be evaluated   * @return a double array containing the weight of each attribute value   */      private double[] attributeValueWeights(Instances instances, int attIndex)  {        double[] weights = new double[instances.attribute(attIndex).numValues()];    for(int i = 0; i < weights.length; i++) weights[i] = 0.0;    for (Enumeration e = instances.enumerateInstances(); e.hasMoreElements(); ) {      Instance inst = (Instance) e.nextElement();      if (!inst.isMissing(attIndex)) weights[(int)inst.value(attIndex)] += inst.weight();    }    return weights;  }  /**   * Finds the numeric split-point that results in the lowest Z-value.   *   * @param instances the instances to find a split for   * @param attIndex the index of the numeric attribute to find a split for   * @return a double array, index[0] contains the split-point, index[1] contains the   * Z-value of the split   */  private double[] findLowestZNumericSplit(Instances instances, int attIndex)    throws Exception  {        double splitPoint = 0.0;    double bestVal = Double.MAX_VALUE, currVal, currCutPoint;    int numMissing = 0;    double[][] distribution = new double[3][instances.numClasses()];       // compute counts for all the values    for (int i = 0; i < instances.numInstances(); i++) {      Instance inst = instances.instance(i);      if (!inst.isMissing(attIndex)) {	distribution[1][(int)inst.classValue()] += inst.weight();      } else {	distribution[2][(int)inst.classValue()] += inst.weight();	numMissing++;      }    }    // sort instances    instances.sort(attIndex);        // make split counts for each possible split and evaluate    for (int i = 0; i < instances.numInstances() - (numMissing + 1); i++) {      Instance inst = instances.instance(i);      Instance instPlusOne = instances.instance(i + 1);      distribution[0][(int)inst.classValue()] += inst.weight();      distribution[1][(int)inst.classValue()] -= inst.weight();      if (Utils.sm(inst.value(attIndex), instPlusOne.value(attIndex))) {	currCutPoint = (inst.value(attIndex) + instPlusOne.value(attIndex)) / 2.0;	currVal = conditionedZOnRows(distribution);	if (currVal < bestVal) {	  splitPoint = currCutPoint;	  bestVal = currVal;	}      }    }	    double[] splitAndZ = new double[2];    splitAndZ[0] = splitPoint;    splitAndZ[1] = bestVal;    return splitAndZ;  }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -