📄 inequalitysplitterbuilder.java
字号:
package jboost.learner;import java.util.ArrayList;import java.util.Collections;import java.util.List;import jboost.CandidateSplit;import jboost.NotSupportedException;import jboost.booster.Bag;import jboost.booster.Booster;import jboost.examples.Attribute;import jboost.examples.AttributeDescription;import jboost.examples.DiscreteAttribute;import jboost.examples.Example;import jboost.examples.Label;import jboost.examples.RealAttribute;import jboost.monitor.Monitor;/** a builder of splitters that partition data according to whether or * not a particular numerical attribute is larger than some value * @author Yoav Freund * @version $Header: /cvsroot/jboost/jboost/src/jboost/learner/InequalitySplitterBuilder.java,v 1.2 2007/10/13 04:32:28 aarvey Exp $ */class InequalitySplitterBuilder extends SplitterBuilder { /** Default constructor */ InequalitySplitterBuilder() { this.attributeIndex= -1; m_type= SplitterType.INEQUALITY_SPLITTER; } /** * Constructor for a non-root non-sortedListOwner InequalitySplitterBuilder * gets as parameters the set of pointers to its parent's data structures */ InequalitySplitterBuilder(int attributeIndex, double[] indexedValues, int[] sortedIndices, boolean[] potentialSplits, boolean[] examplesMask, int noOfElements, Booster booster, AttributeDescription[] ad) { this.attributeIndex= attributeIndex; this.sortedListOwner= false; this.indexedValues= indexedValues; this.sortedIndices= sortedIndices; this.potentialSplits= potentialSplits; super.examplesMask= examplesMask; super.noOfElements= noOfElements; super.booster= booster; isFinalized= true; isRoot= false; desc= ad; m_type= SplitterType.INEQUALITY_SPLITTER; } /** describe as a string for debugging printout */ public String toString() { String s= "InequalitySplitterBuilder for attribute " + attributeIndex + "\n\tindex\tvalue\tpotentialSplit\n"; if (sortedIndices == null) s += "sortedIndices is empty"; else for (int i= 0; i < sortedIndices.length; i++) { int index= sortedIndices[i]; s += i + "\t" + index + "\t" + indexedValues[index] + "\t" + potentialSplits[i] + "\n"; } return s; } /** * The builder = weak learner * @throws NotSupportedException * @return split */ public CandidateSplit build() throws NotSupportedException { int splitIndex; // the index of the best split point // in sortedIndices. The split is between // the indexed element and the one before it. Bag[] bag= new Bag[] { booster.newBag(), booster.newBag()}; int[] localIndices; boolean[] localSplits; if (sortedListOwner) { // data is ready from construction localIndices= sortedIndices; localSplits= potentialSplits; } else { // generate data for findBestSplit on the fly int i= 0; // index in the temporary arrays boolean split= false; for (int j= 0; j < sortedIndices.length; j++) if (examplesMask[sortedIndices[j]]) i++; localIndices= new int[i]; localSplits= new boolean[i]; i= 0; for (int j= 0; j < sortedIndices.length; j++) { split= split | potentialSplits[j]; // detect potential splits if (examplesMask[sortedIndices[j]]) { localIndices[i]= sortedIndices[j]; localSplits[i]= split; split= false; i++; } } } splitIndex= booster.findBestSplit(bag[0], bag[1], localIndices, localSplits); double threshold= (splitIndex == 0 ? -Double.MAX_VALUE : 0.5 * (indexedValues[localIndices[splitIndex]] + indexedValues[localIndices[splitIndex - 1]])); Splitter s= new InequalitySplitter(attributeIndex, threshold, desc[0]); return new CandidateSplit(this, s, bag, booster.getLoss(bag)); } /** * Take all the indices that reach this split * create two bags, one full and one empty * find the index whose value matches the value of the splitter * separate remove that index from the full bag and put it in the empty * pass the bag and splitter on as a new CandidateSplit * */ public CandidateSplit build(Splitter s) throws NotSupportedException { InequalitySplitter splitter= (InequalitySplitter) s; int splitIndex=-1; int count=0; int[] indices; if (sortedListOwner) { indices= sortedIndices; for (int j=0; j < indices.length; j++) { if (indices[j] == splitter.getIndex()) { splitIndex= j; } } } else { for (int j= 0; j < sortedIndices.length; j++) { if (examplesMask[sortedIndices[j]]) { count++; } } indices= new int[count]; for (int i=0,j=0; j < sortedIndices.length; j++) { if (examplesMask[sortedIndices[j]]) { indices[i]= sortedIndices[j]; if (indices[i] == splitter.getIndex()) { splitIndex= i; } i++; } } } Bag[] bag= new Bag[] { booster.newBag(), booster.newBag(indices)}; if (splitIndex >= 0) { bag[0].addExample(indices[splitIndex]); bag[1].subtractExample(indices[splitIndex]); } /* if (sortedListOwner) { // data is ready from construction localIndices= sortedIndices; localSplits= potentialSplits; } else { // generate data for findBestSplit on the fly int index= 0; // index in the temporary arrays for (int j= 0; j < sortedIndices.length; j++) { if (examplesMask[sortedIndices[j]]) { index++; } } localIndices= new int[index]; index= 0; for (int j= 0; j < sortedIndices.length; j++) { if (examplesMask[sortedIndices[j]]) { localIndices[index]= sortedIndices[j]; index++; } } } for (int i=0; i < localIndices.length; i++) { if (indexedValues[localIndices[i]] >= splitter.getThreshold()) { // make bags based on this split } } splitIndex= booster.findBestSplit(bag[0], bag[1], localIndices, localSplits); */ return new CandidateSplit(this, splitter, bag, booster.getLoss(bag)); } /** construct a new SplitterBuilder based on this one and some * subset of the data. * @param em an array holding the exampleMask for the subset * @param count the no of elements in the subset. */ public SplitterBuilder spawn(boolean[] em, int count) { return new InequalitySplitterBuilder( attributeIndex, indexedValues, sortedIndices, potentialSplits, em, count, booster, desc); } /** Figures out the split of the data for a given splitter. * The idea here is to be able to use a splitter without retaining * all of the examples. * @param The splitter on which to base the split * @returns The partition of the data or null if the splitter is not compatible. */ public int[][] split(Splitter sp) { if (attributeIndex != sp.getIndex()) return (null); double threshold= ((InequalitySplitter) sp).getThreshold(); int[][] result= new int[2][]; if (sortedListOwner) { int cutIndex; // locate the place where the // list should be cut into two for (cutIndex= 0; cutIndex < sortedIndices.length; cutIndex++) { if (indexedValues[sortedIndices[cutIndex]] > threshold) break; } result[0]= new int[cutIndex]; // create first list for (int j= 0; j < cutIndex; j++) result[0][j]= sortedIndices[j]; result[1]= new int[sortedIndices.length - cutIndex]; // create second list for (int j= cutIndex; j < sortedIndices.length; j++) result[1][j - cutIndex]= sortedIndices[j]; } else { int[] l= new int[noOfElements]; // temporary storage for list of elements int j= 0; int cutIndex= -1; int index= 0; for (int i= 0; i < sortedIndices.length; i++) { index= sortedIndices[i]; if (examplesMask[index]) { if ((cutIndex == -1) && (indexedValues[index] > threshold)) cutIndex= j; l[j++]= index; } } if (cutIndex==-1) { cutIndex=-1; System.out.println(this); } result[0]= new int[cutIndex]; // create first list int noGreater= j; result[1]= new int[noGreater - cutIndex]; // create second list for (j= 0; j < cutIndex; j++) result[0][j]= l[j]; for (; j < noGreater; j++) result[1][j - cutIndex]= l[j];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -