📄 equalitysplitterbuilder.java

📁 Boosting算法软件包
💻 JAVA
字号:
package jboost.learner;

import java.util.Arrays;
import java.util.Vector;

// NOTE(review): CastExpression is never referenced in this file and comes from an
// internal sun.* package; it is kept only so the import list stays unchanged.
import sun.tools.tree.CastExpression;

import jboost.CandidateSplit;
import jboost.booster.Bag;
import jboost.booster.AbstractBooster;
import jboost.booster.Booster;
import jboost.examples.Attribute;
import jboost.examples.AttributeDescription;
import jboost.examples.DiscreteAttribute;
import jboost.examples.Example;
import jboost.examples.ExampleDescription;
import jboost.examples.Label;
import jboost.monitor.Monitor;
import jboost.tokenizer.DataStream;
import jboost.tokenizer.jboost_DataStream;

/**
 * Finds the best split based on an attribute==value test on a
 * <i>DiscreteAttribute</i>.
 *
 * ISSUES: Requires an intersect function that should be in booster?
 * ISSUES: There should be a mechanism to say "no useful splits"
 *
 * @author Nigel
 */
public class EqualitySplitterBuilder extends SplitterBuilder {

    /**
     * Finds the best equality split on a discrete attribute using attr==value tests.
     * Every attribute value is tried as "value vs. everything else" and the value
     * with the smallest loss reported by the booster is kept (the first such value
     * wins on ties).
     *
     * @return the candidate split for the best value
     */
    public CandidateSplit build() {
        BestSplit best = findBestSplit();
        Splitter s = new EqualitySplitter(attributeIndex, best.value, 2, desc[0]);
        // if(Monitor.logLevel>3) Monitor.log(s);
        return new CandidateSplit(this, s, best.bags, best.loss);
    }

    /**
     * Builds the candidate split that corresponds to an already chosen splitter.
     *
     * @param s the splitter to evaluate; must be an <code>EqualitySplitter</code>
     * @return the split of the examples of this builder induced by the splitter,
     *         together with its loss
     */
    public CandidateSplit build(Splitter s) {
        EqualitySplitter splitter = (EqualitySplitter) s;
        Bag universe = booster.newBag();
        Bag[] valueBags = bagsPerValue(universe);

        Bag[] bags = new Bag[2];
        bags[1] = booster.newBag();
        // The per-value bags are indexed by attribute VALUE. getIndex() is the
        // attribute index (see split(), which compares it with attributeIndex and
        // uses getValue() to index attr_val), so getValue() is the right key here.
        fillSplit(bags, universe, valueBags[splitter.getValue()]);
        double loss = booster.getLoss(bags);

        return new CandidateSplit(this, splitter, bags, loss);
    }

    /**
     * Find the best attribute value to use
     * and return the list of bags that split along that value.
     *
     * @return two bags: [0] the examples having the best value, [1] all others
     */
    private Bag[] findBestAttributeValues() {
        return findBestSplit().bags;
    }

    /** Result of the search over attribute values: chosen value, its loss and its bags. */
    private static final class BestSplit {
        final int value;
        final double loss;
        final Bag[] bags;

        BestSplit(int value, double loss, Bag[] bags) {
            this.value = value;
            this.loss = loss;
            this.bags = bags;
        }
    }

    /**
     * Tries every attribute value as an equality test and keeps the one with the
     * smallest loss. If no value yields a loss below <code>Double.MAX_VALUE</code>,
     * value 0 is returned with loss <code>Double.MAX_VALUE</code>.
     */
    private BestSplit findBestSplit() {
        Bag universe = booster.newBag();
        Bag[] valueBags = bagsPerValue(universe);

        Bag[] split = new Bag[2];
        split[1] = booster.newBag();

        double minLoss = Double.MAX_VALUE;
        int minVal = 0;
        for (int v = 0; v < valueBags.length; v++) {
            fillSplit(split, universe, valueBags[v]);
            double loss = booster.getLoss(split);
            if (loss < minLoss) {
                minLoss = loss;
                minVal = v;
            }
        }

        // The scratch bags currently hold the last value tried; rebuild them for the winner.
        fillSplit(split, universe, valueBags[minVal]);
        return new BestSplit(minVal, minLoss, split);
    }

    /**
     * Builds one bag per attribute value from the examples selected by the mask
     * and adds each of them to <code>universe</code>.
     */
    private Bag[] bagsPerValue(Bag universe) {
        Bag[] valueBags = new Bag[attr_val.length];
        for (int v = 0; v < attr_val.length; v++) {
            valueBags[v] = intersect(examplesMask, attr_val[v]);
            universe.addBag(valueBags[v]);
        }
        return valueBags;
    }

    /** Sets split[0] to valueBag and split[1] to a copy of universe minus valueBag. */
    private static void fillSplit(Bag[] split, Bag universe, Bag valueBag) {
        split[1].copyBag(universe);
        split[0] = valueBag;
        split[1].subtractBag(valueBag);
    }

    /**
     * Construct a new SplitterBuilder based on this one and some
     * subset of the data.
     *
     * @param em    an array holding the exampleMask for the subset
     * @param count the no of elements in the subset
     * @return a non-root builder sharing this builder's attribute table
     */
    public SplitterBuilder spawn(boolean[] em, int count) {
        return new EqualitySplitterBuilder(booster, em, count, attr_val, attributeIndex, desc);
    }

    /**
     * Figures out the split of the data for a given splitter.
     * In other words determines which examples make it from THIS splitterBuilder
     * to each side of the split.
     * The idea here is to be able to use a splitter without retaining
     * all of the examples.
     *
     * @param sp the splitter on which to base the split
     * @return the partition of the data, or null if the splitter is not compatible
     *         (its index differs from this builder's attribute index). Entry 0
     *         holds the masked examples whose attribute equals the splitter's
     *         value; entry 1 is filled only when the splitter's degree is 2 and
     *         holds the masked examples of all other values.
     */
    public int[][] split(Splitter sp) {
        if (attributeIndex != sp.getIndex()) {
            return null;
        }
        EqualitySplitter es = (EqualitySplitter) sp;
        int v = es.getValue();
        int d = es.getDegree();
        int[][] retval = new int[d][];

        retval[0] = new int[countMasked(attr_val[v])];
        copyMasked(attr_val[v], retval[0], 0);

        if (d == 2) {
            int rest = 0;
            for (int i = 0; i < attr_val.length; i++) {
                if (i != v) {
                    rest += countMasked(attr_val[i]);
                }
            }
            retval[1] = new int[rest];
            int pos = 0;
            for (int i = 0; i < attr_val.length; i++) {
                if (i != v) {
                    pos = copyMasked(attr_val[i], retval[1], pos);
                }
            }
        }
        return retval;
    }

    /** Counts the entries of <code>examples</code> that are selected by examplesMask. */
    private int countMasked(int[] examples) {
        int n = 0;
        for (int j = 0; j < examples.length; j++) {
            if (examplesMask[examples[j]]) {
                n++;
            }
        }
        return n;
    }

    /**
     * Copies the entries of <code>examples</code> selected by examplesMask into
     * <code>dest</code> starting at <code>pos</code>; returns the next free position.
     */
    private int copyMasked(int[] examples, int[] dest, int pos) {
        for (int j = 0; j < examples.length; j++) {
            if (examplesMask[examples[j]]) {
                dest[pos++] = examples[j];
            }
        }
        return pos;
    }

    /**
     * Constructor for a non-root, already finalized builder.
     *
     * @param b              the booster to use
     * @param em             the examples mask
     * @param noEl           the number of elements
     * @param a              the per-value example table (stored by reference, not copied)
     * @param attributeIndex the index of the attribute this builder works on
     * @param attr           the attribute descriptions
     */
    public EqualitySplitterBuilder(Booster b, boolean[] em,
                                   int noEl, int[][] a,
                                   int attributeIndex,
                                   AttributeDescription[] attr) {
        booster = b;
        examplesMask = em;
        noOfElements = noEl;
        attr_val = a;
        isRoot = false;
        isFinalized = true;
        this.attributeIndex = attributeIndex;
        desc = attr;
        m_type = SplitterType.EQUALITY_SPLITTER;
    }

    /** Default Constructor: a non-root, finalized builder with no booster and no data. */
    public EqualitySplitterBuilder() {
        booster = null;
        examplesMask = null;
        noOfElements = -1;
        attr_val = null;
        isRoot = false;
        isFinalized = true;
        desc = null;
        m_type = SplitterType.EQUALITY_SPLITTER;
    }

    /**
     * Describe as a string for debugging printout.
     * For a root builder the per-value example table is listed, one value per line;
     * a value with no examples is printed as an empty list.
     */
    public String toString() {
        StringBuffer sb = new StringBuffer();
        sb.append("EqualitySplitterBuilder for attribute ").append(attributeIndex).append("\n");
        if (isRoot) {
            sb.append("Is Root:\nvalue\tExample\n");
            if (attr_val == null) {
                sb.append("table is null\n");
            } else {
                for (int i = 0; i < attr_val.length; i++) {
                    sb.append(i).append("\t");
                    for (int j = 0; j < attr_val[i].length; j++) {
                        if (j > 0) {
                            sb.append(",");
                        }
                        sb.append(attr_val[i][j]);
                    }
                    sb.append("\n");
                }
            }
        }
        sb.append("ExamplesMask:\n");
        if (examplesMask == null) {
            sb.append("is empty\n");
        } else {
            for (int i = 0; i < examplesMask.length; i++) {
                sb.append(examplesMask[i] ? "1" : "0");
            }
        }
        return sb.toString();
    }

    /**
     * The constructor for the root splitterbuilder.
     *
     * @param index   the index of the relevant attribute
     * @param booster the booster that is to be used by this builder
     * @param ad      the attribute descriptions; ad[0] supplies the initial number of values
     * @throws RuntimeException if ad[0] cannot report its number of values
     */
    public EqualitySplitterBuilder(int index, Booster booster, AttributeDescription[] ad) {
        desc = ad;
        isRoot = true;
        isFinalized = false;
        try {
            maxNoVals = ad[0].getNoOfValues();
        } catch (IncompAttException e) {
            // Keep the original exception as the cause so its stack trace is not lost.
            throw new RuntimeException(
                "Incomatiable Attribute error in EqualitySplitterBuilder"
                + " index=" + index + "\n" + e.getMessage(), e);
        }
        if (maxNoVals < 0) {
            maxNoVals = 0;
        }
        attributeIndex = index;
        this.booster = booster;
        attr_val = null;
        maxIndex = -1;
        noOfElements = 0;
        valueVec = new Vector();
        m_type = SplitterType.EQUALITY_SPLITTER;
    }

    /**
     * Add a single example to the internal data structure.
     *
     * @param index   the index of the example in the dataset
     * @param example the example
     * @throws IncompAttException if the example's attribute is not a DiscreteAttribute
     * @throws RuntimeException   if this builder is not a root or is already finalized
     */
    public void addExample(int index, Example example) throws IncompAttException {
        if (!isRoot || isFinalized) {
            throw new RuntimeException(
                "Trying to addExample() to non-root or finalized SplitterBuilder");
        }

        Attribute t = example.getAttribute(attributeIndex);
        // Check that the attribute is of the correct class. The offending class is
        // taken from t: the downcast target does not exist when the type is wrong.
        if (t != null && !(t instanceof DiscreteAttribute)) {
            throw new IncompAttException(index, attributeIndex,
                                         "DiscreteAttribute", t.getClass());
        }
        DiscreteAttribute a = (DiscreteAttribute) t;

        // Pad with -1 ("no value") up to and including this index.
        if (index > maxIndex) {
            for (int i = maxIndex + 1; i <= index; i++) {
                valueVec.add(new Integer(-1));
            }
            maxIndex = index;
        }

        if (a.isDefined()) {
            int exVal = a.getValue();
            if (exVal > maxNoVals - 1) {
                maxNoVals = exVal + 1;
            }
            valueVec.set(index, new Integer(exVal));
        }
    }

    /**
     * Converts the values collected by addExample() into the per-value example
     * table, selects all examples in the mask and releases the temporary storage.
     *
     * @throws RuntimeException if this builder is not a root or is already finalized
     */
    public void finalizeData() {
        if (!isRoot || isFinalized) {
            throw new RuntimeException(
                "Trying to finalizeData() to non-root or finalized SplitterBuilder");
        }

        noOfElements = maxIndex + 1;
        examplesMask = new boolean[noOfElements];
        Arrays.fill(examplesMask, true);

        // First pass: unbox the recorded values and count the examples per value.
        int[] values = new int[noOfElements];
        int[] counts = new int[maxNoVals];
        for (int i = 0; i < noOfElements; i++) {
            values[i] = ((Integer) valueVec.get(i)).intValue();
            if (values[i] >= 0) {      // negative means the example has no value
                counts[values[i]]++;
            }
        }

        // Second pass: fill each row with its example indices in increasing order.
        attr_val = new int[maxNoVals][];
        for (int v = 0; v < maxNoVals; v++) {
            attr_val[v] = new int[counts[v]];
        }
        int[] next = new int[maxNoVals];
        for (int i = 0; i < noOfElements; i++) {
            int v = values[i];
            if (v >= 0) {
                attr_val[v][next[v]++] = i;
            }
        }

        isFinalized = true;
        tmpAttrVals = null;
        exampleMaskVec = null;
        valueVec = null;
    }

//----------------------------- Protected Members ---------------------------------------//

    /** The index of the attribute on which this builder works. */
    protected int attributeIndex;

    /**
     * A list of examples with each attribute=value, perhaps there is a
     * better data structure than an array of ints. But BitSet doesn't seem right
     * as each one would be very sparse.
     * One copy of this is generated by the root splitterBuilder and is pointed to
     * by all of its descendents.
     */
    protected int[][] attr_val;

//------------------------------ Private Members ---------------------------------------//

    /**
     * A temporary storage for the values and indices of the
     * attributes. This storage is freed up when the builder is
     * finalized.
     */
    private Vector tmpAttrVals;

    /** Number of distinct attribute values seen so far (largest value + 1, never negative). */
    private int maxNoVals;

    /** Only ever reset to null in finalizeData() within this class. */
    private Vector exampleMaskVec;

    /** Value of each example by example index, as Integer; -1 when no value was recorded. */
    private Vector valueVec;

    /** The maximum index reached on reading the data. */
    private int maxIndex;

    /**
     * It would be nice if there was a generic intersect function that
     * returned a bag, but where to put it.
     *
     * @param mask     selection mask indexed by example index
     * @param examples example indices to filter
     * @return a new bag holding the examples whose mask entry is true
     */
    private Bag intersect(boolean[] mask, int[] examples) {
        Bag b = booster.newBag();
        for (int i = 0; i < examples.length; i++) {
            if (mask[examples[i]]) {
                b.addExample(examples[i]);
            }
        }
        return b;
    }

//----------------------------- Test Stuff --------------------------------------------//

    /** A main for testing this class */
    /*static public void main(String[] argv) {
        try{
            DataStream ds=new jboost_DataStream(false,"test (one,two,three)\n label (one,two)\n");
            ExampleDescription ed=ds.getExampleDescription();
            AbstractBooster boos=AbstractBooster.getInstance();
            AttributeDescription[] ad=new AttributeDescription[1];
            ad[0]=ed.getAttributeDescription(0);
            EqualitySplitterBuilder sb = new EqualitySplitterBuilder(0,boos,ad);
            int[] trainLabels=   {  0,  0,  1,  0,  0,  1,  0,   0,  1,   1,  1,  0};
            int[] trainValues={0,2,2,2,1,2,0,1,0,0,2,1};
            int[] testLabels=   {  0,  0,  1,  0,  0,  1,  0,   0,  1,   1,  1,  0};
            int[] testValues={0,2,2,2,1,2,0,1,0,0,2,1};
            Example x;
            Attribute[] attArray = new Attribute[1];
            Label l;

            if(Monitor.logLevel>3) Monitor.log("Input: \t index \t value \t label");
            for(int i=0; i<trainLabels.length; i++) {
                l=new Label(trainLabels[i]);
                attArray[0]=new DiscreteAttribute(trainValues[i]);
                x=new Example(attArray,l);
                if(Monitor.logLevel>3) Monitor.log(" \t  "+i+"\t  "+trainValues[i]+"\t  "+trainLabels[i]);
                try{
                    sb.addExample(i,x);
                    boos.addExample(i,l);
                }
                catch(IncompAttException e) {
                    if(Monitor.logLevel>3) Monitor.log(e.getMessage());
                }
            }
            sb.finalizeData();
            boos.finalizeData();
            if(Monitor.logLevel>3) Monitor.log(sb);
            CandidateSplit bC=sb.build();
            if(Monitor.logLevel>3) Monitor.log(bC);
            boolean[] tmpMask=new boolean[12];
            Arrays.fill(tmpMask,false);
            tmpMask[0]=true;
            tmpMask[9]=true;
            tmpMask[1]=true;
            tmpMask[4]=true;
            tmpMask[2]=true;
            tmpMask[10]=true;
            SplitterBuilder esb=sb.spawn(tmpMask,1);
            if(Monitor.logLevel>3) Monitor.log(esb);
            bC=esb.build();
            if(Monitor.logLevel>3) Monitor.log(bC);
        }
        catch(Exception e) {
            if(Monitor.logLevel>3) Monitor.log(e.getMessage());
            e.printStackTrace();
        }
    } */
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -