⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 setsplitterbuilder.java

📁 Boosting算法软件包
💻 JAVA
字号:
package jboost.learner;

import java.util.Arrays;

import jboost.CandidateSplit;
import jboost.NotSupportedException;
import jboost.booster.Bag;
import jboost.booster.AbstractBooster;
import jboost.booster.Booster;
import jboost.examples.Attribute;
import jboost.examples.AttributeDescription;
import jboost.examples.DiscreteAttribute;
import jboost.examples.Example;
import jboost.examples.Label;
import jboost.examples.SetAttribute;
import jboost.examples.WordTable;
import jboost.monitor.Monitor;

/**
 * This is where the splits that are based on <i>SetAttribute</i>s
 * are searched for.
 *
 * <p>The root builder accumulates one row per example in a {@link SparseMatrix}
 * (examples x tokens) via {@link #addExample(int, Example)}; builders created by
 * {@link #spawn(boolean[], int)} share that matrix and differ only in the mask of
 * examples that reach them.
 *
 * @author Yoram Singer
 * @version $Header: /cvsroot/jboost/jboost/src/jboost/learner/SetSplitterBuilder.java,v 1.1.1.1 2007/05/16 04:06:02 aarvey Exp $
 */
class SetSplitterBuilder extends SplitterBuilder {

  /**
   * Constructs an empty root SetSplitterBuilder that abstains when the token is absent.
   *
   * @param index the index of the relevant attribute
   * @param booster the booster
   * @param ad the attribute descriptions handed to the new builder
   * @return a new root builder
   */
  public static SplitterBuilder newSplitterBuilder(int index, AbstractBooster booster,
      AttributeDescription[] ad) {
    return new SetSplitterBuilder(index, booster, true, ad);
  }

  /**
   * Constructs an empty root SetSplitterBuilder with an explicit abstain flag.
   *
   * @param i the index of the relevant attribute
   * @param b the booster
   * @param a if true, the builder abstains on examples that lack the token
   * @param ad the attribute descriptions handed to the new builder
   * @return a new root builder
   */
  public static SplitterBuilder newSplitterBuilder(int i, Booster b, boolean a,
      AttributeDescription[] ad) {
    return new SetSplitterBuilder(i, b, a, ad);
  }

  /**
   * Default constructor.
   *
   * <p>Leaves the builder with no booster, no attribute descriptions, no example mask and
   * no sparse matrix, marked as non-root and already finalized.
   * XXX: is this an acceptable initial state? Do any clients use this ctor?
   */
  public SetSplitterBuilder() {
    init(-1, null, false, null, null, -1, null, false, true);
  }

  /**
   * The constructor for the root splitter-builder.
   *
   * @param index the index of the relevant attribute
   * @param booster the booster that is to be used by this builder
   * @param abstain optional parameter telling the root to abstain
   * @param ad the attribute descriptions used by this builder
   */
  protected SetSplitterBuilder(int index, Booster booster, boolean abstain,
      AttributeDescription[] ad) {
    init(index, booster, abstain, ad, null, -1, new SparseMatrix(), true, false);
  }

  /**
   * Root constructor that always abstains.
   *
   * @param index the index of the relevant attribute
   * @param booster the booster that is to be used by this builder
   * @param ad the attribute descriptions used by this builder
   */
  protected SetSplitterBuilder(int index, Booster booster, AttributeDescription[] ad) {
    init(index, booster, true, ad, null, -1, new SparseMatrix(), true, false);
  }

  /**
   * Basic constructor, used by {@link #spawn(boolean[], int)} to create a non-root
   * builder that shares the sparse matrix of its parent.
   */
  private SetSplitterBuilder(int index, Booster b, boolean abstain,
      boolean[] em, int noEl, SparseMatrix SM,
      AttributeDescription[] ad) {
    init(index, b, abstain, ad, em, noEl, SM, false, false);
  }

  /**
   * A private virtual ctor that is used by all the ctors of this class.
   *
   * @param index the attribute index of this SplitterBuilder
   * @param booster the booster used by this SplitterBuilder
   * @param abstain if true, then this builder can abstain
   * @param attributes the AttributeDescriptions used by this SplitterBuilder
   * @param masks a boolean mask of the examples that reach this SplitterBuilder
   * @param count the number of elements that reach this SplitterBuilder
   * @param tokens the SparseMatrix used by this SplitterBuilder
   * @param root if true, then this SplitterBuilder is used by the root of the tree
   * @param finalized if true, then this SplitterBuilder has been finalized
   */
  private void init(int index, Booster booster, boolean abstain,
      AttributeDescription[] attributes, boolean[] masks,
      int count, SparseMatrix tokens, boolean root,
      boolean finalized) {
    attributeIndex = index;
    this.booster = booster;
    this.abstain = abstain;
    desc = attributes;
    examplesMask = masks;
    noOfElements = count;
    SM = tokens;
    isRoot = root;
    isFinalized = finalized;
    m_type = SplitterType.SET_SPLITTER;
  }

  /**
   * Construct a new splitter builder based on the current one and
   * a subset of the data defined through examplesMask (em).
   *
   * <p>The mask is used as given; it is not intersected with this builder's own mask.
   *
   * @param em an array holding the exampleMask for the subset
   * @param count the number of elements in the subset
   * @return a non-root builder sharing this builder's sparse matrix
   */
  public SplitterBuilder spawn(boolean[] em, int count) {

    /** OLD CODE --> atree now intersects examplesMask
     ***
     *** boolean[] tMask = new boolean[em.length];
     *** int n;
     *** for (int i = n = 0; i < em.length; i++) {
     ***  tMask[i] = em[i] & examplesMask[i];
     ***  if (tMask[i])
     ***    n++;
     *** }
     ***
     *** return new
     ***  SetSplitterBuilder(attributeIndex, this.booster, this.abstain,
     ***    tMask, n);
     ***
     **/

    return new SetSplitterBuilder(
        attributeIndex, this.booster, this.abstain, em, count, SM, desc);
  }

  /**
   * Build a CandidateSplit from a Splitter.
   * Uses the information in the Splitter to select the token to use
   * for splitting the bag of examples that reach this SplitterBuilder.
   *
   * @param s the Splitter to use. Will be cast to a SetSplitter
   * @return candidate split that can be added to an InstrumentedAlternatingTree
   * @throws NotSupportedException if the token's column cannot be read from the sparse matrix
   */
  public CandidateSplit build(Splitter s) throws NotSupportedException {
    SetSplitter split = (SetSplitter) s;

    // tb collects every example reaching this node; it is only filled when not abstaining,
    // because only then is the complementary bag b1 needed.
    Bag tb = booster.newBag();
    Bag b0 = booster.newBag();
    Bag b1 = booster.newBag();
    Bag[] bestBag;

    // Create default Bag if not abstaining when token is not present
    if (!abstain) {
      for (int i = 0; i < examplesMask.length; i++) {
        if (examplesMask[i]) {
          tb.addExample(i);
        }
      }
      bestBag = new Bag[2];
      bestBag[0] = booster.newBag();
      bestBag[1] = booster.newBag();
    } else {
      bestBag = new Bag[1];
      bestBag[0] = booster.newBag();
    }

    int[] T;
    try {
      T = SM.getColumn(split.getToken());
    } catch (Exception e) {
      String error = "build() : " + e.getMessage()
          + " for attribute " + attributeIndex;
      throw new NotSupportedException("SetSplitterBuilder", error);
    }
    // build() guards against a null column from getColumn; do the same here and treat it
    // as "token present in no example" instead of failing with a NullPointerException.
    if (T == null) {
      T = new int[0];
    }

    // b0 = the reaching examples that contain the token
    b0.reset();
    for (int j = 0; j < T.length; j++) {
      if (examplesMask[T[j]]) {
        b0.addExample(T[j]);
      }
    }

    double loss;
    if (abstain) {
      loss = booster.getLoss(new Bag[] {b0});
      bestBag[0].copyBag(b0);
    } else {
      // b1 = the reaching examples that do not contain the token
      b1.copyBag(tb);
      b1.subtractBag(b0);
      loss = booster.getLoss(new Bag[] {b0, b1});
      bestBag[0].copyBag(b0);
      bestBag[1].copyBag(b1);
    }
    return (new CandidateSplit(this, split, bestBag, loss));
  }

  /**
   * The builder == Weak-Learner.
   *
   * <p>This is an inefficient version that checks each token by
   * refilling the bags for every column of the sparse matrix and keeping the
   * token with the smallest loss.
   *
   * <p>If no column yields a loss below {@code Double.MAX_VALUE} (for instance when the
   * matrix has no columns), the returned splitter carries token {@code -1}.
   *
   * @return the candidate split for the token with the smallest loss
   * @throws NotSupportedException if a column cannot be read from the sparse matrix
   */
  public CandidateSplit build() throws NotSupportedException {
    // tb collects every example reaching this node; only filled when not abstaining.
    Bag tb = booster.newBag();
    Bag b0 = booster.newBag();
    Bag b1 = booster.newBag();
    Bag[] bestBag;

    // Create default Bag if not abstaining when token is not present
    if (!abstain) {
      for (int i = 0; i < examplesMask.length; i++) {
        if (examplesMask[i]) {
          tb.addExample(i);
        }
      }
      bestBag = new Bag[2];
      bestBag[0] = booster.newBag();
      bestBag[1] = booster.newBag();
    } else {
      bestBag = new Bag[1];
      bestBag[0] = booster.newBag();
    }

    /* Create and accumulate bag and then evaluate loss for each token */
    double minLoss = Double.MAX_VALUE;
    int bestTok = -1;

    for (int i = 0, l = SM.numCols(); i < l; i++) {
      int[] T;
      try {
        T = SM.getColumn(i);
      } catch (Exception e) {
        String s = "build() : " + e.getMessage()
            + " for attribute " + attributeIndex;
        throw new NotSupportedException("SetSplitterBuilder", s);
      }

      if (T != null) {
        b0.reset();
        for (int j = 0; j < T.length; j++) {
          if (examplesMask[T[j]]) {
            b0.addExample(T[j]);
          }
        }

        double loss;
        if (abstain) {
          loss = booster.getLoss(new Bag[] {b0});
        } else {
          b1.copyBag(tb);
          b1.subtractBag(b0);
          loss = booster.getLoss(new Bag[] {b0, b1});
        }

        // if(Monitor.logLevel>3) Monitor.log("Token " + i + " Score " + loss);

        if (loss < minLoss) {
          minLoss = loss;
          bestTok = i;
          if (abstain) {
            bestBag[0].copyBag(b0);
          } else {
            bestBag[0].copyBag(b0);
            bestBag[1].copyBag(b1);
          }
        }
      }
    }

    // NOTE(review): uses desc[0] rather than desc[attributeIndex] -- presumably desc holds
    // only the description of this builder's attribute; confirm against the callers.
    Splitter s = new SetSplitter(attributeIndex, bestTok, abstain, desc[0]);

    return (new CandidateSplit(this, s, bestBag, minLoss));
  }

  /**
   * Figures out the split of the data for a given splitter.
   * The idea here is to be able to use a splitter without
   * retaining all of the examples.
   *
   * <p>If the token's column cannot be read, an error is printed to {@code System.err} and
   * the JVM is terminated with exit status -1.
   *
   * @param sp the splitter on which to base the split
   * @return the partition of the data (examples containing the token, followed by the
   *     examples lacking it unless this builder abstains), or null if
   *     the splitter is not compatible
   */
  public int[][] split(Splitter sp) {
    int[] T = null;

    if (attributeIndex != sp.getIndex()) {
      return null;
    }

    int token = ((SetSplitter) sp).getToken();

    try {
      T = SM.getColumn(token);
    } catch (Exception e) {
      String s = "SPLIT for attribute " + attributeIndex
          + " could not be performed: internal structures were not finalized";
      System.err.println(s);
      System.exit(-1);
    }
    // build() guards against a null column from getColumn; treat it here as an empty column
    // (token present in no example) rather than failing with a NullPointerException.
    if (T == null) {
      T = new int[0];
    }

    int i, j, l;

    /** build the split for the examples in which the token appears **/
    for (i = l = 0; i < T.length; i++) {
      if (examplesMask[T[i]]) {
        l++;
      }
    }
    int[] A0 = new int[l];
    for (i = l = 0; i < T.length; i++) {
      if (examplesMask[T[i]]) {
        A0[l++] = T[i];
      }
    }

    /** build the split for the examples from which the token is absent **/
    if (abstain) {
      return (new int[][] {A0});
    } else {
      int[] A1 = new int[noOfElements - l];
      // Walk the example indices in order, skipping each index listed in the column.
      // NOTE(review): this relies on the column being sorted ascending without duplicates --
      // presumably guaranteed by SparseMatrix; verify.
      for (i = j = l = 0; i < T.length; i++) {
        int ex = T[i];
        for (; j < ex; j++) {
          if (examplesMask[j]) {
            A1[l++] = j;
          }
        }
        j++; // step over ex itself: it contains the token
      }
      for (; j < examplesMask.length; j++) {
        if (examplesMask[j]) {
          A1[l++] = j;
        }
      }

      return (new int[][] {A0, A1});
    }
  }

  /**
   * Add a single example to the internal data structures.
   *
   * <p>Examples must be supplied with consecutive indices starting at 0; otherwise an error
   * is printed to {@code System.err} and the JVM is terminated with exit status -1.
   * An example whose attribute is not defined contributes an empty row.
   *
   * @param index the index of the example in the dataset
   * @param example the example
   * @throws IncompAttException if the attribute at this builder's index is not a SetAttribute
   * @throws IllegalStateException if this builder is not the root or is already finalized
   */
  public void addExample(int index, Example example) throws IncompAttException {
    // ADDED
    if (!isRoot || isFinalized) {
      throw new IllegalStateException(
          "Trying to addExample() to non-root or finalized SplitterBuilder");
    }
    // END_ADDED
    SetAttribute a = null;
    Attribute t = example.getAttribute(attributeIndex);

    // Check running index
    if (index != checkIndex) {
      System.err.println(
          "Examples not in consecutive order (SetSplitterBuilder)");
      System.exit(-1);
    }
    checkIndex++;

    // check that attribute is of the correct class
    try {
      a = (SetAttribute) t; // try downcasting
    } catch (ClassCastException e) {
      throw new IncompAttException(index, attributeIndex,
          "SetAttribute", t.getClass());
    }

    if (a.isDefined()) {
      // SM.printit("Inside: ", A);
      SM.addRow(a.getList());
    } else {
      SM.addRow(new int[0]);
    }
  }

  /**
   * Finalizes the data of the root builder: freezes the global word table, finalizes the
   * sparse matrix and initializes the example mask so that every row reaches the root.
   *
   * @throws IllegalStateException if this builder is not the root or is already finalized
   */
  public void finalizeData() {
    if (!isRoot || isFinalized) {
      throw new IllegalStateException(
          "Trying to finalizeData() to non-root or finalized SplitterBuilder");
    }
    WordTable.globalTable.setFrozen(true); // freeze the global word table

    SM.finalizeMatrix();
    examplesMask = new boolean[SM.numRows()];
    Arrays.fill(examplesMask, true);
    noOfElements = examplesMask.length;
    isFinalized = true;
  }

  /**
   * Describe as a string for debugging printout.
   *
   * @return the attribute index followed by the string form of the sparse matrix
   *     ("null" when no matrix is attached, as after the default constructor)
   */
  public String toString() {
    // Plain concatenation instead of SM.toString(): identical text for a non-null matrix,
    // and no NullPointerException when SM is null.
    return "SetSplitterBuilder for attribute " + attributeIndex + SM;
  }

  //-------------------------- Private Members -----------------------------------//

  /** The index of the attribute on which this builder works. */
  int attributeIndex;

  /**
   * The sparse matrix of examples x tokens. One copy of this is
   * generated by the root splitter builder and is pointed to by
   * all of its descendants.
   */
  SparseMatrix SM;

  /** Flag determining the prediction value when a token is not present. */
  boolean abstain;

  /** Running index for checking that examples are provided sequentially. */
  private int checkIndex = 0;

  //--------------------------- Test Code ----------------------------------------//

  /** A main for testing this class */
  /*
  static public void main(String[] argv) {

    try {

      int[] labels = {0, 0, 1, 1, 0};
      int[][] Tokens = {
          {1, 3, 7},
          {0, 2},
          {4, 6},
          {1, 4, 5},
          {3}
      };

      AbstractBooster booster = AbstractBooster.getInstance();

      SetSplitterBuilder sb =
        new SetSplitterBuilder(0, booster, false,new AttributeDescription[] {null});

      Example x;
      Attribute[] attArray = new Attribute[1];
      Label l;

      if(Monitor.logLevel>3) Monitor.log("Input: \t index \t value \t label");

      for(int i = 0; i < labels.length; i++) {
        l = new Label(labels[i]);
        attArray[0] = new SetAttribute(Tokens[i]);
        // if(Monitor.logLevel>3) Monitor.log(attArray[0]);
        x = new Example(attArray, l);
        if(Monitor.logLevel>3) Monitor.log(" \t  "+i+"\t  "+"\t  "+labels[i]);
        try{
          sb.addExample(i, x);
          booster.addExample(i, l);
        } catch(IncompAttException e) {
          if(Monitor.logLevel>3) Monitor.log(e.getMessage());
        }
      }

      if(Monitor.logLevel>3) Monitor.log("");

      booster.finalizeData();
      if(Monitor.logLevel>3) Monitor.log(booster);

      sb.finalizeData();
      if(Monitor.logLevel>3) Monitor.log("\n\nOutput:");
      if(Monitor.logLevel>3) Monitor.log(sb);

      if(Monitor.logLevel>3) Monitor.log("checking building");
      CandidateSplit h = sb.build();

      if(Monitor.logLevel>3) Monitor.log(
      "\n===\n checking detection of incompatible attribute");

      sb = new SetSplitterBuilder(0, booster, true,new AttributeDescription[] {null});
      l = new Label(0);
      attArray[0] = new DiscreteAttribute(0);
      x = new Example(attArray, l);

      try {
        sb.addExample((int) 0, x); }
      catch(IncompAttException e) {
        if(Monitor.logLevel>3) Monitor.log(e.getMessage());
      }
    } catch(Exception e) {
      if(Monitor.logLevel>3) Monitor.log(e.getMessage());
      e.printStackTrace();
    } finally {
      if(Monitor.logLevel>3) Monitor.log("finished testing InequalitySplitterBuilder");
    }
  }
  */
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -