📄 alternatingtree.java

📁 Boosting算法软件包
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
      + "  for (i = 0; i < num_text_attr; i++) {\n"      + "    a = text_attr[i];\n"      + "    if (!defined_attr(a))\n"      + "      continue;\n"      + "    for (j = 0; j < num_keys; j++)\n"      + "      tokens[a][j] = 0;\n"      + "\n"      + "    while (strlcpy(buffer, attr[a], buffer_size) >= buffer_size) {\n"      + "      buffer_size = 2 * strlen(attr[a]);\n"      + "      buffer = (char *) realloc(buffer, (buffer_size+1) * sizeof(char));\n"      + "    }\n"      + "    num_words = 0;\n"      + "    for (s = strtok(buffer, WHITE_CHARS); s; s = strtok(NULL, WHITE_CHARS)) {\n"      + "      if (num_words >= max_word_list_size) {\n"      + "        max_word_list_size = 2 * max_word_list_size + 1;\n"      + "        words = (char **) realloc(words, max_word_list_size * sizeof(char *));\n"      + "      }\n"      + "      words[num_words++] = s;\n"      + "    }\n"      + "    for (pat = text_patterns[i]; *pat; pat++) {\n"      + "      set_pattern(num_words, words, *pat);\n"      + "      while (more_tokens()) {\n"      + "        s = next_token();\n"      + "        for (entry = hash_table[hash(s)]; entry; entry = entry->next)\n"      + "          if (!strcmp(entry->key, s)) {\n"      + "            tokens[a][entry->id] = 1;\n"      + "            break;\n"      + "          }\n"      + "        }\n"      + "      }\n"      + "  }\n"      + "\n"      + "  reset_pred();\n\n"      + makeCode(root, "  ")      + "\n"      + "  return finalize_pred();\n"      + "}\n";        for (int i = 0; i < primes.length &&    3*numTokens > (hashTableSize = primes[i]); i++)      ;        preamble += "static char *keys[] = {\n";    for (int i = 0; i < numTokens; i++)      preamble += "             \"" + checkChar((String)tokenList.get(i)) + "\",\n";    preamble += "};\n"      + "#define num_keys  (" + numTokens + ")\n"      + "static HashTableEntry_t **hash_table = NULL;\n"      + "#define hash_table_size  (" + hashTableSize + ")\n"      + "static char *tokens[" + (maxTextAttr + 1) + "];\n"      + "static int text_attr[] = {";    for (Iterator i = textAttrs.keySet().iterator(); i.hasNext(); )      preamble += ((Integer) i.next()) + ",";    preamble += "};\n"      + "#define num_text_attr  (" + textAttrs.size() + ")\n";    int c = 0;    for (Iterator i = textAttrs.keySet().iterator(); i.hasNext();) {      preamble += "static char *text_pat_" + c + "[] = {";      for (Iterator j = ((Set) textAttrs.get(i.next())).iterator();      j.hasNext();) {        preamble += "\"" + j.next() + "\",";      }      preamble += "NULL};\n";      c++;    }    preamble += "static char **text_patterns[] = {\n";    for (int i = 0; i < c; i++)      preamble += "  text_pat_" + i + ",\n";    preamble += "};\n";        tokenMap = null; // free memory    tokenList = null;    textAttrs = null;    return preamble + code;  }    private Map tokenMap;    // maps text strings to tokens  private Vector tokenList;  // maps tokens to text strings  private int numTokens;  // number of tokens added to map so far  private int maxTextAttr;  // maximum index of any used text attribute  private int maxAttr;  // maximum index of any used attribute  private Map textAttrs;  // indices of all used text attributes  // mapped to ngram patterns with which they occur  private Set realAttrs;  // indices of all used real attributes  private Set discreteAttrs;  // indices of all used discrete attributes  private static final int[] primes = {1031, 2053, 4099, 8209, 16411,      32771, 65537, 131101, 262147,      524309, 1048583, 2097169, 4194319};    private String makeCode(PredictorNode pn, String tab) {    String code = "";    code += tab + "add_pred(   /* " + pn.id + " */\n";    double[] v = pn.prediction.toCodeArray();    for (int i = 0; i < v.length; i++)      code += tab + "           " + v[i] +      (i < v.length - 1 ? ",\n" : ");\n");    int size = pn.splitterNodes.size();    for (int i = 0; i < size; i++)      code += makeCode((SplitterNode)pn.splitterNodes.get(i), tab);    return code;  }    private String makeCode(SplitterNode sn, String tab) {    String code = "";        Summary summary=sn.splitter.getSummary();        code += tab + "if (defined_attr(" + summary.index + ")) {  /* " +    sn.id + " */\n";    switch(summary.type) {      case Summary.EQUALITY:        code += tab + "  if (int_attr(" + summary.index + ") == " +        ((Integer)summary.val) + ") {\n";      code += makeCode(sn.predictorNodes[0], tab + "    ");      code += tab + "  } else {\n";      code += makeCode(sn.predictorNodes[1], tab + "    ");      code += tab + "  }\n";      code += tab + "}\n";      if (discreteAttrs != null)        discreteAttrs.add(new Integer(summary.index));      break;      case Summary.LESS_THAN:        code += tab + "  if (double_attr(" + summary.index + ") <= " +        ((Double) summary.val) + ") {\n";      code += makeCode(sn.predictorNodes[0], tab + "    ");      code += tab + "  } else {\n";      code += makeCode(sn.predictorNodes[1], tab + "    ");      code += tab + "  }\n";      code += tab + "}\n";      if (realAttrs != null)        realAttrs.add(new Integer(summary.index));      break;      case Summary.CONTAINS_ABSTAIN:      case Summary.CONTAINS_NOABSTAIN:        String s = (String) summary.val;      if (!tokenMap.containsKey(s)) {        tokenMap.put(s, new Integer(numTokens++));        tokenList.add(s);      }            Integer idx = new Integer(summary.index);      if (!textAttrs.containsKey(idx))        textAttrs.put(idx, new TreeSet());      String p = (new StringTokenizer(s)).nextToken();      ((TreeSet) textAttrs.get(idx)).add(p);            if (summary.index > maxTextAttr)        maxTextAttr = summary.index;            code += tab + "  if (attr_contains_token(" + summary.index +      ", " + (Integer) tokenMap.get(s) + ")) {\n";      code += makeCode(sn.predictorNodes[0], tab + "    ");      if (summary.type == Summary.CONTAINS_NOABSTAIN) {        code += tab + "  } else {\n";        code += makeCode(sn.predictorNodes[1], tab + "    ");      }      code += tab + "  }\n";      code += tab + "}\n";      break;      default:        throw new RuntimeException("Type of split not allowed");    }        if (summary.index > maxAttr)      maxAttr = summary.index;    return code;  }    private static final boolean allDefined = false;  /* true if should assume all attributes are defined in Matlab code */    /** Converts this AlternatingTree to Matlab.  Assumes that all   attributes are of type number or finite.  If allDefined = true   then all attributes are assumed to be defined.  */  public String toMatlab(String fname,      ExampleDescription exampleDescription) {    String code = "";    code += ""      + "% This function evaluates a jboost-trained classifier.  The "      + (allDefined ? "" : "first ") + "\n"      + "% argument is an array of values corresponding to the formatted data\n"      + "% used during training."      + (allDefined          ? "\n"               : "  The second argument is an array of values\n"                + "% indicating which of the attributes are defined (where non-zero means\n"                + "% that the corresponding attribute is defined).\n")                + "% This classifier was automatically generated by jboost on:\n"                + "% " + (new Date()) + "\n"                + "function pred = " + fname + "(val"                + (allDefined ? "" : ", def") + ")\n"                + "  pred = zeros([" + root.prediction.toCodeArray().length + ",1]);\n"                + makeMatlabCode(root, "  ")                + "\n";    return code;  }    private String makeMatlabCode(PredictorNode pn, String tab) {    String code = "";    code += tab + "pred = pred + [   % " + pn.id + "\n";    double[] v = pn.prediction.toCodeArray();    for (int i = 0; i < v.length; i++)      code += tab + "           " + v[i] +       (i < v.length - 1 ? "\n" : "];\n");    int size = pn.splitterNodes.size();    for (int i = 0; i < size; i++)      code += makeMatlabCode((SplitterNode)pn.splitterNodes.get(i), tab);    return code;  }    private String makeMatlabCode(SplitterNode sn, String tab) {    String code = "";        Summary summary=sn.splitter.getSummary();        code += tab + (allDefined        ? "    "             : "if def(" + (summary.index+1) + ")")             + " % " + sn.id + "\n";    String stab = (allDefined ? tab : tab + "  ");    switch(summary.type) {      case Summary.EQUALITY:        code += stab + "if val(" + (summary.index+1) + ") == " +        ((Integer)summary.val) + "\n";      code += makeMatlabCode(sn.predictorNodes[0], stab + "  ");      code += stab + "else\n";      code += makeMatlabCode(sn.predictorNodes[1], stab + "  ");      code += stab + "end\n";      if (!allDefined)        code += tab + "end\n";      break;      case Summary.LESS_THAN:        code += stab + "if val(" + (summary.index+1) + ") <= " +        ((Double) summary.val) + "\n";      code += makeMatlabCode(sn.predictorNodes[0], stab + "  ");      code += stab + "else\n";      code += makeMatlabCode(sn.predictorNodes[1], stab + "  ");      code += stab + "end\n";      if (!allDefined)        code += tab + "end\n";      break;      default:        throw new RuntimeException("Type of split not allowed");    }        return code;  }    /** puts leading white space to pad an integer n to have a   *  uniform length l.   */  private String padInteger(int n, int l) {    String s = "" + n;    for (int i = s.length(); i < l; i++)      s = " " + s;    return s;  }    /** converts a character or string to format appropriate to include   *  in a C or java file.   */  private String checkChar(char c) {    switch (c) {      case '\b':        return "\\b";      case '\t':        return "\\t";      case '\n':        return "\\n";      case '\f':        return "\\f";      case '\r':        return "\\r";      case '\"':        return "\\\"";      case '\'':        return "\\\'";      case '\\':        return "\\";      default:        return "" + c;    }  }    private String checkChar(String s) {    String r = "";    for (int i = 0; i < s.length(); i++)      r += checkChar(s.charAt(i));    return r;  }    public PredictorNode getRoot() {    return root;  }    /**   * Walk through this tree and create lists of the predictor and splitter nodes   * The lists passed to this method will be updated   * XXX: should this method clone the nodes it copies into the lists?   *    * @param predictors the list of PredictorNodes from this tree   * @param splitters the list of SplitterNodes from this tree   */  public void getNodes(ArrayList predictors, ArrayList splitters) {    if (predictors == null || splitters == null) {      throw new IllegalArgumentException("Can not use null lists.");    }        Stack stack= new Stack();    PredictorNode node= getRoot();    stack.push(node);        while (!stack.empty()) {      // add the next node from the stack to the list      node= (PredictorNode) stack.pop();      predictors.add(node);            // add each predictor from each splitter to the stack      // add each splitter to the list of splitters      for (int i=0; i < node.getSplitterNodeNo(); i++) {        SplitterNode splitter= (SplitterNode) node.getSplitterNodes().get(i);        splitters.add(splitter);                for (int j=0; j < splitter.getPredictorNodes().length; j++) {          stack.push(splitter.getPredictorNodes()[j]);        }      }    }      }    /** The root node of the Tree */  protected PredictorNode root;      /**   *  Creates an InstrumentedAlternatingTree from this AlternatingTree by creating the   *  appropriate internal data structures.                                               */  public ComplexLearner instrument(SplitterBuilder[] sb, Booster booster) {        return(null);      }    }class NodeDescription implements Comparable {  int no;  public String desc;    public NodeDescription(int i, String d) {    no=i;    d=desc;  }    public int compareTo(Object o) throws ClassCastException {    NodeDescription nd=(NodeDescription)o;    return(no-nd.no);  }}
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -