📄 alternatingtree.java
字号:
+ " for (i = 0; i < num_text_attr; i++) {\n" + " a = text_attr[i];\n" + " if (!defined_attr(a))\n" + " continue;\n" + " for (j = 0; j < num_keys; j++)\n" + " tokens[a][j] = 0;\n" + "\n" + " while (strlcpy(buffer, attr[a], buffer_size) >= buffer_size) {\n" + " buffer_size = 2 * strlen(attr[a]);\n" + " buffer = (char *) realloc(buffer, (buffer_size+1) * sizeof(char));\n" + " }\n" + " num_words = 0;\n" + " for (s = strtok(buffer, WHITE_CHARS); s; s = strtok(NULL, WHITE_CHARS)) {\n" + " if (num_words >= max_word_list_size) {\n" + " max_word_list_size = 2 * max_word_list_size + 1;\n" + " words = (char **) realloc(words, max_word_list_size * sizeof(char *));\n" + " }\n" + " words[num_words++] = s;\n" + " }\n" + " for (pat = text_patterns[i]; *pat; pat++) {\n" + " set_pattern(num_words, words, *pat);\n" + " while (more_tokens()) {\n" + " s = next_token();\n" + " for (entry = hash_table[hash(s)]; entry; entry = entry->next)\n" + " if (!strcmp(entry->key, s)) {\n" + " tokens[a][entry->id] = 1;\n" + " break;\n" + " }\n" + " }\n" + " }\n" + " }\n" + "\n" + " reset_pred();\n\n" + makeCode(root, " ") + "\n" + " return finalize_pred();\n" + "}\n"; for (int i = 0; i < primes.length && 3*numTokens > (hashTableSize = primes[i]); i++) ; preamble += "static char *keys[] = {\n"; for (int i = 0; i < numTokens; i++) preamble += " \"" + checkChar((String)tokenList.get(i)) + "\",\n"; preamble += "};\n" + "#define num_keys (" + numTokens + ")\n" + "static HashTableEntry_t **hash_table = NULL;\n" + "#define hash_table_size (" + hashTableSize + ")\n" + "static char *tokens[" + (maxTextAttr + 1) + "];\n" + "static int text_attr[] = {"; for (Iterator i = textAttrs.keySet().iterator(); i.hasNext(); ) preamble += ((Integer) i.next()) + ","; preamble += "};\n" + "#define num_text_attr (" + textAttrs.size() + ")\n"; int c = 0; for (Iterator i = textAttrs.keySet().iterator(); i.hasNext();) { preamble += "static char *text_pat_" + c + "[] = {"; for (Iterator j = ((Set) textAttrs.get(i.next())).iterator(); j.hasNext();) { preamble += "\"" + j.next() + "\","; } preamble += "NULL};\n"; c++; } preamble += "static char **text_patterns[] = {\n"; for (int i = 0; i < c; i++) preamble += " text_pat_" + i + ",\n"; preamble += "};\n"; tokenMap = null; // free memory tokenList = null; textAttrs = null; return preamble + code; } private Map tokenMap; // maps text strings to tokens private Vector tokenList; // maps tokens to text strings private int numTokens; // number of tokens added to map so far private int maxTextAttr; // maximum index of any used text attribute private int maxAttr; // maximum index of any used attribute private Map textAttrs; // indices of all used text attributes // mapped to ngram patterns with which they occur private Set realAttrs; // indices of all used real attributes private Set discreteAttrs; // indices of all used discrete attributes private static final int[] primes = {1031, 2053, 4099, 8209, 16411, 32771, 65537, 131101, 262147, 524309, 1048583, 2097169, 4194319}; private String makeCode(PredictorNode pn, String tab) { String code = ""; code += tab + "add_pred( /* " + pn.id + " */\n"; double[] v = pn.prediction.toCodeArray(); for (int i = 0; i < v.length; i++) code += tab + " " + v[i] + (i < v.length - 1 ? ",\n" : ");\n"); int size = pn.splitterNodes.size(); for (int i = 0; i < size; i++) code += makeCode((SplitterNode)pn.splitterNodes.get(i), tab); return code; } private String makeCode(SplitterNode sn, String tab) { String code = ""; Summary summary=sn.splitter.getSummary(); code += tab + "if (defined_attr(" + summary.index + ")) { /* " + sn.id + " */\n"; switch(summary.type) { case Summary.EQUALITY: code += tab + " if (int_attr(" + summary.index + ") == " + ((Integer)summary.val) + ") {\n"; code += makeCode(sn.predictorNodes[0], tab + " "); code += tab + " } else {\n"; code += makeCode(sn.predictorNodes[1], tab + " "); code += tab + " }\n"; code += tab + "}\n"; if (discreteAttrs != null) discreteAttrs.add(new Integer(summary.index)); break; case Summary.LESS_THAN: code += tab + " if (double_attr(" + summary.index + ") <= " + ((Double) summary.val) + ") {\n"; code += makeCode(sn.predictorNodes[0], tab + " "); code += tab + " } else {\n"; code += makeCode(sn.predictorNodes[1], tab + " "); code += tab + " }\n"; code += tab + "}\n"; if (realAttrs != null) realAttrs.add(new Integer(summary.index)); break; case Summary.CONTAINS_ABSTAIN: case Summary.CONTAINS_NOABSTAIN: String s = (String) summary.val; if (!tokenMap.containsKey(s)) { tokenMap.put(s, new Integer(numTokens++)); tokenList.add(s); } Integer idx = new Integer(summary.index); if (!textAttrs.containsKey(idx)) textAttrs.put(idx, new TreeSet()); String p = (new StringTokenizer(s)).nextToken(); ((TreeSet) textAttrs.get(idx)).add(p); if (summary.index > maxTextAttr) maxTextAttr = summary.index; code += tab + " if (attr_contains_token(" + summary.index + ", " + (Integer) tokenMap.get(s) + ")) {\n"; code += makeCode(sn.predictorNodes[0], tab + " "); if (summary.type == Summary.CONTAINS_NOABSTAIN) { code += tab + " } else {\n"; code += makeCode(sn.predictorNodes[1], tab + " "); } code += tab + " }\n"; code += tab + "}\n"; break; default: throw new RuntimeException("Type of split not allowed"); } if (summary.index > maxAttr) maxAttr = summary.index; return code; } private static final boolean allDefined = false; /* true if should assume all attributes are defined in Matlab code */ /** Converts this AlternatingTree to Matlab. Assumes that all attributes are of type number or finite. If allDefined = true then all attributes are assumed to be defined. */ public String toMatlab(String fname, ExampleDescription exampleDescription) { String code = ""; code += "" + "% This function evaluates a jboost-trained classifier. The " + (allDefined ? "" : "first ") + "\n" + "% argument is an array of values corresponding to the formatted data\n" + "% used during training." + (allDefined ? "\n" : " The second argument is an array of values\n" + "% indicating which of the attributes are defined (where non-zero means\n" + "% that the corresponding attribute is defined).\n") + "% This classifier was automatically generated by jboost on:\n" + "% " + (new Date()) + "\n" + "function pred = " + fname + "(val" + (allDefined ? "" : ", def") + ")\n" + " pred = zeros([" + root.prediction.toCodeArray().length + ",1]);\n" + makeMatlabCode(root, " ") + "\n"; return code; } private String makeMatlabCode(PredictorNode pn, String tab) { String code = ""; code += tab + "pred = pred + [ % " + pn.id + "\n"; double[] v = pn.prediction.toCodeArray(); for (int i = 0; i < v.length; i++) code += tab + " " + v[i] + (i < v.length - 1 ? "\n" : "];\n"); int size = pn.splitterNodes.size(); for (int i = 0; i < size; i++) code += makeMatlabCode((SplitterNode)pn.splitterNodes.get(i), tab); return code; } private String makeMatlabCode(SplitterNode sn, String tab) { String code = ""; Summary summary=sn.splitter.getSummary(); code += tab + (allDefined ? " " : "if def(" + (summary.index+1) + ")") + " % " + sn.id + "\n"; String stab = (allDefined ? tab : tab + " "); switch(summary.type) { case Summary.EQUALITY: code += stab + "if val(" + (summary.index+1) + ") == " + ((Integer)summary.val) + "\n"; code += makeMatlabCode(sn.predictorNodes[0], stab + " "); code += stab + "else\n"; code += makeMatlabCode(sn.predictorNodes[1], stab + " "); code += stab + "end\n"; if (!allDefined) code += tab + "end\n"; break; case Summary.LESS_THAN: code += stab + "if val(" + (summary.index+1) + ") <= " + ((Double) summary.val) + "\n"; code += makeMatlabCode(sn.predictorNodes[0], stab + " "); code += stab + "else\n"; code += makeMatlabCode(sn.predictorNodes[1], stab + " "); code += stab + "end\n"; if (!allDefined) code += tab + "end\n"; break; default: throw new RuntimeException("Type of split not allowed"); } return code; } /** puts leading white space to pad an integer n to have a * uniform length l. */ private String padInteger(int n, int l) { String s = "" + n; for (int i = s.length(); i < l; i++) s = " " + s; return s; } /** converts a character or string to format appropriate to include * in a C or java file. */ private String checkChar(char c) { switch (c) { case '\b': return "\\b"; case '\t': return "\\t"; case '\n': return "\\n"; case '\f': return "\\f"; case '\r': return "\\r"; case '\"': return "\\\""; case '\'': return "\\\'"; case '\\': return "\\"; default: return "" + c; } } private String checkChar(String s) { String r = ""; for (int i = 0; i < s.length(); i++) r += checkChar(s.charAt(i)); return r; } public PredictorNode getRoot() { return root; } /** * Walk through this tree and create lists of the predictor and splitter nodes * The lists passed to this method will be updated * XXX: should this method clone the nodes it copies into the lists? * * @param predictors the list of PredictorNodes from this tree * @param splitters the list of SplitterNodes from this tree */ public void getNodes(ArrayList predictors, ArrayList splitters) { if (predictors == null || splitters == null) { throw new IllegalArgumentException("Can not use null lists."); } Stack stack= new Stack(); PredictorNode node= getRoot(); stack.push(node); while (!stack.empty()) { // add the next node from the stack to the list node= (PredictorNode) stack.pop(); predictors.add(node); // add each predictor from each splitter to the stack // add each splitter to the list of splitters for (int i=0; i < node.getSplitterNodeNo(); i++) { SplitterNode splitter= (SplitterNode) node.getSplitterNodes().get(i); splitters.add(splitter); for (int j=0; j < splitter.getPredictorNodes().length; j++) { stack.push(splitter.getPredictorNodes()[j]); } } } } /** The root node of the Tree */ protected PredictorNode root; /** * Creates an InstrumentedAlternatingTree from this AlternatingTree by creating the * appropriate internal data structures. */ public ComplexLearner instrument(SplitterBuilder[] sb, Booster booster) { return(null); } }class NodeDescription implements Comparable { int no; public String desc; public NodeDescription(int i, String d) { no=i; d=desc; } public int compareTo(Object o) throws ClassCastException { NodeDescription nd=(NodeDescription)o; return(no-nd.no); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -