📄 alternatingtree.java
字号:
+ " static{\n" + " for (int i = 0; i < num_text_attr; i++)\n" + " tokens[text_attr[i]] = new boolean[num_keys];\n" + " }\n" + "\n" + " static private boolean defined_attr(int i) {\n" + " return (attr[i] != null);\n" + " }\n" + "\n" + " static private double double_attr(int i) {\n" + " double r;\n" + " try {\n" + " r = ((Double) attr[i]).doubleValue();\n" + " }\n" + " catch (ClassCastException e) {\n" + " throw new IllegalArgumentException\n" + " (\"Expected attribute \" + i + \" to be of type Double\");\n" + " }\n" + " return r;\n" + " }\n" + "\n" + " static private int int_attr(int i) {\n" + " int r;\n" + " try {\n" + " r = ((Integer) attr[i]).intValue();\n" + " }\n" + " catch (ClassCastException e) {\n" + " throw new IllegalArgumentException\n" + " (\"Expected attribute \" + i + \" to be of type Integer\");\n" + " }\n" + " return r;\n" + " }\n" + "\n" + " static private boolean attr_contains_token(int i, int k) {\n" + " return tokens[i][k];\n" + " }\n" + "\n" + " static private int cur_word = 0;\n" + " static private String[] words = null;\n" + " static private boolean[] pattern = null;\n" + " static private String prepend = \"\";\n" + " static private final String[] prependChar = {\"0\", \"1\"};\n" + "\n" + " static private void setPattern(String[] m_w, boolean[] pat) {\n" + " words = m_w;\n" + " pattern = pat;\n" + " cur_word = 0;\n" + " prepend = \"\";\n" + " for (int i = 0; i < pattern.length; i++)\n" + " prepend += prependChar[pattern[i] ? 1 : 0];\n" + " }\n" + "\n" + " static private boolean moreTokens() {\n" + " return (cur_word <= words.length - pattern.length);\n" + " }\n" + "\n" + " static private String nextToken() {\n" + " if (!moreTokens())\n" + " throw new NoSuchElementException();\n" + " String r = prepend;\n" + " for (int i = 0; i < pattern.length; i++) {\n" + " if (pattern[i])\n" + " r += \" \" + words[cur_word + i];\n" + " }\n" + " cur_word++;\n" + " return r;\n" + " }\n" + "\n" + root.prediction.javaPreamble(); if (specFileName != null) { code += "" + "//------------------------- start cut here to make stand alone -----------\n" + " static private jboost.examples.Example ex;\n" + "\n" + " /**\n" + " The main reads examples from standard input. The examples must\n" + " have the same format as used during training. After each example\n" + " is read a vector of scores is output, one per class.\n" + " **/\n" + " static public void main(String[] argv) {\n" + " jboost.monitor.Monitor.logLevel = -1;\n" + "\n" + " jboost.tokenizer.ExampleStream exStream = null;\n" + "\n" + " try {\n" + " exStream =\n" + " new jboost.tokenizer.ExampleStream(\n" + " (new jboost.tokenizer.jboost_DataStream(false, savedSpec)));\n" + " }\n" + " catch (Exception e) {\n" + " System.err.println(\"Unable to parse spec file.\");\n" + " e.printStackTrace();\n" + " return;\n" + " }\n" + "\n" + " attr = new Object[" + (maxAttr+1) + "];\n" + "\n" + " jboost.examples.TextDescription.setTokenSet(keys);\n" + "\n" + " int i, a, j, k;\n" + " double[] r;\n" + "\n" + " while(true) {\n" + " try {\n" + " ex = exStream.getExample();\n" + " }\n" + " catch(jboost.tokenizer.ParseException e) {\n" + " System.err.println(\"Bad example encountered:\");\n" + " System.err.println(e.getMessage());\n" + " continue;\n" + " }\n" + " if (ex == null)\n" + " break;\n" + " for (j = 0; j < real_attr.length; j++) {\n" + " a = real_attr[j];\n" + " jboost.examples.Attribute at = ex.getAttribute(a);\n" + " attr[a] = (at.isDefined()\n" + " ? (new Double(((jboost.examples.RealAttribute) at).getValue()))\n" + " : null);\n" + " }\n" + " for (j = 0; j < disc_attr.length; j++) {\n" + " a = disc_attr[j];\n" + " jboost.examples.Attribute at = ex.getAttribute(a);\n" + " attr[a] = (at.isDefined()\n" + " ? (new Integer(((jboost.examples.DiscreteAttribute) at).getValue()))\n" + " : null);\n" + " }\n" + " for (j = 0; j < text_attr.length; j++) {\n" + " a = text_attr[j];\n" + " jboost.examples.Attribute at = ex.getAttribute(a);\n" + " if (at.isDefined()) {\n" + " attr[a] = \"\";\n" + " int[] set = ((jboost.examples.SetAttribute) ex.getAttribute(a)).getList();\n" + "\n" + " Arrays.fill(tokens[a], false);\n" + " for (k = 0; k < set.length; k++) {\n" + " tokens[a][set[k]] = true;\n" + " }\n" + " } else\n" + " attr[a] = null;\n" + " }\n" + "\n" + " r = " + fname_int + "();\n" + " for(j = 0; j < r.length; j++)\n" + " System.out.print(r[j] + \" \");\n" + " System.out.println();\n" + " }\n" + " }\n" + "\n" + " private static final String savedSpec = \""; BufferedReader spec = new BufferedReader(new FileReader(specFileName)); final int buflen = 1000; char[] cbuf = new char[buflen]; int i,n; while ((n = spec.read(cbuf, 0, buflen)) != -1) for (i = 0; i < n; i++) code += checkChar(cbuf[i]); code += "\";\n" + "//--------------------------- end cut ------------------------------------\n"; } code += "\n}\n"; tokenMap = null; // free memory tokenList = null; realAttrs = null; discreteAttrs = null; textAttrs = null; return code; } /** Converts this AlternatingTree to C */ public String toC(String fname, ExampleDescription exampleDescription) { String preamble = ""; String code = ""; tokenMap = new HashMap(); tokenList = new Vector(); numTokens = 0; maxTextAttr = 0; realAttrs = discreteAttrs = null; textAttrs = new TreeMap(); int hashTableSize = -1; preamble += "" + "/*************************************\n" + "This program can be used to evaluate a jboost-trained classifier on new data. \n" + "The code contains a single procedure:\n" + "\n" + " double " + fname + "(void **attr, double *ret)\n" + "\n" + "The first argument attr is an array of pointers corresponding to the\n" + "attributes specified in the spec file. Thus, if attribute i is text,\n" + "then attr[i] must be a char array; if attribute i is a number, then\n" + "*attr[i] must be a double; and if attribute i is finite, then *attr[i]\n" + "must be an int containing the index of the chosen value. An undefined\n" + "attribute is indicated by setting attr[i] to NULL.\n" + "\n" + "Specifically, these pointers are:\n" + "\n" + " index attr.type data.type name\n" + " ------------------------------------------\n"; AttributeDescription[] ad = exampleDescription.getAttributes(); for (int i = 0; i < ad.length; i++) { String s; String key = ""; String t = ad[i].getType(); if (t.equals("number")) { s = "number double "; } else if (t.equals("text")) { s = "text char "; } else if (t.equals("finite")) { s = "finite int "; for (int j = 0; j < ad[i].getNoOfValues(); j++) key += (j == 0 ? " key: " : " ") + padInteger(j, 5) + " = " + ad[i].getAttributeValue(j) + "\n"; } else { System.err.println("Warning: unrecognized type for attribute " + i + ": " + t); s = "??? ??? "; } preamble += " " + padInteger(i, 5) + " " + s + " " + ad[i].getAttributeName() + "\n" + key; } preamble += "" + "\n" + "The second argument ret is a pointer to an array of k doubles, where k\n" + "is the number of classes. The scores for each of the k classes will\n" + "be stored in this array. If ret is NULL, then no scores are stored.\n" + "In any case, predict returns the score for class 0 (=ret[0]).\n" + "\n" + "These scores correspond to the classes as follows:\n" + "\n" + " index class name\n" + " ------------------------\n"; AttributeDescription la = exampleDescription.getLabelDescription(); for (int j = 0; j < la.getNoOfValues(); j++) preamble += " " + padInteger(j, 5) + " " + la.getAttributeValue(j) + "\n"; preamble += "" + "\n" + "This classifier was automatically generated by jboost on\n" + (new Date()) + ".\n" + "*************************************/\n" + "#include <stdio.h>\n" + "#include <stdlib.h>\n" + "#include <strings.h>\n" + "#include <values.h>\n" + root.prediction.cPreamble() + "#define defined_attr(X) (attr[X])\n" + "#define int_attr(X) (*((int *) attr[X]))\n" + "#define attr_contains_token(X,Y) (tokens[X][Y])\n" + "#define double_attr(X) (*((double *) attr[X]))\n\n" + "#define WHITE_CHARS \" \\t\\n\"\n" + "typedef struct hash_table_entry_s {\n" + " char *key;\n" + " int id;\n" + " struct hash_table_entry_s *next;\n" + "} HashTableEntry_t;\n"; code += "\n" + "static char **words;\n" + "static int num_words;\n" + "static int cur_word;\n" + "static char *pattern;\n" + "static int pattern_len;\n" + "\n" + "static void set_pattern(int n, char **m_w, char *pat) {\n" + " words = m_w;\n" + " pattern = pat;\n" + " cur_word = 0;\n" + " num_words = n;\n" + " pattern_len = strlen(pat);\n" + "}\n" + "\n" + "#define more_tokens() (cur_word <= num_words - pattern_len)\n" + "\n" + "#define ADD_CHAR(S) { \\\n" + " if (c >= buffer_size) { \\\n" + " buffer_size = 2 * buffer_size + 1; \\\n" + " buffer = (char *) realloc(buffer, buffer_size * sizeof(char)); \\\n" + " } \\\n" + " buffer[c++] = (S); \\\n" + "}\n" + "\n" + "static char *next_token(void) {\n" + " static char *buffer = NULL;\n" + " static int buffer_size = 0;\n" + " int i, c;\n" + " char *s;\n" + "\n" + " c = 0;\n" + " for (s = pattern; *s; s++)\n" + " ADD_CHAR(*s);\n" + "\n" + " for (i = 0; i < pattern_len; i++) {\n" + " if (pattern[i] == '1') {\n" + " ADD_CHAR(' ');\n" + " for (s = words[cur_word + i]; *s; s++)\n" + " ADD_CHAR(*s);\n" + " }\n" + " }\n" + " ADD_CHAR('\\0');\n" + " cur_word++;\n" + " return buffer;\n" + "}\n" + "\n" + "static int\n" + "hash(char *s) {\n" + " static int *coef = NULL;\n" + " static int max_len = 0;\n" + " int len = strlen(s);\n" + " int i, h;\n\n" + " if (len > max_len) {\n" + " i = max_len;\n" + " max_len = 2 * len;\n" + " coef = (int *) realloc(coef, max_len * sizeof(int));\n" + " for (; i < max_len; i++)\n" + " coef[i] = random() % hash_table_size;\n" + " }\n\n" + " h = 0;\n" + " for (i = 0; i < len; i++) {\n" + " if (h >= MAXINT - (hash_table_size - 1) * 255)\n" + " h = h % hash_table_size;\n" + " h += coef[i] * s[i];\n" + " }\n" + " return (h % hash_table_size);\n" + "}\n\n" + "double " + fname + "(void **attr, double *r) {\n" + " int i, j, h, a;\n" + " HashTableEntry_t *entry;\n" + " char *s;\n" + " static char **words = NULL;\n" + " static int max_word_list_size = 0;\n" + " int num_words;\n" + " static char *buffer = NULL;\n" + " static int buffer_size = 0;\n" + " char **pat;\n" + "\n" + " Prediction_t p;\n" + "\n" + " if (!hash_table) {\n" + " hash_table = (HashTableEntry_t **)\n" + " malloc(hash_table_size * sizeof(HashTableEntry_t *));\n" + " for (i = 0; i < hash_table_size; i++)\n" + " hash_table[i] = NULL;\n" + " for (i = 0; i < num_keys; i++) {\n" + " h = hash(keys[i]);\n" + " entry = (HashTableEntry_t *) malloc(sizeof(HashTableEntry_t));\n" + " entry->key = keys[i];\n" + " entry->id = i;\n" + " entry->next = hash_table[h];\n" + " hash_table[h] = entry;\n" + " }\n" + " for (i = 0; i < num_text_attr; i++)\n" + " tokens[text_attr[i]] = (char *) malloc(num_keys * sizeof(char));\n" + " }\n" + "\n"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -