📄 dti.c
字号:
double minval = -DBL_MAX; /* minimal value of selection measure */ float mincnt = 2.0F; /* minimal number of cases */ int maxht = INT_MAX; /* maximal height of tree */ int inflags = AS_NOXATT; /* table file read flags */ int dtflags = 0; /* flags, e.g. DF_SUBSET */ int balance = 0; /* flag for balancing class freqs. */ int maxlen = 0; /* maximal output line length */ int dmode = 0; /* description mode */ int attcnt = 0; /* number of attributes */ const MINFO *ntab; /* table of measure names */ int nodecnt; /* number of nodes in dec./reg. tree */ double tplwgt; /* sum of the tuple weights */ int trgid; /* id of target column */ ATT *att; /* to traverse attributes */ double *evals; /* attribute evaluations */ prgname = argv[0]; /* get program name for error msgs. */ /* --- print startup/usage message --- */ if (argc > 1) { /* if arguments are given */ fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION); fprintf(stderr, VERSION); } /* print a startup message */ else { /* if no argument is given */ printf("usage: %s [options] domfile " "[-d|-h hdrfile] tabfile dtfile\n", argv[0]); printf("%s\n", DESCRIPTION); printf("%s\n", VERSION); printf("-c# target attribute name " "(default: last attribute)\n"); printf("-q# balance class frequencies (weight tuples)\n"); printf(" l: lower, b: boost, s: shift tuple weights\n"); printf("-e# attribute selection measure " "(default: infgr/rmse)\n"); printf("-! print a list of available " "attribute selection measures\n"); printf("-z# sensitivity parameter (default: %g)\n", params[0]); printf(" (for measures wdiff, bdm, bdmod, " "rdlen1, rdlen2)\n"); printf("-p# prior (positive value) " "or equivalent sample size (negative value)\n"); printf(" (for measures bdm, bdmod)\n"); printf("-i# minimal value of the selection measure " "(default: no limit)\n"); printf("-w do not weight measure with fraction " "of known values\n"); printf("-t# maximal height of the tree (default: no limit)\n"); printf("-m# minimal number of tuples in two branches " "(default: %g)\n", mincnt); printf("-s try to form subsets on symbolic attributes\n"); printf("-x only evaluate attributes w.r.t. target\n"); printf("-l# output line length (default: no limit)\n"); printf("-a align values of test attributes " "(default: do not align)\n"); printf("-v print relative frequencies (in percent)\n"); printf("-b/f/r# blank characters, field and record separators\n" " (default: \" \\t\\r\", \" \\t\", \"\\n\")\n"); printf("-u# unknown value characters (default: \"?\")\n"); printf("-n number of tuple occurrences in last field\n"); printf("domfile file containing domain descriptions\n"); printf("-d use default header " "(field names = field numbers)\n"); printf("-h read table header (field names) from hdrfile\n"); printf("hdrfile file containing table header (field names)\n"); printf("tabfile table file to read " "(field names in first record)\n"); printf("dtfile file to write induced " "decision/regression tree to\n"); return 0; /* print a usage message */ } /* and abort the program */ /* --- evaluate arguments --- */ for (i = 1; i < argc; i++) { /* traverse arguments */ s = argv[i]; /* get option argument */ if (optarg) { *optarg = s; optarg = NULL; continue; } if ((*s == '-') && *++s) { /* -- if argument is an option */ while (1) { /* traverse characters */ switch (*s++) { /* evaluate option */ case '!': help(); break; case 'c': optarg = &trgname; break; case 'q': balance = (*s) ? *s++ : 0; break; case 'e': optarg = &mname; break; case 'z': params[0] = strtod(s, &s); break; case 'p': params[1] = strtod(s, &s); break; case 'i': minval = strtod(s, &s); break; case 'w': wgtd = 0; break; case 'm': mincnt = (float)strtod(s, &s); break; case 's': dtflags |= DT_SUBSET; break; case 'x': dtflags |= DT_EVAL; break; case 't': maxht = (int)strtol(s, &s, 0); break; case 'l': maxlen = (int)strtol(s, &s, 0); break; case 'a': dmode |= DT_ALIGN; break; case 'v': dmode |= DT_REL; break; case 'b': optarg = &blanks; break; case 'f': optarg = &fldseps; break; case 'r': optarg = &recseps; break; case 'u': optarg = &uvchars; break; case 'n': inflags |= AS_WEIGHT; break; case 'd': inflags |= AS_DFLT; break; case 'h': optarg = &fn_hdr; break; default : error(E_OPTION, *--s); break; } /* set option variables */ if (!*s) break; /* if at end of string, abort loop */ if (optarg) { *optarg = s; optarg = NULL; break; } } } /* get option argument */ else { /* if argument is no option */ switch (k++) { /* evaluate non-option */ case 0: fn_dom = s; break; case 1: fn_tab = s; break; case 2: fn_dt = s; break; default: error(E_ARGCNT); break; } /* note filenames */ } } if (optarg) error(E_OPTARG); /* check option argument */ if (k != 3) error(E_ARGCNT); /* check number of arguments */ if (mincnt < 0) error(E_MINCNT, mincnt); if (fn_hdr && (strcmp(fn_hdr, "-") == 0)) fn_hdr = ""; /* convert "-" to "" */ i = (!fn_dom || !*fn_dom) ? 1 : 0; if (!fn_tab || !*fn_tab) i++; if ( fn_hdr && !*fn_hdr) i++;/* check assignments of stdin: */ if (i > 1) error(E_STDIN); /* stdin must not be used twice */ if ((balance != 0) && (balance != 'l') && (balance != 'b') && (balance != 's')) error(E_BALANCE, balance); /* check balancing mode */ if (fn_hdr) /* set the header file flag */ inflags = AS_ATT | (inflags & ~AS_DFLT); /* --- read attribute set --- */ scan = sc_create(fn_dom); /* create a scanner */ if (!scan) error((!fn_dom || !*fn_dom) ? E_NOMEM : E_FOPEN, fn_dom); attset = as_create("domains", att_delete); if (!attset) error(E_NOMEM); /* create an attribute set */ fprintf(stderr, "\nreading %s ... ", sc_fname(scan)); if ((sc_nexter(scan) < 0) /* start scanning (get first token) */ || (as_parse(attset, scan, AT_ALL) != 0) || (as_attcnt(attset) <= 0) /* parse attribute set and */ || !sc_eof(scan)) /* check for end of file */ error(E_PARSE, sc_fname(scan)); fprintf(stderr, "[%d attribute(s)] done.", as_attcnt(attset)); /* --- determine id of target column --- */ if (!trgname) /* if no target att. name is given, */ trgid = as_attcnt(attset)-1;/* use the last att. as the target */ else { /* if a target att. name is given */ trgid = as_attid(attset, trgname); if (trgid < 0) error(E_TARGET, trgname, sc_fname(scan)); } /* check whether the target exists */ att = as_att(attset, trgid); /* get the target attribute */ sc_delete(scan); scan = NULL; /* and delete the scanner */ /* --- translate measure --- */ if (att_type(att) == AT_SYM){ /* if symbolic target attribute */ if (!mname) mname = "infgr";/* set default measure name and */ ntab = symtab; } /* get the symbolic name table */ else { /* if numeric target attribute */ if (!mname) mname = "rmse"; /* set default measure name and */ ntab = numtab; /* get the numeric name table, */ } /* then get the measure code */ s = strchr(mname, ':'); /* check for a double measure */ if (!s) /* if only a single measure name, */ measure = code(ntab,mname); /* simply get the measure code */ else { /* if a double measure name */ *s++ = 0; measure = code(ntab, mname); if (measure < 0) measure = code(ntab, s); *s = ':'; } /* check both measure names */ if (measure < 0) error(E_MEASURE, mname); measure |= wgtd; /* add flag for weighted measure */ fprintf(stderr, "\n"); /* terminate the startup message */ /* --- read table --- */ as_chars(attset, blanks, fldseps, recseps, uvchars); table = io_tabin(attset, fn_hdr, fn_tab, inflags, "table", 1); if (!table) error(1); /* read the table file */ fprintf(stderr, "reducing%s table ... ", (balance) ? " and balancing" : ""); tab_reduce(table); /* reduce table for speed up */ if (balance /* if the balance flag is set */ && (att_type(as_att(attset, trgid)) == AT_SYM)) { tab_balance(table, trgid, (balance == 'l') ? -2.0F : (balance == 'b') ? -1.0F : 0.0F, NULL); } /* balance the class frequencies */ fprintf(stderr, "[%d/%g tuple(s)] done.\n", tab_tplcnt(table), tab_getwgt(table, 0, INT_MAX)); /* --- grow decision/regression tree --- */ fprintf(stderr, "growing %s tree ... ", (att_type(att) == AT_SYM) ? "decision" : "regression"); dtree = dt_grow(table, trgid, measure, params, minval, maxht, mincnt, dtflags); if (!dtree) error(E_NOMEM); /* grow decision/regression tree */ attcnt = dt_attchk(dtree); /* mark occuring attributes */ nodecnt = dt_size(dtree); /* get the number of nodes, */ maxht = dt_height(dtree); /* the height of the tree, */ tplwgt = dt_total(dtree); /* and the tuple weight sum */ fprintf(stderr, "[%d attribute(s)/%d node(s)/%d level(s)] done.\n", attcnt, nodecnt, maxht); /* --- write decision/regression tree --- */ if (fn_dt && *fn_dt) /* if an output file name is given, */ out = fopen(fn_dt, "w"); /* open the output file */ else { /* if no output file name is given, */ out = stdout; fn_dt = "<stdout>"; } /* write to std. output */ fprintf(stderr, "writing %s ... ", fn_dt); if (!out) error(E_FOPEN, fn_dt); if (dtflags & DT_EVAL) { /* if attributes were evaluated */ n = as_attcnt(attset); /* get the number of attributes */ for (i = w = 0; i < n; i++) { if (i == trgid) continue; /* traverse the attributes */ k = strlen(att_name(as_att(attset, i))); if (k > w) w = k; /* determine the maximum width */ } /* of an attribute name */ evals = (double*)att_info(as_att(attset, trgid))->p; for (w += 2, i = 0; i < n; i++) { if (i == trgid) continue; /* traverse the attributes, */ att = as_att(attset, i); /* but skip the target attribute */ k = fprintf(out, att_name(att)); if (k < 0) break; /* print the attribute name */ while (++k <= w) fputc(' ', out); fprintf(out, "%g", evals[i]); if (att_type(att) != AT_SYM) fprintf(out, " (%g)", att_info(att)->f); fputc('\n', out); /* print the attribute evaluation */ } /* and, if necessary, a cut value */ free(evals); } /* delete the evaluation vector */ else { /* if a decision tree has been grown */ if (as_desc(attset, out, AS_TITLE|AS_MARKED|AS_IVALS, maxlen) != 0) error(E_FWRITE, fn_dt); /* describe attribute domains */ fputc('\n', out); /* leave one line empty */ if (dt_desc(dtree, out, DT_TITLE|DT_INFO|dmode, maxlen) != 0) error(E_FWRITE, fn_dt); /* describe decision/regression tree */ } if (out != stdout) { /* if not written to standard output, */ i = fclose(out); out = NULL;/* close the output file */ if (i != 0) error(E_FWRITE, fn_dt); } /* check for success and */ fprintf(stderr, "[%d attribute(s)/%d node(s)/%d level(s)] done.\n", attcnt, nodecnt, maxht); /* --- clean up --- */ #ifndef NDEBUG dt_delete(dtree, 0); /* delete decision/regression tree, */ tab_delete(table, 1); /* table, and attribute set */ #endif #ifdef STORAGE showmem("at end of program"); /* check memory usage */ #endif return 0; /* return 'ok' */} /* main() */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -