📄 dtp.c
字号:
double tplwgt; /* weight of tuples */ int attcnt; /* number of attributes in dec. tree */ int nodecnt; /* number of nodes in dec./reg. tree */ clock_t t; /* timer for measurements */ prgname = argv[0]; /* get program name for error msgs. */ /* --- print startup/usage message --- */ if (argc > 1) { /* if arguments are given */ fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION); fprintf(stderr, VERSION); } /* print a startup message */ else { /* if no argument is given */ printf("usage: %s [options] dtfile pdtfile " "[[-d|-h hdrfile] tabfile]\n", argv[0]); printf("%s\n", DESCRIPTION); printf("%s\n", VERSION); printf("-m# pruning method (default: clvl)\n"); printf(" none: no pruning\n"); printf(" pess: pessimistic pruning\n"); printf(" clvl: confidence level pruning\n"); printf("-p# pruning parameter (default: %g)\n", param); printf("-q# balance class frequencies (weight tuples)\n"); printf(" l: lower, b: boost, s: shift tuple weights\n"); printf("-t# maximal height of the tree (default: no limit)\n"); printf("-k check largest branch " "(can be very time consuming)\n"); printf("-l# output line length (default: no limit)\n"); printf("-a align values of test attributes " "(default: do not align)\n"); printf("-v print relative frequencies (in percent)\n"); printf("-b/f/r# blank characters, field and record separators\n" " (default: \" \\t\\r\", \" \\t\", \"\\n\")\n"); printf("-u# unknown value characters (default: \"?\")\n"); printf("-n number of tuple occurrences in last field\n"); printf("dtfile file containing decision/regression " "tree description\n"); printf("pdtfile file to write pruned " "decision/regression tree to\n"); printf("-d use default header " "(field names = field numbers)\n"); printf("-h read table header (field names) from hdrfile\n"); printf("hdrfile file containing table header (field names)\n"); printf("tabfile table file to read " "(field names in first record)\n"); return 0; /* print a usage message */ } /* and abort the program */ /* --- evaluate arguments --- */ for (i = 1; i < argc; i++) { /* traverse arguments */ s = argv[i]; /* get option argument */ if (optarg) { *optarg = s; optarg = NULL; continue; } if ((*s == '-') && *++s) { /* -- if argument is an option */ while (1) { /* traverse characters */ switch (*s++) { /* evaluate option */ case 'm': optarg = &mname; break; case 'p': param = (float)strtod(s, &s); break; case 'q': balance = (*s) ? *s++ : 0; break; case 't': maxht = (int)strtol(s, &s, 0); break; case 'k': chklb = 1; break; case 'a': dmode |= DT_ALIGN; break; case 'v': dmode |= DT_REL; break; case 'l': maxlen = (int)strtol(s, &s, 0); break; case 'b': optarg = &blanks; break; case 'f': optarg = &fldseps; break; case 'r': optarg = &recseps; break; case 'u': optarg = &uvchars; break; case 'n': flags |= AS_WEIGHT; break; case 'd': flags |= AS_DFLT; break; case 'h': optarg = &fn_hdr; break; default : error(E_OPTION, *--s); break; } /* set option variables */ if (!*s) break; /* if at end of string, abort loop */ if (optarg) { *optarg = s; optarg = NULL; break; } } } /* get option argument */ else { /* if argument is no option */ switch (k++) { /* evaluate non-option */ case 0: fn_dt = s; break; case 1: fn_pdt = s; break; case 2: fn_tab = s; break; default: error(E_ARGCNT); break; } /* note filenames */ } } if (optarg) error(E_OPTARG); /* check option argument */ if ((k < 2) || (k > 3)) error(E_ARGCNT); if (fn_hdr && (strcmp(fn_hdr, "-") == 0)) fn_hdr = ""; /* convert "-" to "" */ i = (!fn_dt || !*fn_dt) ? 1 : 0; if (fn_tab && !*fn_tab) i++; if (fn_hdr && !*fn_hdr) i++; /* check assignments of stdin: */ if (i > 1) error(E_STDIN); /* stdin must not be used twice */ if ((balance != 0) && (balance != 'l') && (balance != 'b') && (balance != 's')) error(E_BALANCE, balance); /* check balancing mode */ if (fn_hdr) /* set the header file flag */ flags = AS_ATT | (flags & ~AS_DFLT); /* --- translate pruning method --- */ if (!mname) mname = "clvl"; /* set default method name */ method = code(pmtab, mname); /* and get the method code */ if (method < 0) error(E_METHOD, method); /* --- read decision tree --- */ t = clock(); /* start the timer */ scan = sc_create(fn_dt); /* create a scanner */ if (!scan) error((!fn_dt || !*fn_dt) ? E_NOMEM : E_FOPEN, fn_dt); attset = as_create("domains", att_delete); if (!attset) error(E_NOMEM); /* create an attribute set */ fprintf(stderr, "\nreading %s ... ", sc_fname(scan)); if ((sc_nexter(scan) < 0) /* start scanning (get first token) */ || (as_parse(attset, scan, AT_ALL) != 0) || (as_attcnt(attset) <= 0)) /* parse attribute set */ error(E_PARSE, sc_fname(scan)); dtree = dt_parse(attset,scan);/* parse the decision tree */ if (!dtree || !sc_eof(scan)) error(E_PARSE, fn_dt); sc_delete(scan); scan = NULL; /* delete the scanner */ fprintf(stderr, "[%d attribute(s)/%d node(s)/%d level(s)] done ", as_attcnt(attset), dt_size(dtree), dt_height(dtree)); fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t)); /* --- read table --- */ as_chars(attset, blanks, fldseps, recseps, uvchars); if (k > 2) { /* if a table file name is given */ table = io_tabin(attset, fn_hdr, fn_tab, flags, "table", 2); if (!table) error(1); /* read the table file */ t = clock(); /* start the timer */ fprintf(stderr, "reducing table ... "); tab_reduce(table); /* reduce the table for speed up */ if (balance /* if the balance flag is set */ && (att_type(as_att(attset, dt_trgid(dtree))) == AT_SYM)) { tab_balance(table, dt_trgid(dtree), (balance == 'l') ? -2.0F : (balance == 'b') ? -1.0F : 0.0F, NULL); } /* balance the class frequencies */ fprintf(stderr,"[%d/%g tuple(s)] done ", tab_tplcnt(table), tab_getwgt(table, 0, INT_MAX)); fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t)); } /* print a log message */ /* --- prune decision/regression tree --- */ t = clock(); /* start the timer */ fprintf(stderr, "pruning tree ... "); /* print a log message */ if (dt_prune(dtree, method, param, maxht, chklb, table) != 0) error(E_NOMEM); /* prune the decision/regression tree */ attcnt = dt_attchk(dtree); /* mark the occurring attributes */ nodecnt = dt_size(dtree); /* get the number of nodes, */ maxht = dt_height(dtree); /* the height of the tree, */ tplwgt = dt_total(dtree); /* and the tuple weight sum */ fprintf(stderr, "[%d attribute(s)/%d node(s)/%d level(s)] done ", attcnt, nodecnt, maxht); fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t)); /* --- write decision/regression tree --- */ t = clock(); /* start the timer */ if (fn_pdt && *fn_pdt) /* if an output file name is given, */ out = fopen(fn_pdt, "w"); /* open output file for writing */ else { /* if no output file name is given, */ out = stdout; fn_pdt = "<stdout>"; } /* write to std. output */ fprintf(stderr, "writing %s ... ", fn_pdt); if (!out) error(E_FOPEN, fn_pdt); if (as_desc(attset, out, AS_TITLE|AS_MARKED|AS_IVALS, maxlen) != 0) error(E_FWRITE, fn_pdt); /* describe the attribute domains */ fputc('\n', out); /* leave one line empty */ if (dt_desc(dtree, out, DT_TITLE|dmode, maxlen) != 0) error(E_FWRITE, fn_pdt); /* describe the dec./reg. tree */ if (maxlen <= 0) maxlen = 72; /* determine maximal line length */ fputs("\n/*", out); /* append additional information */ for (k = maxlen -2; --k >= 0; ) fputc('-', out); fprintf(out, "\n number of attributes: %d", attcnt); fprintf(out, "\n tree height : %d", maxht); fprintf(out, "\n number of nodes : %d", nodecnt); fprintf(out, "\n number of tuples : %g\n", tplwgt); for (k = maxlen -2; --k >= 0; ) fputc('-', out); fputs("*/\n", out); /* terminate additional information */ if (out != stdout) { /* if not written to standard output, */ i = fclose(out); out = NULL;/* close the output file */ if (i != 0) error(E_FWRITE, fn_pdt); } /* check for success and */ fprintf(stderr, "[%d attribute(s)/%d node(s)/%d level(s)] done ", attcnt, nodecnt, maxht); fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t)); /* --- clean up --- */ #ifndef NDEBUG if (table) tab_delete(table, 0); dt_delete(dtree, 1); /* delete table, dec./reg. tree, */ #endif /* and attribute set */ #ifdef STORAGE showmem("at end of program"); /* check memory usage */ #endif return 0; /* return 'ok' */} /* main() */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -