📄 rsx.c
字号:
printf("%s\n", VERSION); printf("-x adapt rule set support/confidence " "(default: classify cases)\n"); printf("-p# prediction field name " "(default: %s)\n", res.n_pred); printf("-s# support field name " "(default: no support field)\n"); printf("-c# confidence field name " "(default: no confidence field)\n"); printf("-a align fields of output table " "(default: do not align)\n"); printf("-w do not write field names to output file\n"); printf("-b/f/r# blank characters, field and record separators\n" " (default: \" \\t\\r\", \" \\t\", \"\\n\")\n"); printf("-u# unknown value characters (default: \"?\")\n"); printf("-n number of tuple occurrences in last field\n"); printf("rsfile file containing rule set description\n"); printf("-d use default header " "(field names = field numbers)\n"); printf("-h read table header (field names) from hdrfile\n"); printf("hdrfile file containing table header (field names)\n"); printf("tabfile table file to read " "(field names in first record)\n"); printf("outfile file to write output table/rule set to " "(optional)\n"); return 0; /* print a usage message */ } /* and abort the program */ /* --- evaluate arguments --- */ for (i = 1; i < argc; i++) { /* traverse arguments */ s = argv[i]; /* get option argument */ if (optarg) { *optarg = s; optarg = NULL; continue; } if ((*s == '-') && *++s) { /* -- if argument is an option */ while (1) { /* traverse characters */ switch (*s++) { /* evaluate option */ case 'x': adapt = 1; break; case 'p': optarg = &res.n_pred; break; case 's': optarg = &res.n_supp; break; case 'c': optarg = &res.n_conf; break; case 'a': outflags |= AS_ALIGN; break; case 'w': outflags &= ~AS_ATT; break; case 'b': optarg = &blanks; break; case 'f': optarg = &fldseps; break; case 'r': optarg = &recseps; break; case 'u': optarg = &uvchars; break; case 'n': outflags |= AS_WEIGHT; inflags |= AS_WEIGHT; break; case 'l': maxlen = (int)strtol(s, &s, 0); break; case 'd': inflags |= AS_DFLT; break; case 'h': optarg = &fn_hdr; break; default : error(E_OPTION, *--s); break; } /* set option variables */ if (!*s) break; /* if at end of string, abort loop */ if (optarg) { *optarg = s; optarg = NULL; break; } } } /* get option argument */ else { /* if argument is no option */ switch (k++) { /* evaluate non-option */ case 0: fn_rs = s; break; case 1: fn_tab = s; break; case 2: fn_out = s; break; default: error(E_ARGCNT); break; } /* note filenames */ } } if (optarg) error(E_OPTARG); /* check option argument */ if ((k < 2) || (k > 3)) error(E_ARGCNT); if (fn_hdr && (strcmp(fn_hdr, "-") == 0)) fn_hdr = ""; /* convert "-" to "" */ i = (!fn_rs || !*fn_rs) ? 1 : 0; if (fn_tab && !*fn_tab) i++; if (fn_hdr && !*fn_hdr) i++; /* check assignments of stdin: */ if (i > 1) error(E_STDIN); /* stdin must not be used twice */ if (fn_hdr) /* set the header file flag */ inflags = AS_ATT | (inflags & ~AS_DFLT); if ((outflags & AS_ATT) && (outflags & AS_ALIGN)) outflags |= AS_ALNHDR; /* set align to header flag */ /* --- read rule set --- */ t = clock(); /* start the timer */ scan = sc_create(fn_rs); /* create a scanner */ if (!scan) error((!fn_rs || !*fn_rs) ? E_NOMEM : E_FOPEN, fn_rs); attset = as_create("domains", att_delete); if (!attset) error(E_NOMEM); /* create an attribute set */ fprintf(stderr, "\nreading %s ... ", sc_fname(scan)); if ((sc_nexter(scan) < 0) /* start scanning (get first token) */ || (as_parse(attset, scan, AT_ALL) != 0) || (as_attcnt(attset) <= 0)) /* parse attribute set */ error(E_PARSE, sc_fname(scan)); ruleset = rs_parse(attset, scan); /* parse the rule set */ if (!ruleset || !sc_eof(scan)) error(E_PARSE, fn_rs); sc_delete(scan); scan = NULL; /* delete the scanner */ i = rs_rulecnt(ruleset); /* check whether the rule set */ if (i < 0) error(E_EMPTY); /* contains at least one rule */ attid = r_headatt(rs_rule(ruleset, 0)); while (--i > 0) { /* check for a unique head attribute */ if (r_headatt(rs_rule(ruleset, i)) != attid) break; } if (i > 0) error(E_HEAD); /* (otherwise execution is imposs.) */ fprintf(stderr, "[%d attribute(s), %d rule(s)] done ", as_attcnt(attset), rs_rulecnt(ruleset)); fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t)); if (adapt) { /* if to adapt support and confidence */ for (i = rs_rulecnt(ruleset); --i >= 0; ) { rule = rs_rule(ruleset,i);/* traverse the rules */ r_setsupp(rule, 0); r_setconf(rule, 0); } /* clear support and confidence */ } /* of each rule */ /* --- read table header --- */ t = clock(); /* start the timer */ res.att = as_att(attset, attid); res.type = att_type(res.att); /* get the target attribute */ for (attid = as_attcnt(attset); --attid >= 0; ) att_setmark(as_att(attset, attid), 1); att_setmark(res.att, 0); /* mark all attribs. except the class */ as_chars(attset, blanks, fldseps, recseps, uvchars); in = io_hdr(attset, fn_hdr, fn_tab, inflags|AS_MARKED, 1); if (!in) error(1); /* read the table header */ /* --- process tuples --- */ if ((att_getmark(res.att) < 0)/* either the class must be present */ && (k <= 2)) /* or an output file must be written */ error(E_CLASS, att_name(res.att), fn_tab); if ((k > 2) && !adapt) { /* if to write an output table */ if ((outflags & AS_ALIGN) /* if to align output file */ && (in != stdin)) { /* and not to read from stdin */ i = AS_INST | (inflags & ~(AS_ATT|AS_DFLT)); while (as_read(attset, in, i) == 0); fclose(in); /* determine the column widths */ in = io_hdr(attset, fn_hdr, fn_tab, inflags|AS_MARKED, 1); if (!in) error(1); /* reread the table header */ } /* (necessary because of first tuple) */ if (fn_out && *fn_out) /* if an output file name is given, */ out = fopen(fn_out, "w"); /* open the output file for writing */ else { /* if no output file name is given, */ out = stdout; fn_out = "<stdout>"; } /* write to stdout */ if (!out) error(E_FOPEN, fn_out); k = AS_MARKED|AS_INFO1|AS_RDORD|outflags; if (outflags & AS_ATT) /* if to write table header */ as_write(attset, out, k, infout); k = AS_INST|(k & ~AS_ATT); /* write the attribute names */ } /* to the output file */ f = AS_INST | (inflags & ~(AS_ATT|AS_DFLT)); i = ((inflags & AS_DFLT) && !(inflags & AS_ATT)) ? 0 : as_read(attset, in, f); inst = att_inst(res.att); /* get the instance of the target */ while (i == 0) { /* record read loop */ i = rs_exec(ruleset); /* find the first applicable rule */ if (i >= 0) { /* if a rule is applicable, */ rule = rs_rule(ruleset,i);/* get and execute this rule */ res.pred = *r_headval(rule); res.supp = r_getsupp(rule); res.conf = r_getconf(rule); } else { /* if no rule is applicable */ rule = NULL; /* use the rule variable as a flag */ if (res.type == AT_SYM) res.pred.i = UV_SYM; else res.pred.f = UV_FLT; res.supp = res.conf = 0; /* set an unknown prediction and */ } /* clear the support and confidence */ wgt = as_getwgt(attset); /* classify tuple */ tplwgt += wgt; tplcnt++; /* count tuple and sum its weight */ if (adapt && rule) r_setsupp(rule, res.supp +wgt); if (res.type == AT_SYM) { /* if the target att. is symbolic */ if ((inst->i > UV_SYM) && (res.pred.i != inst->i)) errcnt += wgt; /* count classification errors */ else if (adapt && rule) /* sum the correct classifications */ r_setconf(rule, res.conf +wgt); } else { /* if the target att. is numeric */ if (res.type == AT_INT) /* if it is integer-valued */ diff = (inst->i > UV_INT) ? res.pred.f -inst->i : 0; else /* if it is float-valued */ diff = (inst->f > UV_FLT) ? res.pred.f -inst->f : 0; errcnt += d = diff *diff; /* sum the squared differences */ if (adapt && rule) /* for all tuples and for the rule */ r_setconf(rule, res.conf +d); } if (out && (as_write(attset, out, k, infout) != 0)) error(E_FWRITE, fn_out); /* write tuple to output file */ i = as_read(attset, in, f); /* try to read the next record */ } if (i < 0) { /* if an error occurred, */ err = as_err(attset); /* get the error information */ tplcnt += (inflags & (AS_ATT|AS_DFLT)) ? 1 : 2; io_error(i, fn_tab, tplcnt, err->s, err->fld, err->exp); error(1); /* print an error message */ } /* and abort the program */ if (in != stdin) fclose(in); /* close the table file and */ in = NULL; /* clear the file variable */ if (out && (out != stdout)) { /* if an output file exists, */ i = fclose(out); out = NULL;/* close the output file */ if (i) error(E_FWRITE, fn_out); } /* print a success message */ fprintf(stderr, "[%d/%g tuple(s)] done ", tplcnt, tplwgt); fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t)); /* --- write the adapted rule set --- */ if (adapt) { /* if to adapt support and confidence */ t = clock(); /* start the timer */ for (i = rs_rulecnt(ruleset); --i >= 0; ) { rule = rs_rule(ruleset,i);/* traverse the rules */ wgt = r_getsupp(rule); /* get and check */ if (wgt <= 0) continue; /* the new support of each rule */ wgt = r_getconf(rule) /wgt; r_setconf(rule, (res.type == AT_SYM) ? wgt : sqrt(wgt)); } /* compute and set the new confidence */ if (!fn_out || !*fn_out) { /* if no proper file name is given, */ out = stdout; fn_out = "<stdout>"; } /* write to stdout */ else { /* if a table file is given, */ fprintf(stderr, "writing %s ... ", fn_out); out = fopen(fn_out, "w"); /* open output file for writing */ if (!out) error(E_FOPEN, fn_out); } outflags = RS_TITLE|RS_INFO|RS_CONF|RS_SUPP; if ((as_desc(attset, out, AS_TITLE|AS_MARKED, maxlen) != 0) || (rs_desc(ruleset, out, outflags, maxlen) != 0)) error(E_FWRITE, fn_out); /* describe domains and rule set */ if (out != stdout) { /* if not written to standard output, */ i = fclose(out); out = NULL; /* close the output file */ if (i != 0) error(E_FWRITE, fn_rs); } /* print a success message */ fprintf(stderr, "[%d attribute(s), %d rule(s)] done ", as_attcnt(attset), rs_rulecnt(ruleset)); fprintf(stderr, "[%.2fs].\n", SEC_SINCE(t)); } /* --- print the error statistics --- */ if (att_getmark(res.att) >= 0) { /* if the target is present */ if (res.type != AT_SYM) /* if the target att. is numeric */ fprintf(stderr, "root of mean squared error: %g\n", (tplwgt > 0) ? sqrt(errcnt /tplwgt) : 0); else /* if the target att. is symbolic */ fprintf(stderr, "%g error(s) (%.2f%%)\n", errcnt, (tplwgt > 0) ? 100*(errcnt /tplwgt) : 0); } /* print number of misclass. */ /* --- clean up --- */ #ifndef NDEBUG rs_delete(ruleset, 1); /* delete the rule set */ #endif /* and the attribute set */ #ifdef STORAGE showmem("at end of program"); /* check memory usage */ #endif return 0; /* return 'ok' */} /* main() */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -