📄 transform-main.c
字号:
/****************************************************************************** transform-main.c -- transform a dataset using learned hypotheses******************************************************************************/#include <stdio.h>#include <math.h>#include "ripper.h"#include "mdb.h"#define MAXHYPS 1000static BOOL form_covers_prefix(int,vec_t *,vec_t *);static void fprint_rule_prefix(int,FILE *,rule_t *);#define print_rule_prefix(n,r) fprint_rule_prefix(n,stdout,r)/*****************************************************************************/char *Program="transform-main";char *Help_str[] = { "syntax: transform -i in.data -o out.data [-dy,n] [-vN] x.names y1.hyp...", " transform a dataset from the standard representation", " to an indication of which rules fired for which examples,", " using rules from a given set of hypotheses", "", "options are:", " -v# set trace level to #", " -dy assume a default rule which fires on every example", " -dn don't use the default rule", " -p add all prefixes of each rule", " -k keep original attributes", " -i file input examples from file (- indicates stdin)", " -o file output examples from file", "", "default: transform -dy -v1 -i in.data -o out.data", NULL};/*****************************************************************************/int main(argc,argv)int argc;char *argv[];{ char *namef,*hypf,*inf,*outf; concept_t *hyps[MAXHYPS],*hyp; int n_hyp; int o,i,j,k,m; FILE *fp; BOOL assume_default,add_prefixes; vec_t *data; example_t *exi; rule_t *rj; BOOL covered; BOOL keep_original; double tm; set_trace_level(NONE); assume_default = TRUE; add_prefixes = FALSE; keep_original = FALSE; while ((o=getopt(argc,argv,"ki:o:v:d:hp"))!=EOF) { switch (o) { case 'v': set_trace_level(atoi(optarg)); break; case 'k': keep_original = TRUE; break; case 'i': inf = optarg; /* if (strcmp(inf,"-")==0) inf = NULL; */ break; case 'o': outf = optarg; break; case 'd': assume_default = !(optarg[0]=='n' || optarg[0]=='N'); break; case 'p': add_prefixes = TRUE; break; case 'h': case '?': default: give_help(); if (o=='h') exit(0); else fatal("option not implemented"); } } /* load the names file */ if (optind<argc) { namef = argv[optind++]; } else { give_help(); fatal("no names file given"); } ld_names(namef); /* load the hypothesis files */ if (optind>=argc) { warning("no hypothesis files given"); } n_hyp = 0; while (optind<argc) { if (n_hyp >= MAXHYPS) fatal("too many hypothesis files"); hypf = argv[optind++]; if ((fp=fopen(hypf,"r"))==NULL) { fatal("can't open concept file '%s' for read",hypf); } hyp = hyps[n_hyp++] = ld_concept(fp); fclose(fp); trace (SUMM) { for (j=0; j<vmax(hyp->rules); j++) { rj = vref(rule_t,hyp->rules,j); printf("H%02dR%02dC%s: ",n_hyp,j+1,rj->conseq->name); print_rule(rj); printf("\n"); if (add_prefixes) { for (m=1; m<vmax(rj->antec); m++) { printf(" H%02dR%02dP%dC%s: ", n_hyp,j+1,m,rj->conseq->name); print_rule_prefix(m,rj); printf("\n"); } } } } } /* load the data */ trace(SUMM) { start_clock(); printf("// timing loading [from %s]...\n",inf); fflush(stdout); } data = ld_data(inf); if (data==NULL || vmax(data)==0) fatal("no data!"); trace(SUMM) { tm = elapsed_time(); printf("// loaded %d examples %d features %d values in %.2f sec\n", vmax(data),n_fields(),n_symbolic_values(),tm); } /* transform the data */ trace(SUMM) { start_clock(); printf("// transforming data...\n"); fflush(stdout); } if ((fp=fopen(outf,"w"))==NULL) { fatal("can't open output file %s",outf); } for (i=0; i<vmax(data); i++) { exi = vref(example_t,data,i); for (k=0; k<n_hyp; k++) { hyp = hyps[k]; for (j=0; j<vmax(hyp->rules); j++) { rj = vref(rule_t,hyp->rules,j); if (form_covers(rj->antec,exi->inst)) { fprintf(fp,"H%02dR%02dC%s ",k+1,j+1,rj->conseq->name); } if (add_prefixes) { for (m=1; m<vmax(rj->antec); m++) { if (form_covers_prefix(m,rj->antec,exi->inst)) { fprintf(fp,"H%02dR%02dP%02dC%s ", k+1,j+1,m,rj->conseq->name); } } } } } if (assume_default) { fprintf(fp," default"); } if (keep_original) { /* print the original example */ fprintf(fp,", "); fprint_example(fp,exi); } else { /* just print the label */ fprintf(fp,", %s.\n",exi->lab.nom->name); } } trace(SUMM) { tm = elapsed_time(); printf("// transformed in %.2f sec\n",tm); }}/* nearly a copy of form_covers */static BOOL form_covers_prefix(int n,vec_t *sform,vec_t *inst){ register gsym_t *cp,*cpmax; aval_t *ex_val; symbol_t *cp_nom; REAL cp_num; /* optimized loop over all conditions */ cpmax = vbase(gsym_t,sform)+n; for (cp=vbase(gsym_t,sform); cp<cpmax; cp++) { if (!cp->nonterm) { cp_nom = cp->value.nom; cp_num = cp->value.num; /* TEST: does condition cover instance? */ ex_val = vbase(aval_t,inst)+cp->attr_index; if (ex_val->kind==MISSING_VALUE) { return (cp->op==OPEQ && cp_nom && cp_nom->kind==MISSING_MARK); } switch (cp->op) { case OPEQ: if (ex_val->kind==SYMBOL && ex_val->u.nom!=cp_nom) return FALSE; if (ex_val->kind==CONTINUOUS && ex_val->u.num!=cp_num) return FALSE; break; case OPNEQ: if (ex_val->kind==SYMBOL && ex_val->u.nom==cp_nom) return FALSE; if (ex_val->kind==CONTINUOUS && ex_val->u.num==cp_num) return FALSE; break; case OPLE: if (ex_val->u.num>cp_num) return FALSE; break; case OPGE: if (ex_val->u.num<cp_num) return FALSE; break; case OPIN: if (!contains_symbol(cp_nom,ex_val->u.set)) return FALSE; break; case OPOUT: if (contains_symbol(cp_nom,ex_val->u.set)) return FALSE; break; default: assert(0); }/* switch*/ } /* if !nonterm */ } /* for each of the first n gsyms */ return TRUE;}static void fprint_rule_prefix(int n,FILE *fp,rule_t *r){ int i; char *sep = " :- "; gsym_t *gsym; fprint_symbol(fp,r->conseq); for (i=0; i<n; i++) { gsym = vref(gsym_t,r->antec,i); if (!gsym->nonterm) { fprintf(fp,"%s",sep); fprint_gsym(fp,gsym); sep = ", "; } } fprintf(fp," (?/?).");}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -