⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ripper-main.c

📁 Ripper 分类算法
💻 C
字号:
/****************************************************************************** ripper-main.c - main driver program for learning program******************************************************************************/#include <stdio.h>#include <math.h>#include "ripper.h"/*****************************************************************************/static double binomial_std_err(double,int);/* interface to cross-validation*/static concept_t *Hyp=NULL;static void train_ripper(vec_t *data){    if (Hyp!=NULL) free_concept(Hyp);    Hyp = model(data);    printf("Hypothesis:\n");    print_concept(Hyp); }static double test_ripper(vec_t *data){    return error_rate(Hyp,data);}/******************************************************************************/char *Program="ripper";char *Help_str[] = { "syntax: ripper [options] filestem", "  learn a ruleset from examples",  "", "options are:", " -v#      set trace level to #, which must be 0, 1, 2, or 3", " -n       expect noisy data (default)", " -c       expect clean data", " -kN      estimate error rate by N-fold cross-validation",    " -l       estimate error rate via leave-one-out method", "           (ie N-fold cross-validation where N is training set size)", " -aORD    arrange classes in order ORD where ORD must be either",  "              +freq---order by increasing frequency (the default)", "              -freq---order by decreasing frequency", "              mdl---order by description length of ruleset", "              given---order classes as listed in .names file", "              unordered--don't order classes", " -g str  use grammar file str.gram (rather than stem.gram)", " -f str  use names file str.gram (rather than stem.gram)", " -s      read data file from stdin", "",   "other options:", " -M #    use subsamples of size # in choosing tests", " -O #    perform # optimization passes", " -G      print completed grammar and exit", " -N      print names file and exit", " -R      randomize operation", " -D #    set max decompression", " -I#     discretize continuous attributes into # equal-frequency segments", " -!ns    allow/disallow negative tests in nominal/set valued attributes", "         ('negative' tests are != for nominals, !~ for sets)", " -S #    simplify hypothesis more (>1) or less (<1)", " -L #    set loss ratio to # (ratio=false-pos-cost/false-neg-cost),", "         where a 'positive' example is an instance of the minority class", " -A      add redundant features to rules ", " -F #    force rules to cover at least # examples", " -E      extend each rule to be very specific and write", "         the revised rules into a file .ehyp (for experiments)",  "", "default: ", "  ripper -v0 -n -a+freq -O2 -D64 -!n -S0.5 -L1.0 -F1 -M+infty <filestem>", "", NULL};/*****************************************************************************/int main(argc,argv)int argc;char *argv[];{    int o;     char *charp;    vec_t *train_data,*test_data;    char *stem="foo", *gstem=NULL, *nstem=NULL;    concept_t *hyp;    BOOL print_grammar_flag=FALSE;    BOOL print_names_flag=FALSE;    BOOL crossval=FALSE;    BOOL leave_one_out=FALSE;    int folds;    double loss_ratio;    FILE *out_fp;    BOOL randomize_seed=FALSE;    BOOL use_stdin=FALSE;    double tm,err;    BOOL do_extend_rules=FALSE;    set_trace_level(NONE);    folds=5;    while ((o=getopt(argc,argv,"hv:nck:la:g:f:tsO:GNRL:D:I:!:S:M:AF:E"))	   !=EOF)     {	switch (o) {	case 'v':	    set_trace_level(atoi(optarg)); 	    printf("option: trace level set to %d\n",trace_level());	    break;	case 'n':	    Simplify = TRUE;	    printf("option: data is noisy\n");	    break;	case 'c':	    Simplify = FALSE;	    printf("option: data is clean\n");	    break;	case 'k':	    crossval = TRUE;	    folds = atoi(optarg);	    printf("option: %d-fold cross-validation\n",folds);	    break;	case 'l':	    crossval = TRUE;	    folds = 0;	    printf("option: leave-one-out cross-validation\n");	    break;	case 'a': 	    switch (optarg[0]) {	    case '+': 		Class_ordering = INCFREQ;		printf("option: find rules for least frequent classes first\n");		break;	    case '-':		Class_ordering = DECFREQ;		printf("option: find rules for most frequent classes first\n");		break;	    case 'g':	    case 'G':		Class_ordering = GIVEN;		printf("option: use class ordering given in names file\n");		break;	    case 'm':	    case 'M':		Class_ordering = MDL;		printf("option: choose ordering of classes using MDL\n");		break;	    case 'u':	    case 'U':		Class_ordering = UNORDERED;		printf("option: unordered classes\n");		break;	    default:		warning("bad argument to -o option");		break;	    }	    break;	  case 'g':	    gstem = newmem(strlen(optarg)+1,char);	    strcpy(gstem,optarg);	    printf("option: use grammar file '%s.gram'\n",gstem);	    break;	  case 'f':	    nstem = newmem(strlen(optarg)+1,char);	    strcpy(nstem,optarg);	    printf("option: use names file '%s.gram'\n",nstem);	    break;	  case 's':	    use_stdin = TRUE;	    printf("option: read data from stdin\n");	    break;	  case 'O':	    Optimizations = atoi(optarg);	    printf("option: optimize %d time(s)\n",Optimizations);	    break;	  case 'S':	    Simplify=TRUE;	    MDL_theory_factor = atof(optarg);	    printf("option: multiply coding cost of theory by %g\n",MDL_theory_factor);	    break;	  case 'D':	    Max_decompression = atof(optarg);	    printf("option: max decompression is %g\n",Max_decompression);	    break;	  case '!':	    for (charp=optarg; *charp; charp++) {		if (*charp=='n' || *charp=='N') Eq_negations = TRUE;		if (*charp=='s' || *charp=='S') Set_negations = TRUE;	    }	    printf("option: %s allow inequality tests for nominal attributes\n",		   Eq_negations? "will" : "will not");	    printf("option: %s allow inequality tests for set-valued attributes\n",		   Set_negations? "will" : "will not");	    break;	  case 'R':	    randomize_seed=TRUE;	    printf("option: will set random seed from clock\n");	    break;	  case 'L':	    loss_ratio = atof(optarg);	    FP_cost = 2.0*loss_ratio/(loss_ratio+1.0);	    FN_cost = 2.0/(loss_ratio+1.0);	    printf("option: ratio of cost of FP to cost of FN is %g:%g\n",		   FP_cost,FN_cost);	    break;	  case 'I':	    N_intervals = atoi(optarg);	    if (N_intervals < 0) 	      fatal("argument to -i must be a positive integer or 0");	    if (N_intervals==0) 	      printf("option: no discretization\n",N_intervals);	    else	      printf("option: discretize into %d intervals\n",N_intervals);	    break;	  case 'G':	    /* printf("option: will echo grammar\n"); */	    print_grammar_flag = TRUE;	    break;	  case 'N':	    /* printf("option: will echo names file\n"); */	    print_names_flag = TRUE;	    break;	  case 'M':	    Max_sample = atoi(optarg);	    printf("option: max subsample is %d\n",Max_sample); 	    break;	  case 'A':	    Add_redundancy = TRUE;	    printf("option: add redundant conditions to rules\n");	    break;	  case 'F':	    Min_coverage = max(atoi(optarg),1);	    printf("option: rules must cover %d example(s)\n",Min_coverage); 	    break;	  case 'E':	    do_extend_rules = 1;	    printf("option: will save extended rules\n"); 	    break;	  case 'h':	  case '?':	  default: 	    give_help();	    if (o!='h') fatal("option not implemented");	    else exit(0);	}    }        /* print out parameter settings */    trace(SUMM) {	printf("// parameter settings:\n");	if (N_intervals!=0)	  printf("//   discretize into %d intervals\n",N_intervals);	else 	  printf("//   no discretization\n");	if (Max_sample!=0) 	  printf("//   use subsamples of %d examples\n",Max_sample);	printf("//   %s != tests in rules\n",	       Eq_negations?"allow":"disallow");	printf("//   %s !~ tests in rules\n",	       Set_negations?"allow":"disallow");	if (Class_ordering==UNORDERED) {	    printf("//   not ordering classes\n"); 	} else {	    printf("//   ordering classes by ");	    switch (Class_ordering) {	      case GIVEN: printf("ordering given in .names file\n"); break;	      case MDL: printf("MDL heuristic\n"); break;	      case INCFREQ: printf("increasing frequency\n"); break;	      case DECFREQ: printf("decreasing frequency\n"); break;	      default: printf("???\n"); break;	    }	}	printf("//   expect %s data\n",Simplify?"noisy":"clean");	if (Simplify) {	    printf("//   optimize %d time(s)\n",Optimizations);	    printf("//   max decompression is %g bits\n",		   Max_decompression);	}	printf("//   rules cover %d examples\n",Min_coverage);	fflush(stdout);    }    if (optind<argc) {	stem = argv[optind++];	if (gstem==NULL) gstem=stem;    } else {	give_help();	fatal("no file stem specified");    }    if (optind<argc) {	warning("not all arguments were used: %s ...",argv[optind]);    }    trace(SUMM) printf("// loading training data...\n");    if (nstem==NULL) nstem = stem;    ld_names(add_ext(nstem,".names"));    if (use_stdin) {	train_data = ld_data((char *)NULL);    } else {	train_data = ld_data(add_ext(stem,".data"));    }    if (!train_data) {	fatal("no training data!");    }    trace(SUMM) printf("// loading test data...\n");    if (!crossval) test_data = ld_data(add_ext(stem,".test"));    else test_data = NULL;    if (!ld_grammar(add_ext(gstem,".gram"),gstem==stem,train_data)) {	fatal("error in loading grammar");    }    if (print_grammar_flag) {	print_grammar();	return 0;    }    if (print_names_flag) {	print_names();	return 0;    }    if (randomize_seed) randomize();    if (crossval) {	cross_validate(folds,train_data,&train_ripper,&test_ripper);    } else {	if ((out_fp=fopen(add_ext(stem,".hyp"),"w"))==NULL) {	    fatal("can't open output file");	}	trace(SUMM) {	    printf("// timing model command...\n"); 	    fflush(stdout);	}	start_clock(); 	hyp = model(train_data);	tm = elapsed_time();	trace(SUMM) {	    printf("// model command took %.2f sec\n",tm);	    fflush(stdout);	}	printf("Final hypothesis is:\n");	print_concept(hyp); 	fshow_concept(out_fp,hyp); 	fclose(out_fp);	printf("===============================");	printf(" summary ");	printf("===============================\n");	err = error_rate(hyp,train_data);	printf("Train error rate:  %.2f%% +/- %.2f%% (%d datapoints)    <<\n",	       (err*100),	       100*binomial_std_err(err,vmax(train_data)),	       vmax(train_data));	if (test_data) {	    err = error_rate(hyp,test_data);	    printf("Test error rate:   %.2f%% +/- %.2f%% (%d datapoints)   <<\n",		   100*err,		   100*binomial_std_err(err,vmax(test_data)),		   vmax(test_data));	}	printf("Hypothesis size:   %d rules, %d conditions\n",	       vmax(hyp->rules),concept_size(hyp));	printf("Learning time:     %.2f sec\n",tm);	if (do_extend_rules) {	    if ((out_fp=fopen(add_ext(stem,".ehyp"),"w"))==NULL) {		fatal("can't open secondard output file");	    }	    extend_rules(out_fp,hyp,train_data);	}    }    return 0;}static double binomial_std_err(err_rate,n)double err_rate;int n;{    return sqrt( (err_rate)*(1-err_rate)/((double)n-1) );}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -