⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 data2text-main.c

📁 Ripper 分类算法
💻 C
字号:
#include <stdio.h>#include "ripper.h"#include "protos.h"#include "mdb.h"/******************************************************************************/char *Program="data2text";char *Help_str[] = {    "syntax: data2text [options] [stem]",    "   convert ripper dataset (.data and .test) to text",    "",    "options are:",    " -v n   set verbosity",    " -s     read from stdin (not stem.data)",    NULL};static void data2text(vec_t *,char *);static void create_text_names(char *);static int dlt(char *,char *);main(argc,argv)int argc;char *argv[];{    int o;    vec_t *train_data,*test_data;    char *stem;    BOOL use_stdin;    use_stdin = FALSE;    set_trace_level(SUMM);    while ((o=getopt(argc,argv,"sv:h"))!=EOF) {	switch (o) {	  case 's':	    use_stdin = TRUE;	    printf("option: read from stdin\n");	    break;	  case 'v':	    set_trace_level(atoi(optarg));	    break;	  case 'h':	  default: 	    give_help();	    if (o=='h') exit(0);	    else fatal("option not implemented");	}    }    if (optind<argc) {	stem = argv[optind++];	ld_names(add_ext(stem,".names"));	if (use_stdin) train_data = ld_data(NULL);	else train_data = ld_data(add_ext(stem,".data"));	test_data = ld_data(add_ext(stem,".test"));	create_text_names(add_ext(stem,"_text.names"));    } else {	stem = "stdin";	train_data = ld_data(NULL);    }    if (optind<argc) {	warning("not all arguments were used: %s ...",argv[optind]);    }    if (train_data==NULL || vmax(train_data)==0) fatal("no data!");    data2text(train_data,add_ext(stem,"_text.data"));    if (test_data) {	data2text(test_data,add_ext(stem,"_text.test"));	    }}/* convert data */static void data2text(vec_t *data,char *outfname){    static double **values=NULL;    static int *n_values;    example_t *exi;    aval_t *vij;    symbol_t *sijk;    char *quote;    int i,j,k;     int lo,hi,m;    FILE *fp;    /* symbol used to quote words */    quote = "'";    /* store and sort all the numeric values from train_data */    if (values==NULL) {	values = newmem(n_fields(),double *);	n_values = newmem(n_fields(),int);	for (j=0; j<n_fields(); j++) {	    if (continuous_field(j)) {		values[j] = newmem(vmax(data),double);		k = 0;		for (i=0; i<vmax(data); i++) {		    exi = vref(example_t,data,i); 		    vij = vref(aval_t,exi->inst,j);		    if (vij->kind != MISSING_VALUE) {			values[j][k++] = vij->u.num;		    }		}		qsort((char *)values[j],k,sizeof(double),&dlt);		n_values[j] = k;	    }	}	trace(SUMM) {	    printf("// constructed discretization values for %s\n",outfname);	}    } else {	trace(SUMM) {	    printf("// using old discretization values for %s\n",outfname);	}    }    if ((fp = fopen(outfname,"w"))==NULL) {	fatal("can't write file %s",outfname);    }    /* now start the conversion */    for (i=0; i<vmax(data); i++) {	exi = vref(example_t,data,i); 	for (j=0; j<n_fields(); j++) {	    if (j>0) fprintf(fp," ");	    vij = vref(aval_t,exi->inst,j);	    if (vij->kind == MISSING_VALUE) {		; /* for now, print nothing */	    } else if (ignored_field(j)) {		; /* print nothing */	    } else if (symbolic_field(j)) {		assert(vij->u.nom != NULL);		fprintf(fp,"%s%s=%s%s",		       quote,field_name(j),vij->u.nom->name,quote);	    } else if (set_field(j)) {		for (k=0; k<vmax(vij->u.set); k++) {		    sijk = *vref(symbol_t*,vij->u.set,k);		    if (k>0) fprintf(fp," ");		    fprintf(fp,"%s%s~%s%s", 			   quote,field_name(j),sijk->name,quote);		}	    } else if (continuous_field(j)) {		/* do binary search for value */		lo = 0; hi = n_values[j];		while (hi>lo) {		    m = (hi+lo)/2;		    if (vij->u.num == values[j][m]) {			fprintf(fp," %s%s=%g%s", 			   quote,field_name(j),values[j][m],quote);			break;					    } else if (vij->u.num > values[j][m]) {			fprintf(fp," %s%s>%g%s", 			   quote,field_name(j),values[j][m],quote);			lo = m+1;		    } else /*  (vij->u.num < values[j][m]) */ {			fprintf(fp," %s%s<%g%s", 			   quote,field_name(j),values[j][m],quote);			hi = m;		    }		}	    } else {		assert(FALSE);	    }	} /* field j */	fprintf(fp,",%s.\n",exi->lab.nom->name);    } /* example i */    trace(SUMM) {	printf("// created file %s\n", outfname);    }    fclose(fp);}/* for qsort */static int dlt(char *p1,char *p2){    double *d1,*d2;    d1 = (double *)p1;    d2 = (double *)p2;    if ((*d1) < (*d2)) return -1;     else if ((*d1) > (*d2)) return +1;     else return 0;}static void create_text_names(char *namef){    int i;    FILE *fp;        if ((fp = fopen(namef,"w"))==NULL) {	fatal("can't write %s",namef);    }    trace(SUMM) printf("// creating names file\n");    for (i=0; i<vmax(Classes); i++) {	if (i>0) fprintf(fp,",");	fprint_symbol(fp,vref(atom_t,Classes,i)->nom);    }    fprintf(fp,".\nTESTS:\tset.\n");    fclose(fp);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -