📄 data2text-main.c
字号:
#include <stdio.h>#include "ripper.h"#include "protos.h"#include "mdb.h"/******************************************************************************/char *Program="data2text";char *Help_str[] = { "syntax: data2text [options] [stem]", " convert ripper dataset (.data and .test) to text", "", "options are:", " -v n set verbosity", " -s read from stdin (not stem.data)", NULL};static void data2text(vec_t *,char *);static void create_text_names(char *);static int dlt(char *,char *);main(argc,argv)int argc;char *argv[];{ int o; vec_t *train_data,*test_data; char *stem; BOOL use_stdin; use_stdin = FALSE; set_trace_level(SUMM); while ((o=getopt(argc,argv,"sv:h"))!=EOF) { switch (o) { case 's': use_stdin = TRUE; printf("option: read from stdin\n"); break; case 'v': set_trace_level(atoi(optarg)); break; case 'h': default: give_help(); if (o=='h') exit(0); else fatal("option not implemented"); } } if (optind<argc) { stem = argv[optind++]; ld_names(add_ext(stem,".names")); if (use_stdin) train_data = ld_data(NULL); else train_data = ld_data(add_ext(stem,".data")); test_data = ld_data(add_ext(stem,".test")); create_text_names(add_ext(stem,"_text.names")); } else { stem = "stdin"; train_data = ld_data(NULL); } if (optind<argc) { warning("not all arguments were used: %s ...",argv[optind]); } if (train_data==NULL || vmax(train_data)==0) fatal("no data!"); data2text(train_data,add_ext(stem,"_text.data")); if (test_data) { data2text(test_data,add_ext(stem,"_text.test")); }}/* convert data */static void data2text(vec_t *data,char *outfname){ static double **values=NULL; static int *n_values; example_t *exi; aval_t *vij; symbol_t *sijk; char *quote; int i,j,k; int lo,hi,m; FILE *fp; /* symbol used to quote words */ quote = "'"; /* store and sort all the numeric values from train_data */ if (values==NULL) { values = newmem(n_fields(),double *); n_values = newmem(n_fields(),int); for (j=0; j<n_fields(); j++) { if (continuous_field(j)) { values[j] = newmem(vmax(data),double); k = 0; for (i=0; i<vmax(data); i++) { exi = vref(example_t,data,i); vij = vref(aval_t,exi->inst,j); if (vij->kind != MISSING_VALUE) { values[j][k++] = vij->u.num; } } qsort((char *)values[j],k,sizeof(double),&dlt); n_values[j] = k; } } trace(SUMM) { printf("// constructed discretization values for %s\n",outfname); } } else { trace(SUMM) { printf("// using old discretization values for %s\n",outfname); } } if ((fp = fopen(outfname,"w"))==NULL) { fatal("can't write file %s",outfname); } /* now start the conversion */ for (i=0; i<vmax(data); i++) { exi = vref(example_t,data,i); for (j=0; j<n_fields(); j++) { if (j>0) fprintf(fp," "); vij = vref(aval_t,exi->inst,j); if (vij->kind == MISSING_VALUE) { ; /* for now, print nothing */ } else if (ignored_field(j)) { ; /* print nothing */ } else if (symbolic_field(j)) { assert(vij->u.nom != NULL); fprintf(fp,"%s%s=%s%s", quote,field_name(j),vij->u.nom->name,quote); } else if (set_field(j)) { for (k=0; k<vmax(vij->u.set); k++) { sijk = *vref(symbol_t*,vij->u.set,k); if (k>0) fprintf(fp," "); fprintf(fp,"%s%s~%s%s", quote,field_name(j),sijk->name,quote); } } else if (continuous_field(j)) { /* do binary search for value */ lo = 0; hi = n_values[j]; while (hi>lo) { m = (hi+lo)/2; if (vij->u.num == values[j][m]) { fprintf(fp," %s%s=%g%s", quote,field_name(j),values[j][m],quote); break; } else if (vij->u.num > values[j][m]) { fprintf(fp," %s%s>%g%s", quote,field_name(j),values[j][m],quote); lo = m+1; } else /* (vij->u.num < values[j][m]) */ { fprintf(fp," %s%s<%g%s", quote,field_name(j),values[j][m],quote); hi = m; } } } else { assert(FALSE); } } /* field j */ fprintf(fp,",%s.\n",exi->lab.nom->name); } /* example i */ trace(SUMM) { printf("// created file %s\n", outfname); } fclose(fp);}/* for qsort */static int dlt(char *p1,char *p2){ double *d1,*d2; d1 = (double *)p1; d2 = (double *)p2; if ((*d1) < (*d2)) return -1; else if ((*d1) > (*d2)) return +1; else return 0;}static void create_text_names(char *namef){ int i; FILE *fp; if ((fp = fopen(namef,"w"))==NULL) { fatal("can't write %s",namef); } trace(SUMM) printf("// creating names file\n"); for (i=0; i<vmax(Classes); i++) { if (i>0) fprintf(fp,","); fprint_symbol(fp,vref(atom_t,Classes,i)->nom); } fprintf(fp,".\nTESTS:\tset.\n"); fclose(fp);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -