📄 summarize-main.c
字号:
/****************************************************************************** summarize-main.c - driver program for summarization program******************************************************************************/#include <stdio.h>#include <math.h>#include "ripper.h"/*****************************************************************************/#define MIN_ENTRY_WIDTH 2#define PCT(num,denom,def) (denom!=0.0 ? (num*100.0)/(denom) : (def)) static void extend_matrix(vec_t *,vec_t *,symbol_t *);static char *class_string(symbol_t *);static int entry_width(vec_t *,vec_t *);static symbol_t *Stop=NULL;static symbol_t *Sep=NULL;static symbol_t *Colon=NULL;static double F(double,double,double);static int format_aux(char *,double);/******************************************************************************/char *Program="summarize";char *Help_str[] = { "syntax: summarize [options] stem", " summarize a prediction file", "", "options are:", " -c n: if printing one-line summary, use class n (0-k)", " -f fmt: print one-line formatted summary of one class", " fmt is a string containing these control characters", " %c - print class name", " %s - print file stem", " %t - print total number of examples", " %e - print error rate", " %e+ - print error rate in percent", " %e# - print number of errors", " %r - print recall (r+, in percent)", " %p - print precision (p+, in percent)", " %f - print fallout (f+, in percent)", " %Fn - print F[beta] for beta=n (default 1)", NULL};/*****************************************************************************/int main(argc,argv)int argc;char *argv[];{ char *stem; int o; int i,j,k; int mij,fp,fn,tp,tn; int w; int errs,tot; double prec,rec,fall; symbol_t *pred_class,*actual_class; char pred_str[BUFSIZ],actual_str[BUFSIZ]; double p,n; vec_t *classes; /* vector of symbols */ vec_t *matrix; /* vector of vectors of integers */ vec_t *classrow; /* vector of vectors of integers */ int *entry; char format_string[BUFSIZ]; char *cp,*end; double beta; BOOL formatted; BOOL format_class; int toti,totc; formatted = FALSE; strcpy(format_string,"r%p%f%F0.5F1F2c"); format_class = NULLINDEX; while ((o=getopt(argc,argv,"f:c:h"))!=EOF) { switch (o) { case 'f': formatted=TRUE; strcpy(format_string,optarg); break; case 'c': format_class = atoi(optarg); break; case 'h': case '?': default: give_help(); if (o=='h') exit(0); else fatal("option not implemented"); } } tot = errs = 0; classes = new_vec(symbol_t); matrix = new_vec(vec_t); if (optind<argc) { stem = argv[optind++]; if (ld_names(add_ext(stem,".names"))) { for (i=0; i<vmax(Classes); i++) { actual_class = vref(atom_t,Classes,i)->nom; extend_matrix(classes,matrix,actual_class); } } } else { stem = "stdin"; } if (optind<argc) { warning("not all arguments were used: %s ...",argv[optind]); } while (scanf("%s %lf %lf %s\n",pred_str,&p,&n,actual_str)!=EOF) { tot++; pred_class = intern(pred_str); actual_class = intern(actual_str); extend_matrix(classes,matrix,pred_class); extend_matrix(classes,matrix,actual_class); if (pred_class!=actual_class) errs++; classrow = vref(vec_t,matrix,actual_class->index); entry = vref(int,classrow,pred_class->index); (*entry)++; } if (formatted && format_class==NULLINDEX) { /* figure out minority class */ totc = tot+1; for (i=0; i<vmax(classes); i++) { toti = 0; classrow = vref(vec_t,matrix,i); for (j=0; j<vmax(classrow); j++) { mij = *vref(int,classrow,j); toti += mij; } if (toti<totc) { format_class = i; totc = toti; } } } /* print summary */ if (!formatted) { printf("%d errors in %d trials (%.2f%%)\n\n", errs,tot,PCT(errs,tot,0.0)); if (tot==0) exit(0); /* print recall precision numbers */ printf("recall\tprecis.\tfallout\tF[0.5]\tF[1.0]\tF[2.0]\tfocus class\n"); printf("------\t-------\t-------\t------\t-------\t-------\t-----------\n"); } for (k=0; k<vmax(classes); k++) { /* compute stats for class k */ fp = tp = fn = tn = 0; for (i=0; i<vmax(classes); i++) { classrow = vref(vec_t,matrix,i); for (j=0; j<vmax(classrow); j++) { mij = *vref(int,classrow,j); if (i==k && j==k) tp = mij; else if (i==k && j!=k) fn += mij; else if (i!=k && j!=k) tn += mij; else if (i!=k && j==k) fp += mij; } } rec = PCT(tp,tp+fn,0.0); prec = PCT(tp,tp+fp,100.0); fall = PCT(fp,fp+tn,0.0); /* print out stats */ if (!formatted) { printf("%6.2f\t%6.2f\t%6.2f\t%6.4f\t%6.4f\t%6.4f\t%s\n", rec,prec,fall, F(prec,rec,0.5),F(prec,rec,1.0),F(prec,rec,2.0), vref(symbol_t,classes,k)->name); } else if (k==format_class) { cp = format_string; while (*cp) { if (*cp!='%') putchar(*cp++); else { cp++; switch (*cp) { case '%': putchar(*cp); cp++; break; case 'c': /* class */ printf("%-8.8s",vref(symbol_t,classes,k)->name); cp++; break; case 's': /* filestem */ printf("%-8.8s",stem); cp++; break; case 't': /* total */ printf("%8d",tot); cp++; break; case 'e': /* errors */ cp++; if (*cp=='#') /* count */ { printf("%8d",errs); cp++; } else { cp += format_aux(cp,PCT(errs,tot,0.0)); } break; case 'r': /* recall */ cp++; cp += format_aux(cp,rec); break; case 'p': /* precision */ cp++; cp += format_aux(cp,prec); break; case 'f': /* fallout */ cp++; cp += format_aux(cp,fall); break; case 'F': /* FBeta */ cp++; beta = strtod(cp,&end); if (end==NULL) { printf("%8.5f",F(prec,rec,1.0)); } else { printf("%8.5f",F(prec,rec,beta)); cp = end; } break; default: fatal("bad format string"); } /* switch */ }/* else if a %-construct */ } /* for cp */ } /* else if k==format_class */ } /* for k */ printf("\n"); if (!formatted) { /* print confusion matrix */ w = entry_width(classes,matrix); for (i=0; i<vmax(classes); i++) printf(" %*.*s ",w,w,class_string(vref(symbol_t,classes,i))); printf(" <-classified as\n"); for (i=0; i<vmax(classes); i++) printf(" %.*s ",w,"---------------"); printf("\n"); for (i=0; i<vmax(classes); i++) { classrow = vref(vec_t,matrix,i); for (j=0; j<vmax(classes); j++) { entry = vref(int,classrow,j); printf(" %*d ",w,*entry); } printf(" %*.*s: %s\n",w,w, class_string(vref(symbol_t,classes,i)), vref(symbol_t,classes,i)->name); } } /* !formatted */ }static int format_aux(cp,p)char *cp;double p;{ if (*cp=='+') { printf("%8.3f",p); return 1; } else { printf("%8.5f",p/100.0); return 0; }}/* set by entry_width, used by class_string */static int Encode_classes;/* width of entries in the matrix */static int entry_width(classes,matrix)vec_t *classes;vec_t *matrix;{ vec_t *row; int i,j; double log10(double); int w,wij,mij; /* find width of entries in table */ w = MIN_ENTRY_WIDTH; for (i=0; i<vmax(matrix); i++) { row = vref(vec_t,matrix,i); for (j=0; j<vmax(row); j++) { mij = *vref(int,row,j); wij = (mij>0) ? log10((double) mij)+1 : 1; w = max(w,wij); } } /* see if classes need to be encoded */ Encode_classes = 0; for (i=0; i<vmax(classes)-1; i++) { for (j=i+1; j<vmax(classes); j++) { if (!strncmp( vref(symbol_t,classes,i)->name, vref(symbol_t,classes,j)->name, w)) { Encode_classes = 1; } } } /* return the width */ return w;}/* width of entries in the matrix */static char *class_string(class)symbol_t *class;{ static char buf[10]; if (Encode_classes) { if (class->index > 26) { sprintf(buf,"%c%d", 'a'+class->index,class->index/26); } else { sprintf(buf,"%c ", 'a'+class->index); } return buf; } else { return class->name; }}/* add a new class, and a new row and column to the matrix */ static void extend_matrix(classes,matrix,class)vec_t *classes;vec_t *matrix;symbol_t *class;{ vec_t *row; int i,zero=0; if (!vmem(symbol_t,classes,class)) { /* create new class */ class->index = vmax(classes); ext_vec(symbol_t,classes,class); /* add new row to matrix */ row = new_vec(int); for (i=0; i<vmax(classes); i++) { ext_vec(int,row,&zero); } ext_vec(vec_t,matrix,row); /* extend all other rows by one entry */ for (i=0; i<vmax(classes)-1; i++) { row = vref(vec_t,matrix,i); ext_vec(int,row,&zero); } }}static double F(prec,recall,beta)double prec,recall,beta;{ double bsq,P,R; bsq = beta*beta; /* convert from percent */ P = prec/100.0; R = recall/100.0; return ((bsq+1.0)*P*R) / (bsq*P + R);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -