⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 summarize-main.c

📁 Ripper 分类算法
💻 C
字号:
/****************************************************************************** summarize-main.c - driver program for summarization program******************************************************************************/#include <stdio.h>#include <math.h>#include "ripper.h"/*****************************************************************************/#define MIN_ENTRY_WIDTH 2#define PCT(num,denom,def)  (denom!=0.0 ? (num*100.0)/(denom) : (def)) static void extend_matrix(vec_t *,vec_t *,symbol_t *);static char *class_string(symbol_t *);static int entry_width(vec_t *,vec_t *);static symbol_t *Stop=NULL;static symbol_t *Sep=NULL;static symbol_t *Colon=NULL;static double F(double,double,double);static int format_aux(char *,double);/******************************************************************************/char *Program="summarize";char *Help_str[] = {    "syntax: summarize [options] stem",    "  summarize a prediction file",    "",    "options are:",    "  -c n:    if printing one-line summary, use class n (0-k)",    "  -f fmt:  print one-line formatted summary of one class",    "           fmt is a string containing these control characters",    "              %c -  print class name",    "              %s -  print file stem",    "              %t -  print total number of examples",    "              %e -  print error rate",    "              %e+ - print error rate in percent",    "              %e# - print number of errors",    "              %r -  print recall (r+, in percent)",    "              %p -  print precision (p+, in percent)",    "              %f -  print fallout (f+, in percent)",    "              %Fn - print F[beta] for beta=n (default 1)",     NULL};/*****************************************************************************/int main(argc,argv)int argc;char *argv[];{    char *stem;    int o;    int i,j,k;    int mij,fp,fn,tp,tn;    int w;    int errs,tot;    double prec,rec,fall;    symbol_t *pred_class,*actual_class;    char pred_str[BUFSIZ],actual_str[BUFSIZ];    double p,n;    vec_t *classes; /* vector of symbols */    vec_t *matrix;  /* vector of vectors of integers */    vec_t *classrow;  /* vector of vectors of integers */    int *entry;    char format_string[BUFSIZ];    char *cp,*end;    double beta;    BOOL formatted;    BOOL format_class;    int toti,totc;    formatted = FALSE;    strcpy(format_string,"r%p%f%F0.5F1F2c");    format_class = NULLINDEX;    while ((o=getopt(argc,argv,"f:c:h"))!=EOF) {	switch (o) {	  case 'f':	    formatted=TRUE;	    strcpy(format_string,optarg);	    break;	  case 'c':	    format_class = atoi(optarg);	    break;	  case 'h':	  case '?':	  default: 	    give_help();	    if (o=='h') exit(0);	    else fatal("option not implemented");	}    }    tot = errs = 0;    classes = new_vec(symbol_t);    matrix = new_vec(vec_t);    if (optind<argc) {	stem = argv[optind++];	if (ld_names(add_ext(stem,".names"))) {	    for (i=0; i<vmax(Classes); i++) {		actual_class = vref(atom_t,Classes,i)->nom;		extend_matrix(classes,matrix,actual_class);	    }	}    }  else {	stem = "stdin";    }    if (optind<argc) {	warning("not all arguments were used: %s ...",argv[optind]);    }        while (scanf("%s %lf %lf %s\n",pred_str,&p,&n,actual_str)!=EOF) {	tot++;	pred_class = intern(pred_str);	actual_class = intern(actual_str);	extend_matrix(classes,matrix,pred_class);	extend_matrix(classes,matrix,actual_class);	if (pred_class!=actual_class) errs++;	classrow = vref(vec_t,matrix,actual_class->index);	entry = vref(int,classrow,pred_class->index);	(*entry)++;    }    if (formatted && format_class==NULLINDEX) {	/* figure out minority class */	totc = tot+1;	for (i=0; i<vmax(classes); i++) {	    toti = 0;	    classrow = vref(vec_t,matrix,i);	    for (j=0; j<vmax(classrow); j++) {		mij = *vref(int,classrow,j);		toti += mij;	    }	    if (toti<totc) {		format_class = i;		totc = toti;	    }	}    }    /* print summary */    if (!formatted) {	printf("%d errors in %d trials (%.2f%%)\n\n",	       errs,tot,PCT(errs,tot,0.0));    	if (tot==0) exit(0);	/* print recall precision numbers */	printf("recall\tprecis.\tfallout\tF[0.5]\tF[1.0]\tF[2.0]\tfocus class\n");	printf("------\t-------\t-------\t------\t-------\t-------\t-----------\n");    }    for (k=0; k<vmax(classes); k++) {	/* compute stats for class k */ 	fp = tp = fn = tn = 0;	for (i=0; i<vmax(classes); i++) {	    classrow = vref(vec_t,matrix,i);	    for (j=0; j<vmax(classrow); j++) {		mij = *vref(int,classrow,j);		if (i==k && j==k) tp = mij;		else if (i==k && j!=k) fn += mij;		else if (i!=k && j!=k) tn += mij;		else if (i!=k && j==k) fp += mij;	    }	}	rec = PCT(tp,tp+fn,0.0);	prec = PCT(tp,tp+fp,100.0);	fall = PCT(fp,fp+tn,0.0);	/* print out stats */	if (!formatted) {	    printf("%6.2f\t%6.2f\t%6.2f\t%6.4f\t%6.4f\t%6.4f\t%s\n",		   rec,prec,fall,		   F(prec,rec,0.5),F(prec,rec,1.0),F(prec,rec,2.0),		   vref(symbol_t,classes,k)->name);	} else if (k==format_class) {	    cp = format_string;	    while (*cp) {		if (*cp!='%') putchar(*cp++);		else {		    cp++;		    switch (*cp) {		      case '%':			putchar(*cp);			cp++;			break;		      case 'c':  /* class */			printf("%-8.8s",vref(symbol_t,classes,k)->name);			cp++;			break;		      case 's':  /* filestem */			printf("%-8.8s",stem);			cp++;			break;		      case 't': /* total */			printf("%8d",tot);			cp++;		    			break;		      case 'e': /* errors */			cp++;			if (*cp=='#') /* count */ {			    printf("%8d",errs);			    cp++;			} else {			    cp += format_aux(cp,PCT(errs,tot,0.0));			}			break;		      case 'r': /* recall */ 			cp++;			cp += format_aux(cp,rec);			break;		      case 'p': /* precision */ 			cp++;			cp += format_aux(cp,prec);			break;		      case 'f': /* fallout */ 			cp++;			cp += format_aux(cp,fall);			break;		      case 'F': /* FBeta */ 			cp++;			beta = strtod(cp,&end); 			if (end==NULL) {			    printf("%8.5f",F(prec,rec,1.0));			} else {			    printf("%8.5f",F(prec,rec,beta));			    cp = end;			}			break;		      default:			fatal("bad format string");		    } /* switch */ 		}/* else if a %-construct */	    } /* for cp */	} /* else if k==format_class */     } /* for k */    printf("\n");    if (!formatted) {	/* print confusion matrix */	w = entry_width(classes,matrix);	for (i=0; i<vmax(classes); i++) 	  printf(" %*.*s ",w,w,class_string(vref(symbol_t,classes,i)));	printf("    <-classified as\n");	for (i=0; i<vmax(classes); i++) printf(" %.*s ",w,"---------------");	printf("\n");	for (i=0; i<vmax(classes); i++) {	    classrow = vref(vec_t,matrix,i);	    for (j=0; j<vmax(classes); j++) {		entry = vref(int,classrow,j);		printf(" %*d ",w,*entry);	    }	    printf("    %*.*s: %s\n",w,w,		   class_string(vref(symbol_t,classes,i)),		   vref(symbol_t,classes,i)->name);	}    } /* !formatted */ }static int format_aux(cp,p)char *cp;double p;{    if (*cp=='+') {	printf("%8.3f",p);	return 1;    } else {	printf("%8.5f",p/100.0);	return 0;    }}/* set by entry_width, used by class_string */static int Encode_classes;/* width of entries in the matrix */static int entry_width(classes,matrix)vec_t *classes;vec_t *matrix;{    vec_t *row;    int i,j;    double log10(double);    int w,wij,mij;        /* find width of entries in table */    w = MIN_ENTRY_WIDTH;     for (i=0; i<vmax(matrix); i++) {	row = vref(vec_t,matrix,i); 	for (j=0; j<vmax(row); j++) {	    mij = *vref(int,row,j);	    wij = (mij>0) ? log10((double) mij)+1 : 1; 	    w = max(w,wij); 	}    }    /* see if classes need to be encoded */    Encode_classes = 0;    for (i=0; i<vmax(classes)-1; i++) {	for (j=i+1; j<vmax(classes); j++) {	    if (!strncmp(		   vref(symbol_t,classes,i)->name,			   vref(symbol_t,classes,j)->name,			   w))	    {		Encode_classes = 1;	    }	}    }    /* return the width */    return w;}/* width of entries in the matrix */static char *class_string(class)symbol_t *class;{    static char buf[10];    if (Encode_classes) {	if (class->index > 26) {	    sprintf(buf,"%c%d",		    'a'+class->index,class->index/26);	} else {	    sprintf(buf,"%c ",		    'a'+class->index);	}	return buf;    } else {	return class->name;    }}/* add a new class, and a new row and column to the matrix */ static void extend_matrix(classes,matrix,class)vec_t *classes;vec_t *matrix;symbol_t *class;{    vec_t *row;      int i,zero=0;     if (!vmem(symbol_t,classes,class)) {	/* create new class */ 	class->index = vmax(classes);	ext_vec(symbol_t,classes,class);	/* add new row to matrix */	row = new_vec(int);	for (i=0; i<vmax(classes); i++) {	    ext_vec(int,row,&zero);	}	ext_vec(vec_t,matrix,row);	/* extend all other rows by one entry */	for (i=0; i<vmax(classes)-1; i++) {	    row = vref(vec_t,matrix,i);	    ext_vec(int,row,&zero);	}    }}static double F(prec,recall,beta)double prec,recall,beta;{    double bsq,P,R;    bsq = beta*beta;    /* convert from percent */    P = prec/100.0;    R = recall/100.0;        return ((bsq+1.0)*P*R) / (bsq*P + R);} 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -