⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 value.c

📁 Ripper 分类算法
💻 C
字号:
/****************************************************************************** value.c - compute information gain and other statistics for a sentential form******************************************************************************/#include <stdio.h>#include "ripper.h"static void set_wildcards(vec_t *,symbol_t *,vec_t *,double,ex_count_t,ex_count_t,ex_count_t *,ex_count_t *);static void best_wildcard(atom_t *,gsym_t *,symbol_t *,vec_t *,double,ex_count_t,ex_count_t,ex_count_t *,ex_count_t *);#define supported_wildcard(g) (contains_wildcard(g) && (g)->op!=NULLOP)/* function to optimize */value_f Value_function = &info_gain;/* foil's measure */double info_gain(vec_t *form,double oldinfo,		 ex_count_t oldp,ex_count_t oldn,ex_count_t p,ex_count_t n){    if (oldn>0) return p*(oldinfo - information(p,n));    else return pos_cvg(form,oldinfo,oldp,oldn,p,n);}/* coverage of positive examples */ double pos_cvg(vec_t *form,double oldinfo,	       ex_count_t oldp,ex_count_t oldn,ex_count_t p,ex_count_t n){    if (form!=NULL) {	return ((double) p)/vmax(form);    } else {	return 	(double) p;    }}#define H(p,n) (p*information(p,n)+n*information(n,p))/(p+n)/* * used in Rob's experiments -- largest reduction in entropy * such that entropy of pos case is greater than entropy of * negative case.  This basically assumes that every condition * and its negation are added to rules.*/ double entropy_reduction1(vec_t *form,double oldinfo,			 ex_count_t oldp,ex_count_t oldn,			 ex_count_t p,ex_count_t n){    double H0,Hcov,Huncov;    ex_count_t uncovp,uncovn;    /* counts for examples not covered by the rule */     uncovp = oldp - p;    uncovn = oldn - n;    /* entropies */    H0 = H(oldp,oldn);    Hcov = H(p,n);    Huncov = H(uncovp,uncovn);    if (Hcov<Huncov) return -MAXREAL;    else return H0-Hcov;}/*****************************************************************************/double value(vec_t *form,symbol_t *cl,vec_t *data,vec_t *oldform,	     double old_info,ex_count_t oldp,ex_count_t oldn){    int i,j,k;    ex_count_t p,n;    gsym_t *gsymj;    double val;    static vec_t *newform = NULL;    BOOL contains_wildcard_value;    /* removing a nonterminal doesn't change coverage */    if (vmax(form)<vmax(oldform)) return 0.0;    if (newform==NULL) newform = new_vec(gsym_t);    else clear_vec(gsym_t,newform);    /* compute newform = form\oldform: take advantage of fact      * that oldform = xAz and form = xyz, for some y: A-->* y  */    for (i=0; i<vmax(oldform); i++)       if (!equal(gsym_t,vref(gsym_t,oldform,i),vref(gsym_t,form,i))) 	break;    for (j=0; j<vmax(form)-vmax(oldform)+1; j++) {	gsymj = vref(gsym_t,form,i+j);	if (!gsymj->nonterm) ext_vec(gsym_t,newform,gsymj);    }        /* check that there are some new conditions */    if (vmax(newform)==0) return 0;    /* see if there are any >=x or <=y wildcards in new_form */    contains_wildcard_value=FALSE;    for (j=0; j<vmax(newform) && !contains_wildcard_value; j++) {	if (supported_wildcard(vref(gsym_t,newform,j))) {	    contains_wildcard_value=TRUE;	}    }        if (contains_wildcard_value) {	trace(DBG1) {	    printf("// wildcard values in "); 	    print_form(form);	    printf("\n");	}	/* find thresholds and put them in newform */	set_wildcards(newform,cl,data,old_info,oldp,oldn,&p,&n);	/* copy best wildcards from newform into form */	for (k=0,j=0; j<vmax(form)-vmax(oldform)+1; j++) {	    gsymj = vref(gsym_t,form,i+j);	    if (!gsymj->nonterm) {		vset(gsym_t,form,i+j,vref(gsym_t,newform,k++));	    }	}		trace(DBUG) {	    printf("// wildcards set to "); 	    print_form(form);	    printf("\n");	}    } else {	count_examples(newform,cl,data,&p,&n);     }    val = (*Value_function)(form,old_info,oldp,oldn,p,n);      trace(DBUG) {	printf("// value of"); 	print_form(newform); 	printf(" = %g (%g/%g)\n",val,p,n);    }    return val;}double information(ex_count_t p,ex_count_t n){    if (p==0) return 0.0;    else return Log2((p+n+1.0)/(p+1.0));}/***************************************************************************** stuff to deal with finding values for terminals with wildcard values *****************************************************************************//* set each of the supported wildcard thresholds in form * to the value that optimizes gain on the given data;  * or set each of the wildcard words in form to the word * that optimizes gain *  * return the number of pos/neg examples covered by the * chosen value in pos and neg*/static void set_wildcards(vec_t *form,symbol_t *cl,vec_t *data,double old_info,			  ex_count_t oldp,ex_count_t oldn,			  ex_count_t *pos,ex_count_t *neg)/* ex_count_t *pos, *neg hold output of function */{    int i,j;    static vec_t *tmp_form=NULL;    atom_t best;    vec_t data1;    gsym_t *gsymi;    if (vmax(form)==1) {	/* the easy case -- one wildcard */	gsymi = vref(gsym_t,form,0);	/* find best threshold and associated pos/neg coverage */	best_wildcard(&best,gsymi,cl,data,old_info,oldp,oldn,pos,neg);	/* save the wildcard into form */	gsymi->value.nom = best.nom;	gsymi->value.num = best.num;    } else {	/* the hard case--many wildcard thresholds, maybe some known also */	copy_data(&data1,data);	/* collect the known conditions, and apply to data1 */	if (tmp_form==NULL) tmp_form = new_vecn(gsym_t,vmax(form));	else clear_vec(gsym_t,tmp_form);	for (i=0; i<vmax(form); i++) {	    gsymi = vref(gsym_t,form,i);	    if (!supported_wildcard(gsymi)) {		ext_vec(gsym_t,tmp_form,gsymi);	    }	}	remove_uncovered_examples(tmp_form,&data1);	/* now process the new forms */	for (i=0; i<vmax(form); i++) {	    gsymi = vref(gsym_t,form,i);	    if (supported_wildcard(gsymi)) {		/* set the wildcard values */		best_wildcard(&best,gsymi,cl,&data1,old_info,oldp,oldn,pos,neg);		gsymi->value.nom = best.nom;		gsymi->value.num = best.num;		/* update data1, removing conditions not covered */		clear_vec(gsym_t,tmp_form);		ext_vec(gsym_t,tmp_form,gsymi);		remove_uncovered_examples(tmp_form,&data1);	    }	}    }/* hard case */}static void best_wildcard(atom_t *best,gsym_t *gsym,symbol_t *cl,vec_t *data,			  double old_info,ex_count_t oldp,ex_count_t oldn,			  ex_count_t *pos,ex_count_t *neg)/* atom_t *best returns value *//* ex_count_t *pos, *neg hold output of function */{    if (gsym->op==OPLE || gsym->op==OPGE) {	best->num = best_threshold(gsym,cl,data,old_info,oldp,oldn,pos,neg);	best->nom = NULL;    } else if (gsym->op==OPIN || gsym->op==OPOUT 	       || gsym->op==OPEQ || gsym->op==OPNEQ)     {	best->nom = best_symbol(gsym,cl,data,old_info,oldp,oldn,pos,neg);    } else {      fatal("unimplemented use of '?' in grammar");    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -