⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mdb_count.c

📁 Ripper 分类算法
💻 C
字号:
/****************************************************************************** mdb_count.c - "counting" utilities for datasets******************************************************************************/#include <stdio.h>#include "ripper.h"#include "mdb.h"#define weight(exi) (exi->wt)void count_examples(form,cl,data,pos,neg)vec_t *form;symbol_t *cl;DATA *data;ex_count_t *pos,*neg;{    ex_count_t fp,fn,cov,uncov;    count_rule(form,cl,data,&cov,&uncov,&fp,&fn);    *pos = cov-fp;    *neg = fp;}/*****************************************************************************//* compute coverage of a ruleset[index..end] on data * after ruleset[index] has been replaced with a given form  * return compression obtained by deleting sub-optimal rules * with replace == NULL and index == -1 just this counts the rules*/void count_replaced_ruleset(replace,index,cl,rules,data,pcov,puncov,pfp,pfn)vec_t *replace; /* form to replace rules[index] or NULL for empty rule */vec_t *rules;   symbol_t *cl;int index;      /* assume counts for rules 0...index-1 are ok */DATA *data;    /* assume data is the portion of larger set 		   not covered by rules 0 ... index -1 */ex_count_t *pcov,*puncov,    *pfp,*pfn;  /* side-effected to hold computed counts*/ {    int i,j;    BOOL covered;     example_t *exi;    rule_t *rj;        /* compute fp and cov of early rules */    *pcov = *puncov = *pfp = *pfn = 0;    for (j=0; j<index; j++) {	rj = vref(rule_t,rules,j);	*pcov += rj->nnegx+rj->nposx;	*pfp += rj->nnegx;    }     /* clear counts for later rules */    for (j=index+1; j<vmax(rules); j++) {	rj = vref(rule_t,rules,j);	rj->nnegx = rj->nposx = 0;    }     /* count coverage on data of later rules */    for (i=0; i<vmax(data);i++) {	exi = vref(example_t,data,i);	if (replace!=NULL && form_covers(replace,exi->inst)) {	    *pcov += weight(exi); 	    if (exi->lab.nom != cl) *pfp += weight(exi);	} else {	    /* see if later rule covers exi */	    for (covered=FALSE,j=index+1; j<vmax(rules) && !covered; j++) {		rj = vref(rule_t,rules,j);		if (form_covers(rj->antec,exi->inst)) {		    covered = TRUE;		    *pcov += weight(exi);		    if (exi->lab.nom != rj->conseq) {			*pfp += weight(exi); 			rj->nnegx += weight(exi);		    } else {			rj->nposx += weight(exi);		    }		}	    } 	    if (!covered) {		*puncov += weight(exi);		if (exi->lab.nom==cl) *pfn += weight(exi);	    }	}    } /* for datum i */}/* count members of a class in a dataset*/void count_class_freq(cl,data,p,n)symbol_t *cl;DATA *data;ex_count_t *p, *n;{    example_t *exi;    int i;    (*p) = (*n) = 0;    for (i=0; i<vmax(data); i++) {	exi = vref(example_t,data,i);	if (exi->lab.nom == cl) (*p) += weight(exi);	else (*n) += weight(exi);    }}/* count coverage of a single sentential form*/void count_rule(form,cl,data,pcov,puncov,pfp,pfn)vec_t *form;symbol_t *cl;DATA *data;ex_count_t *pcov, *puncov, *pfp, *pfn;{#define UNOPTIMIZED#ifdef UNOPTIMIZED    int i;    example_t *exi;    *pcov = *puncov = *pfp = *pfn = 0;    for (i=0; i<vmax(data); i++) {	exi = vref(example_t,data,i);	if (form_covers(form,exi->inst)) {	    if (exi->lab.nom != cl) { 		*pfp += weight(exi);	    } 	    *pcov += weight(exi);	} else {	    *puncov += weight(exi);	    if (exi->lab.nom==cl) { 		*pfn += weight(exi);	    } 	}    }#else    gsym_t *condp;    symbol_t *cond_nom;    REAL cond_num;    operator_t cond_op;    aval_t *exi_val;    example_t *exi,*eximax;    BOOL covered;    *pcov = *puncov = *pfp = *pfn = 0;    /* first a special case for length-one forms */    if (vmax(form)==1 && !(vref(gsym_t,form,0)->nonterm)) {	condp = vref(gsym_t,form,0);	cond_num = condp->value.num; 	cond_nom = condp->value.nom;	cond_op = condp->op;	eximax = vbase(example_t,data)+vmax(data);	for (exi=vbase(example_t,data); exi<eximax; exi++) {	    exi_val = vbase(aval_t,exi->inst)+condp->attr_index;	    /* TEST: does condition cover the example? */	    covered = FALSE;	    if (exi_val->kind!=MISSING_VALUE) { 		switch (cond_op) {		  case OPEQ:		    if (exi_val->kind==SYMBOL && exi_val->u.nom==cond_nom) 		      covered = TRUE;		    if (exi_val->kind==CONTINUOUS && exi_val->u.num==cond_num)		      covered = TRUE;		    break;		  case OPNEQ:		    if (exi_val->kind==SYMBOL && exi_val->u.nom!=cond_nom) 		      covered = TRUE;		    if (exi_val->kind==CONTINUOUS && exi_val->u.num!=cond_num)		      covered = TRUE;		    break;		  case OPLE:		    if (exi_val->u.num<=cond_num) 		      covered = TRUE;		    break;		  case OPGE:		    if (exi_val->u.num>=cond_num) 		      covered = TRUE;		    break;		  case OPIN:		    if (contains_symbol(cond_nom,exi_val->u.set))		      covered = TRUE;		    break;		  case OPOUT:		    if (!contains_symbol(cond_nom,exi_val->u.set))		      covered = TRUE;		    break;		  default:		    fatal("unknown operator");		} /* switch cond_op */	    }/* else not missing */	    if (covered) {		*pcov += weight(exi);		if (exi->lab.nom != cl) *pfp += weight(exi);	    } else {		*puncov += weight(exi);				if (exi->lab.nom == cl) *pfn += weight(exi);	    }	} /* for each example */     } else {	/* case for forms of length greater than 1 */	eximax = vbase(example_t,data)+vmax(data);	for (exi=vbase(example_t,data); exi<eximax; exi++) {	    if (form_covers(form,exi->inst)) {		*pcov += weight(exi);		if (exi->lab.nom != cl) *pfp += weight(exi);	    } else {		*puncov += weight(exi);				if (exi->lab.nom == cl) *pfn += weight(exi);	    }	}    }#endif}double error_rate(c,data)concept_t *c;DATA *data;{    example_t *exp,*exmax;    double err;    double tot;    err = tot = 0;    exmax = vbase(example_t,data)+vmax(data);    for (exp=vbase(example_t,data); exp<exmax; exp++) {	tot += weight(exp);	if (classify(c,exp->inst)!=exp->lab.nom) err += weight(exp);	    }    if (tot==0) return 0.0;    else return err/tot;}/*****************************************************************************/BOOL form_covers(sform,inst)vec_t *inst;vec_t *sform;{    register gsym_t *cp,*cpmax;    aval_t *ex_val;    symbol_t *cp_nom;    REAL cp_num;    /* optimized loop over all conditions */    cpmax = vbase(gsym_t,sform)+vmax(sform);    for (cp=vbase(gsym_t,sform); cp<cpmax; cp++) {	if (!cp->nonterm) {	    cp_nom = cp->value.nom;	    cp_num = cp->value.num;	    /* TEST: does condition cover instance? */	    ex_val = vbase(aval_t,inst)+cp->attr_index;	    if (ex_val->kind==MISSING_VALUE) {	      return (cp->op==OPEQ && cp_nom && cp_nom->kind==MISSING_MARK);	    }	    switch (cp->op) {	      case OPEQ:		if (ex_val->kind==SYMBOL && ex_val->u.nom!=cp_nom) return FALSE;		if (ex_val->kind==CONTINUOUS && ex_val->u.num!=cp_num) return FALSE;		break;	      case OPNEQ:		if (ex_val->kind==SYMBOL && ex_val->u.nom==cp_nom) return FALSE;		if (ex_val->kind==CONTINUOUS && ex_val->u.num==cp_num) return FALSE;		break;	      case OPLE:		if (ex_val->u.num>cp_num) return FALSE;		break;	      case OPGE:		if (ex_val->u.num<cp_num) return FALSE;		break;	      case OPIN:		if (!contains_symbol(cp_nom,ex_val->u.set)) return FALSE;		break;	      case OPOUT:		if (contains_symbol(cp_nom,ex_val->u.set)) return FALSE;		break;	      default:		assert(0);	    }/* switch*/	} /* if !nonterm */    } /* for each gsym */    return TRUE;}BOOL contains_symbol(sym,inst)symbol_t *sym;vec_t *inst;{    int i;     for (i=0; i<vmax(inst); i++) {	if (*vref(symbol_t *,inst,i) == sym) return TRUE;    }    return FALSE;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -