📄 c4.5rulesgt.c

📁 决策树c4.5算法的c++实现希望对大家有用
💻 C
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
/*  ----------  */    RuleNo r;{    ItemNo i;    RuleIn[r] = false;    ForEach(i, 0, MaxItem)    {	if ( Match[r][i] )	{	    Covered[i]--;	}    }    Verbosity(1) printf("%5d-  %6.1f", r, -Value[r]);}/*************************************************************************//*								         *//*  Make an index of included rules in RuleIndex.  Select first those    *//*  classes whose rules have the fewest false positives.  Within a	 *//*  class, put rules with higher accuracy ahead.			 *//*								         *//*************************************************************************/    MakeIndex()/*  ---------  */{    ClassNo c, BestC, Pass;    RuleNo r, BestR, NewNRules = 0;    ItemNo i;    Boolean *Included;    Included = (Boolean *) calloc(MaxClass+1, sizeof(Boolean));    RuleIndex = (RuleNo *) calloc(NRules+1, sizeof(RuleNo));    Verbosity(1) printf("\nFalsePos  Class\n");    ForEach(i, 0, MaxItem)    {	Covered[i] = 0;    }    /*  Select the best class to put next  */    ForEach(Pass, 0, MaxClass)    {	ForEach(c, 0, MaxClass)	{	    if ( Included[c] ) continue;	    FalsePos[c] = 0;	    ForEach(i, 0, MaxItem)	    {		if ( Covered[i] || Class(Item[i]) == c ) continue;		ForEach(r, 1, NRules)		{		    if ( Rule[r].Rhs == c && RuleIn[r] && Match[r][i] )		    {			FalsePos[c]++;			break;		    }		}	    }	}	BestC = -1;	ForEach(c, 0, MaxClass)	{	    if ( ! Included[c] &&	         ( BestC < 0 || FalsePos[c] < FalsePos[BestC] ) )	    {		BestC = c;	    }	}	Included[BestC] = true;	Verbosity(1)	    printf("%5d     %s\n", FalsePos[BestC], ClassName[BestC]);	/*  Now grab the rules for this class  */	do	{	    BestR = 0;	    /*  Find the best rule to put next  */	    ForEach(r, 1, NRules)	    {		if ( RuleIn[r] && Rule[r].Rhs == BestC &&		     ( ! BestR || Rule[r].Error < Rule[BestR].Error ) )		{		    BestR = r;		}	    }	    if ( BestR )	    {		RuleIndex[++NewNRules] = BestR;		RuleIn[BestR] = false;		ForEach(i, 0, MaxItem)		{		    Covered[i] |= Match[BestR][i];		}	    }	} while ( BestR );    }    NRules = NewNRules;    free(Included);}/*************************************************************************//*								         *//*  Find the default class as the one with most items not covered by	 *//*  any rule.  Resolve ties in favour of more frequent classes.		 *//*  (Note: Covered has been set by MakeIndex.)				 *//*								         *//*************************************************************************/    FindDefault()/*  -----------  */{    ClassNo c;    ItemNo i;    /*  Determine uncovered items  */    ForEach(c, 0, MaxClass)    {	NoRule[c] = 0;    }    ForEach(i, 0, MaxItem)    {	if ( ! Covered[i] )	{	    NoRule[Class(Item[i])]++;	}    }    Verbosity(1)    {	printf("\nItems: Uncovered   Class\n");	ForEach(c, 0, MaxClass)	{	    printf("%5d %7d      %s\n", ClassFreq[c], NoRule[c], ClassName[c]);	}	printf("\n");    }    DefaultClass = 0;    ForEach(c, 1, MaxClass)    {	if ( NoRule[c] > NoRule[DefaultClass] ||	     NoRule[c] == NoRule[DefaultClass] &&	     ClassFreq[c] > ClassFreq[DefaultClass] )	{	    DefaultClass = c;	}    }}/*************************************************************************//*								         *//*  Given a rule and a case, determine the strength with which we can    *//*  conclude that the case belongs to the class specified by the rule's  *//*  right-hand side.						         *//*								         *//*  If the case doesn't satisfy all the conditions of the rule,		 *//*  then this is 0.						         *//*								         *//*************************************************************************/float Strength(ThisRule, Case)/*    --------  */    PR ThisRule;    Description Case;{    short d;    Boolean Satisfies();    if ( ThisRule.Error > 0.7 ) return 0.0;    ForEach(d, 1, ThisRule.Size)    {	if ( ! Satisfies(Case, ThisRule.Lhs[d]) )	{	    return 0.0;	}    }    return ( 1 - ThisRule.Error );}/*************************************************************************//*									 *//*  Determine the number of bits to encode exceptions.  Unlike the	 *//*  version in the book, this uses an approximate encoding that 	 *//*  penalizes unbalanced numbers of false positives and false negatives  *//*  as described in my paper at 1995 International Machine Learning      *//*  Conference (published by Morgan Kaufmann).				 *//*									 *//*************************************************************************/float Biased(N, E, ExpE)/*    ------  */    int N, E;    float ExpE;{    float Rate;    if ( ExpE <= 1E-6 )    {	return ( E == 0 ? 0.0 : 1E6 );    }    else    if ( ExpE >= N-1E-6 )    {	return ( E == N ? 0.0 : 1E6 );    }    Rate = ExpE / N;    return -E * Log(Rate) - (N-E) * Log(1-Rate);}float ExceptionBits(Fires, FP, FN)/*    -------------  */    int Fires, FP, FN;{    if ( Fires > 0.5 * (MaxItem+1) )    {	return Log(MaxItem+1)		+ Biased(Fires, FP, 0.5 * (FP+FN))		+ Biased(MaxItem+1-Fires, FN, (float) FN);    }    else    {	return Log(MaxItem+1)		+ Biased(Fires, FP, (float) FP)		+ Biased(MaxItem+1-Fires, FN, 0.5 * (FP+FN));    }}/*************************************************************************//*									 *//*  Find encoding lengths for all rules					 *//*									 *//*************************************************************************/    FindRuleCodes()/*  -------------  */{    RuleNo r;    short d, NCond;    float Bits, CondBits();    ForEach(r, 1, NRules)    {	NCond = Rule[r].Size;	Bits = 0;	ForEach(d, 1, NCond)	{	    Bits += CondBits(Rule[r].Lhs[d]);	}	/*  Must encode the number of conditions, but credit the total	    encoding by the ways conditions can be reordered  */	Rule[r].Bits = Bits + LogItemNo[NCond] - LogFact[NCond];    }}/*************************************************************************//*									 *//*  Determine the number of bits required to encode a condition		 *//*									 *//*************************************************************************/float CondBits(C)/*    --------  */    Condition C;{    Test t;    Attribute a;    t = C->CondTest;    a = t->Tested;    switch ( t->NodeType )    {	case BrDiscr:		/* test of discrete attribute */	case ThreshContin:	/* test of continuous attribute */	    return AttTestBits/REDUNDANCY + BranchBits[a];	case BrSubset:		/* subset test on discrete attribute  */	    return AttTestBits/REDUNDANCY + MaxAttVal[a];    } }/*************************************************************************//*									 *//*	Evaluatation of rulesets					 *//*	------------------------					 *//*									 *//*************************************************************************//*************************************************************************//*									 *//*	Evaluate all rulesets						 *//*									 *//*************************************************************************/    EvaluateRulesets(DeleteRules)/*  ----------------  */    Boolean DeleteRules;{    short t;    ItemNo *Errors, Interpret();    float AvSize=0, AvErrs=0;    Boolean Final;    if ( TRIALS == 1 )    {	/*  Evaluate current ruleset as there is no composite ruleset  */	Interpret(0, MaxItem, DeleteRules, true, true);	return;    }    Errors = (ItemNo *) malloc((TRIALS+1) * sizeof(ItemNo));    ForEach(t, 0, TRIALS)    {	NRules    = PRSet[t].SNRules;	Rule      = PRSet[t].SRule;	RuleIndex = PRSet[t].SRuleIndex;	DefaultClass = PRSet[t].SDefaultClass;	if ( t < TRIALS )	{	    printf("\nRuleset %d:\n", t);	}	else	{	    printf("\nComposite ruleset:\n");	}	Final = (t == TRIALS);	Errors[t] = Interpret(0, MaxItem, DeleteRules, Final, Final);	AvSize += NRules;	AvErrs += Errors[t];	if ( DeleteRules )	{	    PRSet[t].SNRules = NRules;	}    }    /*  Print report  */    printf("\n");    printf("Trial   Size      Errors\n");    printf("-----   ----      ------\n");    ForEach(t, 0, TRIALS)    {	if ( t < TRIALS )	{	    printf("%4d", t);	}	else	{	    printf("  **");	}	printf("    %4d  %3d(%4.1f%%)\n",	      PRSet[t].SNRules, Errors[t], 100 * Errors[t] / (MaxItem+1.0));    }    AvSize /= TRIALS + 1;    AvErrs /= TRIALS + 1;    printf("\t\t\t\tAv size = %.1f,  av errors = %.1f (%.1f%%)\n",	   AvSize, AvErrs, 100 * AvErrs / (MaxItem+1.0));}/*************************************************************************//*									 *//*	Evaluate current ruleset					 *//*									 *//*************************************************************************/float	Confidence;		/* certainty factor of fired rule */				/* (set by BestRuleIndex) */ItemNo Interpret(Fp, Lp, DeleteRules, CMInfo, Arrow)/*     ---------  */    ItemNo Fp, Lp;    Boolean DeleteRules, CMInfo, Arrow;{    ItemNo i, Tested=0, Errors=0, *Better, *Worse, *ConfusionMat;    Boolean FoundRule;    ClassNo AssignedClass, AltClass;    Attribute Att;    RuleNo p, Bestr, ri, ri2, riDrop=0, BestRuleIndex();    float ErrorRate, BestRuleConfidence;    if ( CMInfo )    {	ConfusionMat = (ItemNo *) calloc((MaxClass+1)*(MaxClass+1), sizeof(ItemNo));    }    ForEach(ri, 1, NRules)    {	p = RuleIndex[ri];	Rule[p].Used = Rule[p].Incorrect = 0;    }    Better = (ItemNo *) calloc(NRules+1, sizeof(ItemNo));    Worse  = (ItemNo *) calloc(NRules+1, sizeof(ItemNo));    ForEach(i, Fp, Lp)    {	/*  Find first choice for rule for this item  */	ri = BestRuleIndex(Item[i], 1);	Bestr = ( ri ? RuleIndex[ri] : 0 );	FoundRule = Bestr > 0;	if ( FoundRule )	{	    Rule[Bestr].Used++;	    AssignedClass =  Rule[Bestr].Rhs;	    BestRuleConfidence = Confidence;	    /*  Now find second choice  */	    ri2 = BestRuleIndex(Item[i], ri+1);	    AltClass = ( ri2 ? Rule[RuleIndex[ri2]].Rhs : DefaultClass );	    if ( AltClass != AssignedClass )	    {		if ( AssignedClass == Class(Item[i]) )		{		    Better[ri]++;		}		else		if ( AltClass == Class(Item[i]) )		{		    Worse[ri]++;		}	    }	}	else	{	    AssignedClass = DefaultClass;	}		if ( CMInfo )	{	    ConfusionMat[Class(Item[i])*(MaxClass+1)+AssignedClass]++;	}	Tested++;	if ( AssignedClass != Class(Item[i]) )	{	    Errors++;	    if ( FoundRule ) Rule[Bestr].Incorrect++;	    Verbosity(3)	    {	    	printf("\n");	    	ForEach(Att, 0, MaxAtt)	    	{	    	    printf("\t%s: ", AttName[Att]);	    	    if ( MaxAttVal[Att] )	    	    {	    		if ( DVal(Item[i],Att) )			{	    		    printf("%s\n", AttValName[Att][DVal(Item[i],Att)]);			}	    		else			{	    		    printf("?\n");			}	    	    }	    	    else	    	    {	    		if ( CVal(Item[i],Att) != Unknown )			{	    		    printf("%g\n", CVal(Item[i],Att));			}	    		else			{	    		    printf("?\n");			}	    	    }	    	}	    	printf("\t%4d:\tGiven class %s,", i, ClassName[Class(Item[i])]);	    	if ( FoundRule )	    	{	    	    printf(" rule %d [%.1f%%] gives class ",	    		    Bestr, 100 * BestRuleConfidence);	    	}	    	else		{	    	    printf(" default class ");		}	    	printf("%s\n", ClassName[AssignedClass]);	    }	}    }    printf("\nRule  Size  Error  Used  Wrong\t          Advantage\n");    printf(  "----  ----  -----  ----  -----\t          ---------\n");    ForEach(ri, 1, NRules)    {	p = RuleIndex[ri];	if ( Rule[p].Used > 0 )	{	    ErrorRate = Rule[p].Incorrect / (float) Rule[p].Used;	    printf("%4d%6d%6.1f%%%6d%7d (%.1f%%)\t%6d (%d|%d) \t%s\n",		    p, Rule[p].Size,		    100 * Rule[p].Error, Rule[p].Used, Rule[p].Incorrect,		    100 * ErrorRate,		    Better[ri]-Worse[ri], Better[ri], Worse[ri],		    ClassName[Rule[p].Rhs]);	    /*  See whether this rule should be dropped.  Note: can only drop		one rule at a time, because Better and Worse are affected  */	    if ( DeleteRules && ! riDrop && Worse[ri] > Better[ri] )	    {		riDrop = ri;	    }	}    }    free(Better);    free(Worse);    if ( riDrop )    {	printf("\nDrop rule %d\n", RuleIndex[riDrop]);	ForEach(ri, riDrop+1, NRules)	{	    RuleIndex[ri-1] = RuleIndex[ri];	}	NRules--;    	if ( CMInfo ) free(ConfusionMat);	return Interpret(Fp, Lp, DeleteRules, true, Arrow);    }    else    {	printf("\nTested %d, errors %d (%.1f%%)%s\n",	    Tested, Errors, 100 * Errors / (float) Tested,	    ( Arrow ? "   <<" : "" ));    }    if ( CMInfo )    {	PrintConfusionMatrix(ConfusionMat);	free(ConfusionMat);    }    return Errors;
上一页 1 2 3 45
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -