⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 siftrules.c

📁 经典的数据挖掘分类算法
💻 C
📖 第 1 页 / 共 2 页
字号:
	{	    SumCover++;	    if( ThisClass != FocusClass ) FPos++;	}	else	if ( ThisClass == FocusClass )	{	    FNeg++;	}    }    ForEach(r, 1, NRules)    {	if ( Rule[r].Rhs == FocusClass )	{	    Right[r] = Wrong[r] = 0;	    if ( RuleIn[r] )	    {		RuleBits += Rule[r].Bits;		Selected++;	    }	    RuleMatch = Match[r];	    ForEach(i, 0, MaxItem)	    {		if ( RuleMatch[i] &&		   ( ! (Times = Covered[i]) || Times == 1 && RuleIn[r] ) )		{		    if ( Class(Item[i]) == FocusClass )		    {			Right[r]++;		    }		    else		    {			Wrong[r]++;		    }		}	    }	}    }    RuleBits -= LogFact[Selected];	/* allow for reordering of rules */    BaseBits = CodeWeight * RuleBits + ExceptionBits(SumCover, FPos, FNeg);    /*  From the Right and Wrong of each rule, calculate its value  */    Verbosity(1)    {        printf("\t");    	InCount = -1;    }    ForEach(r, 1, NRules)    {	if ( Rule[r].Rhs == FocusClass )	{	    if ( RuleIn[r] )	    {		NewBits = ExceptionBits(SumCover-Right[r]-Wrong[r],					FPos-Wrong[r], FNeg+Right[r]) +			  CodeWeight *			      (RuleBits - Rule[r].Bits + LogItemNo[Selected]);		Value[r] = NewBits - BaseBits;	    }	    else	    {		NewBits = ExceptionBits(SumCover+Right[r]+Wrong[r],					FPos+Wrong[r], FNeg-Right[r]) +			  CodeWeight *			      (RuleBits + Rule[r].Bits - LogItemNo[Selected+1]);		Value[r] = BaseBits - NewBits;	    }	    Verbosity(1)	    {	        if ( RuleIn[r] )	        {		    if ( ++InCount && ! (InCount % 3) ) printf("\n\t\t");		    printf("%d[%d|%d=%.1f]  ", r, Right[r], Wrong[r], Value[r]);	        }	    }	}    }    Verbosity(1)    {	printf("\n\t\t%d rules, %d firings: F+=%d, F-=%d, %.1f bits (rules=%.1f)\n",		Selected, SumCover, FPos, FNeg, BaseBits, RuleBits);    }    if ( BaseBits < SubsetValue )    {	SubsetValue = BaseBits;	memcpy(Subset, RuleIn, NRules+1);    }}/*************************************************************************//*								         *//*  Add rule r to the set of included rules and increase the number of	 *//*  rules covering each of the items that fire the rule		         *//*								         *//*************************************************************************/    AddRule(r)/*  -------  */    RuleNo r;{    ItemNo i;    RuleIn[r] = true;    ForEach(i, 0, MaxItem)    {	if ( Match[r][i] )	{	    Covered[i]++;	}    }    Verbosity(1) printf("%5d+  %6.1f", r, Value[r]);}/*************************************************************************//*								         *//*  Delete rule r from the included rules and decrease the number of	 *//*  rules covering each of the items covered by the rule	         *//*								         *//*************************************************************************/    DeleteRule(r)/*  ----------  */    RuleNo r;{    ItemNo i;    RuleIn[r] = false;    ForEach(i, 0, MaxItem)    {	if ( Match[r][i] )	{	    Covered[i]--;	}    }    Verbosity(1) printf("%5d-  %6.1f", r, -Value[r]);}/*************************************************************************//*								         *//*  Make an index of included rules in RuleIndex.  Select first those    *//*  classes whose rules have the fewest false positives.  Within a	 *//*  class, put rules with higher accuracy ahead.			 *//*								         *//*************************************************************************/    MakeIndex()/*  ---------  */{    ClassNo c, BestC, Pass;    RuleNo r, BestR, NewNRules = 0;    ItemNo i;    Boolean *Included;    Included = (Boolean *) calloc(MaxClass+1, sizeof(Boolean));    RuleIndex = (RuleNo *) calloc(NRules+1, sizeof(RuleNo));    Verbosity(1) printf("\nFalsePos  Class\n");    ForEach(i, 0, MaxItem)    {	Covered[i] = 0;    }    /*  Select the best class to put next  */    ForEach(Pass, 0, MaxClass)    {	ForEach(c, 0, MaxClass)	{	    if ( Included[c] ) continue;	    FalsePos[c] = 0;	    ForEach(i, 0, MaxItem)	    {		if ( Covered[i] || Class(Item[i]) == c ) continue;		ForEach(r, 1, NRules)		{		    if ( Rule[r].Rhs == c && RuleIn[r] && Match[r][i] )		    {			FalsePos[c]++;			break;		    }		}	    }	}	BestC = -1;	ForEach(c, 0, MaxClass)	{	    if ( ! Included[c] &&	         ( BestC < 0 || FalsePos[c] < FalsePos[BestC] ) )	    {		BestC = c;	    }	}	Included[BestC] = true;	Verbosity(1)	    printf("%5d     %s\n", FalsePos[BestC], ClassName[BestC]);	/*  Now grab the rules for this class  */	do	{	    BestR = 0;	    /*  Find the best rule to put next  */	    ForEach(r, 1, NRules)	    {		if ( RuleIn[r] && Rule[r].Rhs == BestC &&		     ( ! BestR || Rule[r].Error < Rule[BestR].Error ) )		{		    BestR = r;		}	    }	    if ( BestR )	    {		RuleIndex[++NewNRules] = BestR;		RuleIn[BestR] = false;		ForEach(i, 0, MaxItem)		{		    Covered[i] |= Match[BestR][i];		}	    }	} while ( BestR );    }    NRules = NewNRules;    cfree(Included);}/*************************************************************************//*								         *//*  Find the default class as the one with most items not covered by	 *//*  any rule.  Resolve ties in favour of more frequent classes.		 *//*  (Note: Covered has been set by MakeIndex.)				 *//*								         *//*************************************************************************/    FindDefault()/*  -----------  */{    ClassNo c;    ItemNo i;    /*  Determine uncovered items  */    ForEach(c, 0, MaxClass)    {	NoRule[c] = 0;    }    ForEach(i, 0, MaxItem)    {	if ( ! Covered[i] )	{	    NoRule[Class(Item[i])]++;	}    }    Verbosity(1)    {	printf("\nItems: Uncovered   Class\n");	ForEach(c, 0, MaxClass)	{	    printf("%5d %7d      %s\n", ClassFreq[c], NoRule[c], ClassName[c]);	}	printf("\n");    }    DefaultClass = 0;    ForEach(c, 1, MaxClass)    {	if ( NoRule[c] > NoRule[DefaultClass] ||	     NoRule[c] == NoRule[DefaultClass] &&	     ClassFreq[c] > ClassFreq[DefaultClass] )	{	    DefaultClass = c;	}    }}/*************************************************************************//*								         *//*  Given a rule and a case, determine the strength with which we can    *//*  conclude that the case belongs to the class specified by the rule's  *//*  right-hand side.						         *//*								         *//*  If the case doesn't satisfy all the conditions of the rule,		 *//*  then this is 0.						         *//*								         *//*************************************************************************/float Strength(ThisRule, Case)/*    --------  */    PR ThisRule;    Description Case;{    short d;    Boolean Satisfies();    if ( ThisRule.Error > 0.7 ) return 0.0;    ForEach(d, 1, ThisRule.Size)    {	if ( ! Satisfies(Case, ThisRule.Lhs[d]) )	{	    return 0.0;	}    }    return ( 1 - ThisRule.Error );}/*************************************************************************//*									 *//*  Determine the number of bits to encode exceptions.  Unlike the	 *//*  version in the book, this uses an approximate encoding that 	 *//*  penalizes unbalanced numbers of false positives and false negatives  *//*  as described in my paper at 1995 International Machine Learning      *//*  Conference (published by Morgan Kaufmann).				 *//*									 *//*************************************************************************/float Biased(N, E, ExpE)/*    ------  */    int N, E;    float ExpE;{    float Rate;    if ( ExpE <= 1E-6 )    {	return ( E == 0 ? 0.0 : 1E6 );    }    else    if ( ExpE >= N-1E-6 )    {	return ( E == N ? 0.0 : 1E6 );    }    Rate = ExpE / N;    return -E * Log(Rate) - (N-E) * Log(1-Rate);}float ExceptionBits(Fires, FP, FN)/*    -------------  */    int Fires, FP, FN;{    if ( Fires > 0.5 * (MaxItem+1) )    {	return Log(MaxItem+1)		+ Biased(Fires, FP, 0.5 * (FP+FN))		+ Biased(MaxItem+1-Fires, FN, (float) FN);    }    else    {	return Log(MaxItem+1)		+ Biased(Fires, FP, (float) FP)		+ Biased(MaxItem+1-Fires, FN, 0.5 * (FP+FN));    }}/*************************************************************************//*									 *//*  Find encoding lengths for all rules					 *//*									 *//*************************************************************************/    FindRuleCodes()/*  -------------  */{    RuleNo r;    short d, NCond;    float Bits, CondBits();    ForEach(r, 1, NRules)    {	NCond = Rule[r].Size;	Bits = 0;	ForEach(d, 1, NCond)	{	    Bits += CondBits(Rule[r].Lhs[d]);	}	/*  Must encode the number of conditions, but credit the total	    encoding by the ways conditions can be reordered  */	Rule[r].Bits = Bits + LogItemNo[NCond] - LogFact[NCond];    }}/*************************************************************************//*									 *//*  Determine the number of bits required to encode a condition		 *//*									 *//*************************************************************************/float CondBits(C)/*    --------  */    Condition C;{    Test t;    Attribute a;    t = C->CondTest;    a = t->Tested;    switch ( t->NodeType )    {	case BrDiscr:		/* test of discrete attribute */	case ThreshContin:	/* test of continuous attribute */	    return AttTestBits/REDUNDANCY + BranchBits[a];	case BrSubset:		/* subset test on discrete attribute  */	    return AttTestBits/REDUNDANCY + MaxAttVal[a];    } }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -