⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 siftrules.cpp

📁 实现决策树分类训练试验。 源自c4.5
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/*								         */
/*************************************************************************/
void CalculateValue()
{
    RuleNo r, Selected=0, InCount;
    ItemNo i, Times, FPos=0, FNeg=0, SumCover=0;
    float BaseBits, RuleBits=0, NewBits;
    ClassNo ThisClass;
    Boolean *RuleMatch;

    ForEach(i, 0, MaxItem)
    {
		ThisClass = Class(Item[i]);

		if ( Covered[i] )
		{
			SumCover++;
			if( ThisClass != FocusClass ) FPos++;
		}
		else
		if ( ThisClass == FocusClass )
		{
			FNeg++;
		}
    }

    ForEach(r, 1, NRules)
    {
		if ( Rule[r].Rhs == FocusClass )
		{
			Right[r] = Wrong[r] = 0;

			if ( RuleIn[r] )
			{
				RuleBits += Rule[r].Bits;
				Selected++;
			}

			RuleMatch = Match[r];

			ForEach(i, 0, MaxItem)
			{
				if ( RuleMatch[i] &&
					( ! (Times = Covered[i]) || Times == 1 && RuleIn[r] ) )
				{
					if ( Class(Item[i]) == FocusClass )
					{
						Right[r]++;
					}
					else
					{
						Wrong[r]++;
					}
				}
			}
		}
    }

    RuleBits -= LogFact[Selected];	/* allow for reordering of rules */

    BaseBits = CodeWeight * RuleBits + ExceptionBits(SumCover, FPos, FNeg);

    /*  From the Right and Wrong of each rule, calculate its value  */

    Verbosity(1)
    {
        fprintf(fLog,"\t");
    	InCount = -1;
    }

    ForEach(r, 1, NRules)
	{
		if ( Rule[r].Rhs == FocusClass )
		{
			if ( RuleIn[r] )
			{
				NewBits = ExceptionBits(SumCover-Right[r]-Wrong[r],
							FPos-Wrong[r], FNeg+Right[r]) +
					  CodeWeight *
						  (RuleBits - Rule[r].Bits + LogItemNo[Selected]);
				Value[r] = NewBits - BaseBits;
			}
			else
			{
				NewBits = ExceptionBits(SumCover+Right[r]+Wrong[r],
							FPos+Wrong[r], FNeg-Right[r]) +
					  CodeWeight *
						  (RuleBits + Rule[r].Bits - LogItemNo[Selected+1]);
				Value[r] = BaseBits - NewBits;
			}

			Verbosity(1)
			{
				if ( RuleIn[r] )
				{
					if ( ++InCount && ! (InCount % 3) ) fprintf(fLog,"\n\t\t");
					fprintf(fLog,"%d[%d|%d=%.1f]  ", r, Right[r], Wrong[r], Value[r]);
				}
			}
		}
    }

    Verbosity(1)
    {
		fprintf(fLog,"\n\t\t%d rules, %d firings: F+=%d, F-=%d, %.1f bits (rules=%.1f)\n",
			Selected, SumCover, FPos, FNeg, BaseBits, RuleBits);
    }

    if ( BaseBits < SubsetValue )
    {
		SubsetValue = BaseBits;
		memcpy(Subset, RuleIn, NRules+1);
    }
}



/*************************************************************************/
/*								         */
/*  Add rule r to the set of included rules and increase the number of	 */
/*  rules covering each of the items that fire the rule		         */
/*								         */
/*************************************************************************/
void AddRule(RuleNo r)
{
    ItemNo i;

    RuleIn[r] = true;

    ForEach(i, 0, MaxItem)
    {
		if ( Match[r][i] )
		{
			Covered[i]++;
		}
    }

    Verbosity(1) fprintf(fLog,"%5d+  %6.1f", r, Value[r]);
}



/*************************************************************************/
/*								         */
/*  Delete rule r from the included rules and decrease the number of	 */
/*  rules covering each of the items covered by the rule	         */
/*								         */
/*************************************************************************/
void DeleteRule(RuleNo r)
{
    ItemNo i;

    RuleIn[r] = false;

    ForEach(i, 0, MaxItem)
    {
		if ( Match[r][i] )
		{
			Covered[i]--;
		}
    }

    Verbosity(1) fprintf(fLog,"%5d-  %6.1f", r, -Value[r]);
}



/*************************************************************************/
/*								         */
/*  Make an index of included rules in RuleIndex.  Select first those    */
/*  classes whose rules have the fewest false positives.  Within a	 */
/*  class, put rules with higher accuracy ahead.			 */
/*								         */
/*************************************************************************/
void MakeIndex()
{
    ClassNo c, BestC, Pass;
    RuleNo r, BestR, NewNRules = 0;
    ItemNo i;
    Boolean *Included;

    Included = (Boolean *) calloc(MaxClass+1, sizeof(Boolean));
    RuleIndex = (RuleNo *) calloc(NRules+1, sizeof(RuleNo));

    Verbosity(1) fprintf(fLog,"\nFalsePos  Class\n");

    ForEach(i, 0, MaxItem)
    {
		Covered[i] = 0;
    }

    /*  Select the best class to put next  */

    ForEach(Pass, 0, MaxClass)
    {
		ForEach(c, 0, MaxClass)
		{
			if ( Included[c] ) continue;

			FalsePos[c] = 0;

			ForEach(i, 0, MaxItem)
			{
				if ( Covered[i] || Class(Item[i]) == c ) continue;

				ForEach(r, 1, NRules)
				{
					if ( Rule[r].Rhs == c && RuleIn[r] && Match[r][i] )
					{
						FalsePos[c]++;
						break;
					}
				}
			}
		}

		BestC = -1;
		ForEach(c, 0, MaxClass)
		{
			if ( ! Included[c] &&
				 ( BestC < 0 || FalsePos[c] < FalsePos[BestC] ) )
			{
				BestC = c;
			}
		}
		Included[BestC] = true;

		Verbosity(1)
			fprintf(fLog,"%5d     %s\n", FalsePos[BestC], ClassName[BestC]);

		/*  Now grab the rules for this class  */

		do
		{
			BestR = 0;

			/*  Find the best rule to put next  */

			ForEach(r, 1, NRules)
			{
				if ( RuleIn[r] && Rule[r].Rhs == BestC &&
					 ( ! BestR || Rule[r].Error < Rule[BestR].Error ) )
				{
					BestR = r;
				}
			}

			if ( BestR )
			{
				RuleIndex[++NewNRules] = BestR;
				RuleIn[BestR] = false;

				ForEach(i, 0, MaxItem)
				{
					Covered[i] |= Match[BestR][i];
				}
			}
		} while ( BestR );
    }

    NRules = NewNRules;
    delete Included;
}



/*************************************************************************/
/*								         */
/*  Find the default class as the one with most items not covered by	 */
/*  any rule.  Resolve ties in favour of more frequent classes.		 */
/*  (Note: Covered has been set by MakeIndex.)				 */
/*								         */
/*************************************************************************/
void   FindDefault()
{
    ClassNo c;
    ItemNo i;

    /*  Determine uncovered items  */

    ForEach(c, 0, MaxClass)
    {
		NoRule[c] = 0;
    }

    ForEach(i, 0, MaxItem)
    {
		if ( ! Covered[i] )
		{
			NoRule[Class(Item[i])]++;
		}
    }

    Verbosity(1)
    {
		fprintf(fLog,"\nItems: Uncovered   Class\n");
		ForEach(c, 0, MaxClass)
		{
			fprintf(fLog,"%5d %7d      %s\n", ClassFreq[c], NoRule[c], ClassName[c]);
		}
		fprintf(fLog,"\n");
    }

    DefaultClass = 0;
    ForEach(c, 1, MaxClass)
    {
		if ( NoRule[c] > NoRule[DefaultClass] ||
			 NoRule[c] == NoRule[DefaultClass] &&
			 ClassFreq[c] > ClassFreq[DefaultClass] )
		{
			DefaultClass = c;
		}
    }
}



/*************************************************************************/
/*								         */
/*  Given a rule and a case, determine the strength with which we can    */
/*  conclude that the case belongs to the class specified by the rule's  */
/*  right-hand side.						         */
/*								         */
/*  If the case doesn't satisfy all the conditions of the rule,		 */
/*  then this is 0.						         */
/*								         */
/*************************************************************************/


float Strength(PR ThisRule, Description Case)
{
    short d;

    if ( ThisRule.Error > 0.7 ) return 0.0;

    ForEach(d, 1, ThisRule.Size)
    {
		if ( ! Satisfies(Case, ThisRule.Lhs[d]) )
		{
			return 0.0;
		}
    }

    return ( 1 - ThisRule.Error );
}



/*************************************************************************/
/*									 */
/*  Determine the number of bits to encode exceptions.  Unlike the	 */
/*  version in the book, this uses an approximate encoding that 	 */
/*  penalizes unbalanced numbers of false positives and false negatives  */
/*  as described in my paper at 1995 International Machine Learning      */
/*  Conference (published by Morgan Kaufmann).				 */
/*									 */
/*************************************************************************/


float Biased(int N, int E, float ExpE)
{
    float Rate;

    if ( ExpE <= 1E-6 )
    {
		return ( E == 0 ? 0.0 : 1E6 );
    }
    else if ( ExpE >= N-1E-6 )
    {
		return ( E == N ? 0.0 : 1E6 );
    }

    Rate = ExpE / N;
    return -E * Log(Rate) - (N-E) * Log(1-Rate);
}



float ExceptionBits(int Fires, int FP, int FN)
{
    if ( Fires > 0.5 * (MaxItem+1) )
    {
		return Log(MaxItem+1)
			+ Biased(Fires, FP, 0.5 * (FP+FN))
			+ Biased(MaxItem+1-Fires, FN, (float) FN);
    }
    else
    {
		return Log(MaxItem+1)
			+ Biased(Fires, FP, (float) FP)
			+ Biased(MaxItem+1-Fires, FN, 0.5 * (FP+FN));
    }
}



/*************************************************************************/
/*									 */
/*  Find encoding lengths for all rules					 */
/*									 */
/*************************************************************************/
void FindRuleCodes()
{
    RuleNo r;
    short d, NCond;
    float Bits;

    ForEach(r, 1, NRules)
    {
		NCond = Rule[r].Size;
		Bits = 0;

		ForEach(d, 1, NCond)
		{
			Bits += CondBits(Rule[r].Lhs[d]);
		}

		/*  Must encode the number of conditions, but credit the total
			encoding by the ways conditions can be reordered  */

		Rule[r].Bits = Bits + LogItemNo[NCond] - LogFact[NCond];
    }
}



/*************************************************************************/
/*									 */
/*  Determine the number of bits required to encode a condition		 */
/*									 */
/*************************************************************************/
float CondBits(Condition C)
{
    Test t;
    Attribute a;

    t = C->CondTest;
    a = t->Tested;

    switch ( t->NodeType )
    {
	case BrDiscr:		/* test of discrete attribute */
	case ThreshContin:	/* test of continuous attribute */
		return AttTestBits/REDUNDANCY + BranchBits[a];
	case BrSubset:		/* subset test on discrete attribute  */
		return AttTestBits/REDUNDANCY + MaxAttVal[a];
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -