⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 siftrules.cpp

📁 实现决策树分类训练试验。 源自c4.5
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/*************************************************************************/
/*									 */
/*	Process sets of rules						 */
/*	---------------------					         */
/*								         */
/*************************************************************************/
#include "stdafx.h"
#include "MyBase.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

extern FILE *fLog;
/*********************************************/
/* This is from Ruleinex.i    **/

extern PR	*Rule;		/* production rules */

extern RuleNo	NRules,		/* number of production rules */
		*RuleIndex;	/* index to production rules */

extern short	RuleSpace;	/* space currently allocated for rules */

extern RuleSet	*PRSet;		/* set of rulesets */

extern ClassNo	DefaultClass;	/* default class associated with ruleset */
extern BOOL	SIGTEST	,	/* use significance test in rule pruning */
		SIMANNEAL;	/* use simulated annealing */


extern float	SIGTHRESH,	/* sig level used in rule pruning */
		REDUNDANCY,	/* factor governing encoding tradeoff
				   between rules and exceptions */
		AttTestBits,	/* average bits to encode tested attribute */
		*BranchBits;	/* ditto attribute value */

extern float	*LogItemNo;	/* LogItemNo[i] = log2(i) */
extern double	*LogFact;	/* LogFact[i] = log2(i!) */
/*************************************************/

ItemNo	*ClassFreq,	/* ClassFreq[c]	= no. items of class c  */
	*Covered,	/* Covered[i]	= no. included rules that cover item i */
	*FalsePos,	/* FalsePos[c]	= no. false positives from rules
					  selected for class c */
	*NoRule,	/* NoRule[c]	= no. items covered by no selected rule */

	*Right,		/* Right[r]	= no. correct rule firings */
	*Wrong;		/* Wrong[r]	= no. incorrect rule firings */

float	*Value,		/* Value[r]	= advantage attributable to rule r or
					  realisable if rule r included */
	SubsetValue,	/* value of best class subset so far */
	CodeWeight;	/* multiplying factor for rule encodings */

Boolean	*RuleIn,	/* RuleIn[r]	= true if rule r included */
	*Subset,	/* best class subset so far */
	**Match;	/* Match[r][i]	= true if rule r fires on item i */

RuleNo	*ClassRules;	/* list of all rules for current target class */

ClassNo	FocusClass;


/*************************************************************************/
/*									 */
/*  Construct an ordered subset (indexed by RuleIndex) of the current	 */
/*  set of rules							 */
/*									 */
/*************************************************************************/
void ConstructRuleset()
{
    RuleNo r, OldNRules = NRules;

    /*  Allocate tables  */

    Right = (ItemNo *) calloc(NRules+1, sizeof(ItemNo));
    Wrong = (ItemNo *) calloc(NRules+1, sizeof(ItemNo));

    Value = (float *) calloc(NRules+1, sizeof(float));

    RuleIn = (Boolean *) calloc(NRules+1, sizeof(Boolean));
    Subset = (Boolean *) malloc((NRules+1) * sizeof(Boolean));

    ClassRules = (RuleNo *) malloc((NRules+1) * sizeof(RuleNo));

    ClassFreq = (ItemNo *) calloc(MaxClass+1, sizeof(ItemNo));

    Covered = (ItemNo *) calloc(MaxItem+1, sizeof(ItemNo));

    Match = (Boolean **) calloc(NRules+1, sizeof(Boolean *));

    FalsePos = (ItemNo *) calloc(MaxClass+1, sizeof(ItemNo));

    NoRule = (ItemNo *) calloc(MaxClass+1, sizeof(ItemNo));

    ForEach(r, 1, NRules)
    {
		Match[r] = (Boolean *) calloc(MaxItem+1, sizeof(Boolean));
    }

    /*  Cover each class, then order the classes to give an index of rules  */

    InitialiseTables();

    FindRuleCodes();
    CodeWeight = 0.5;

    ForEach(FocusClass, 0, MaxClass)
    {
		CoverClass();
    }

    MakeIndex();
    FindDefault();

    /*  Clear  */

    delete Value;
    delete RuleIn;
    delete ClassRules;
    delete Subset;
    delete Covered;
    delete FalsePos;
    delete NoRule;
    ForEach(r, 1, OldNRules)
    {
		delete Match[r];
    }
    delete Match;
}

/*************************************************************************/
/*									 */
/*		Initialise all tables used in sifting			 */
/*									 */
/*************************************************************************/
void InitialiseTables()
{
    ItemNo i;
    RuleNo r;
    ClassNo c;

    ForEach(r, 1, NRules)
    {
		RuleIn[r] = false;
		Rule[r].Used = Rule[r].Incorrect = 0;
    }

    ForEach(c, 0, MaxClass)
    {
		ClassFreq[c] = 0;
    }

    ForEach(i, 0, MaxItem)
    {
		ClassFreq[Class(Item[i])]++;

		ForEach(r, 1, NRules)
		{
			Match[r][i] = Strength(Rule[r], Item[i]) > 0.1;

			if ( Match[r][i] )
			{
			Rule[r].Used++;
			if ( Class(Item[i]) != Rule[r].Rhs ) Rule[r].Incorrect++;
			}
		}
    }
}



/*************************************************************************/
/*								         */
/*	Select a subset of the rules for class FocusClass	         */
/*								         */
/*************************************************************************/
void CoverClass()
{
    RuleNo r, RuleCount=0;
    ItemNo i;

    Verbosity(1)
	fprintf(fLog,"\nClass %s\n-----\nAction  Change  Value",
		ClassName[FocusClass]);

    ForEach(i, 0, MaxItem)
    {
		Covered[i] = 0;
    }

    ForEach(r, 1, NRules)
    {
		if ( Rule[r].Rhs == FocusClass )
		{
			RuleCount++;
			ClassRules[RuleCount] = r;
		}
    }

    if ( ! RuleCount )
    {
		return;
    }

    SubsetValue = 1E10;

    if ( RuleCount <= 10 )
    {
		AllCombinations(RuleCount);
    }
    else
    if ( SIMANNEAL )
    {
		SimAnneal(RuleCount);
    }
    else
    {
		SpotSearch(RuleCount);
    }

    memcpy(RuleIn, Subset, NRules+1);
    Verbosity(1) fprintf(fLog,"\n\tBest value %.1f\n", SubsetValue);
}


 
/*************************************************************************/
/*									 */
/*    Try all combinations of rules to find best value			 */
/*									 */
/*************************************************************************/
void AllCombinations(RuleNo NR)    
{
    RuleNo r;

    if ( ! NR )
    {
		CalculateValue();
    }
    else
    {
		r = ClassRules[NR];

		AllCombinations(NR-1);

		AddRule(r);
		AllCombinations(NR-1);

		DeleteRule(r);
		Verbosity(1) fprintf(fLog,"\n");
    }
}



/*************************************************************************/
/*									 */
/*  Find a good subset by simulated annealing				 */
/*									 */
/*************************************************************************/
void SimAnneal(RuleNo RuleCount)
{
    RuleNo r, OutCount;
    short ri, Tries;
    float Temp, Delta;
    Boolean Changed;

    /*  Keep dropping and adding rules until can't improve  */

    for ( Temp = 1000 ; Temp > 0.001 ; Temp *= 0.95 )
    {
		CalculateValue();

		Verbosity(2)
		{
			OutCount = 0;

			ForEach(ri, 1, RuleCount)
			{
				r = ClassRules[ri];

				if ( ! RuleIn[r] )
				{
					if ( ! (OutCount++ % 3) ) fprintf(fLog,"\n\t\t");
					fprintf(fLog,"%d<%d|%d=%.1f> ", r, Right[r], Wrong[r], Value[r]);
				}
			}
			fprintf(fLog,"\n\n");
		}
		
		Changed = false;

		for ( Tries = 100 ; ! Changed && Tries > 0 ; Tries-- )
		{
			/*  Choose a rule to add or delete  */

			ri = RuleCount * Random + 1;

			r = ClassRules[ri];

			Delta = ( RuleIn[r] ? -Value[r] : Value[r] );

			if ( Delta > 0 || Random < exp(Delta / Temp) )
			{
				if ( RuleIn[r] )
				{
					DeleteRule(r);
				}
				else
				{
					AddRule(r);
				}
				
				Changed = true;
			}
		}

		if ( ! Changed ) break;
    }

    /*  Try to improve best subset so far by hill-climbing  */

    Verbosity(1) fprintf(fLog,"Polish: ");
    memcpy(RuleIn, Subset, NRules+1);
    HillClimb(RuleCount);
}



/*************************************************************************/
/*									 */
/*  Find a good subset by repeated greedy search			 */
/*									 */
/*************************************************************************/
void SpotSearch(RuleNo RuleCount)
{
    RuleNo r;
    short ri, Trial;
    float ProbIn;

    ForEach(Trial, 0, 10)
    {
		Verbosity(1) fprintf(fLog,"\n    Trial %d:", Trial);

		/*  Add rules randomly to the initial subset  */

		ProbIn = Trial / 10.0;
		ForEach(ri, 1, RuleCount)
		{
			r = ClassRules[ri];
			RuleIn[r] = Random < ProbIn;
		}

		HillClimb(RuleCount);
    }
}



/*************************************************************************/
/*									 */
/*  Improve a subset of rules by adding and deleting rules		 */
/*									 */
/*************************************************************************/
void HillClimb(RuleNo RuleCount)
{
    RuleNo r, Bestr;
    short ri, OutCount;
    ItemNo i;
    float Delta, BestDelta;

    ForEach(i, 0, MaxItem)
    {
		Covered[i] = 0;
    }

    ForEach(ri, 1, RuleCount)
    {
		r = ClassRules[ri];
		if ( RuleIn[r] )
		{
			ForEach(i, 0, MaxItem)
			{
				if ( Match[r][i] )
				{
					Covered[i]++;
				}
			}
		}
    }
    
    /*  Add or drop rule with greatest reduction in coding cost  */

    while ( true )
    {
		CalculateValue();

		Verbosity(2)
		{
			OutCount = 0;

			ForEach(ri, 1, RuleCount)
			{
				r = ClassRules[ri];

				if ( ! RuleIn[r] )
				{
					if ( ! (OutCount++ % 3) ) fprintf(fLog,"\n\t\t");
					fprintf(fLog,"%d<%d|%d=%.1f> ", r, Right[r], Wrong[r], Value[r]);
				}
			}

			fprintf(fLog,"\n\n");
		}

		Bestr = BestDelta = 0;
		ForEach(ri, 1, RuleCount)
		{
			r = ClassRules[ri];
			Delta = ( RuleIn[r] ? -Value[r] : Value[r] );
			if ( Delta > BestDelta )
			{
				Bestr = r;
				BestDelta = Delta;
			}
		}
		if ( ! Bestr ) break;

		if ( RuleIn[Bestr] )
		{
			DeleteRule(Bestr);
		}
		else
		{
			AddRule(Bestr);
		}
    }
}



/*************************************************************************/
/*								         */
/*  Find the number of correct and incorrect rule firings for rules      */
/*  for class FocusClass and hence determine the Value of the rules.     */
/*  If best so far, remember.						 */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -