⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 genrules.cpp

📁 实现决策树分类训练试验。 源自c4.5
💻 CPP
字号:
/*************************************************************************/
/*									 */
/*	Generate all rulesets from the decision trees		  	 */
/*	---------------------------------------------		  	 */
/*								  	 */
/*************************************************************************/
#include "stdafx.h"
#include "MyBase.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

extern FILE *fLog;
extern CProgressCtrl	mProgCtrl;

extern PR	*Rule;		/* production rules */

extern RuleNo	NRules,		/* number of production rules */
		*RuleIndex;	/* index to production rules */

extern short	RuleSpace;	/* space currently allocated for rules */

extern RuleSet	*PRSet;		/* set of rulesets */

extern ClassNo	DefaultClass;	/* default class associated with ruleset */

extern Boolean	SIGTEST,	/* use Fisher's test in rule pruning */
		SIMANNEAL;	/* use simulated annealing */

extern float	SIGTHRESH,	/* sig level used in rule pruning */
		REDUNDANCY,	/* factor governing encoding tradeoff
				   between rules and exceptions */
		AttTestBits,	/* average bits to encode tested attribute */
		*BranchBits;	/* ditto attribute value */

extern float	*LogItemNo;	/* LogItemNo[i] = log2(i) */
extern double	*LogFact;	/* LogFact[i] = log2(i!) */
extern ItemNo	*TargetClassFreq;  /* [Boolean] */


ItemNo	*Errors,		/* [Condition] */
	*Total;			/* [Condition] */

float	*Pessimistic,		/* [Condition] */
	*Actual,		/* [Condition] */
	*CondSigLevel;		/* [Condition] */

Boolean	**CondSatisfiedBy,	/* [Condition][ItemNo] */
	*Deleted;		/* [Condition] */

DiscrValue *SingleValue;	/* [Attribute] */

Condition *Stack;

short	MaxDisjuncts,
	MaxDepth;

/*************************************************************************/
/*								  	 */
/*  For each tree, form a set of rules and process them, then form a	 */
/*  composite set of rules from all of these sets.		  	 */
/*  If there is only one tree, then no composite set is formed.	  	 */
/*								  	 */
/*  Rulesets are stored in  PRSet[0]  to  PRSet[TRIALS], where    	 */
/*  PRSet[TRIALS] contains the composite ruleset.		  	 */
/*								  	 */
/*  On completion, the current ruleset is the composite ruleset (if one	 */
/*  has been made), otherwise the ruleset from the single tree. 	 */
/*								  	 */
/*************************************************************************/
void GenerateRules()
{
    Tree DecisionTree;
    short t=0, RuleSetSpace=0, r;

    /*  Find bits to encode attributes and branches  */

    FindTestCodes();

    /*  Now process each decision tree  */

    while ( DecisionTree = GetTree(".unpruned") )
    {
		fprintf(fLog,"\n------------------\n");
		fprintf(fLog,"Processing tree %d\n", t);

		/*  Form a set of rules from the next tree  */

		FormRules(DecisionTree);

		/*  Process the set of rules for this trial  */

		ConstructRuleset();

		fprintf(fLog,"\nFinal rules from tree %d:\n", t);
		PrintIndexedRules();
			
		/*  Make sure there is enough room for the new ruleset  */

		if ( t + 1 >= RuleSetSpace )
		{
			RuleSetSpace += 10;

			if ( RuleSetSpace > 10 )
			{
				PRSet = (RuleSet *) realloc(PRSet, RuleSetSpace * sizeof(RuleSet));
			}
			else
			{
				PRSet = (RuleSet *) malloc(RuleSetSpace * sizeof(RuleSet));
			}

		}

		PRSet[t].SNRules = NRules;
		PRSet[t].SRule = Rule;
		PRSet[t].SRuleIndex = RuleIndex;
		PRSet[t].SDefaultClass = DefaultClass;

		++t;
	}

	if ( ! t )
	{
		Error(0,"ERROR:  can't find any decision trees","");
	}

    TRIALS = t;

    /*  If there is more than one tree in the trees file,
	make a composite ruleset of the rules from all trees  */

    if ( TRIALS > 1 )
    {
		CompositeRuleset();
    }
}



/*************************************************************************/
/*								  	 */
/*	Determine code lengths for attributes and branches		 */
/*								  	 */
/*************************************************************************/
void FindTestCodes()
{
    Attribute Att;
    DiscrValue v, V;
    ItemNo i, *ValFreq;
    int PossibleCuts;
    float Sum, SumBranches=0, p;

    BranchBits  = (float *) malloc((MaxAtt+1) * sizeof(float));

    ForEach(Att, 0, MaxAtt)
	{
		if ( (V = MaxAttVal[Att]) )
		{
			ValFreq = (ItemNo *) calloc(V+1, sizeof(ItemNo));

			ForEach(i, 0, MaxItem)
			{
				ValFreq[DVal(Item[i],Att)]++;
			}

			Sum = 0;
			ForEach(v, 1, V)
			{
				if ( ValFreq[v] )
				{
					Sum += (ValFreq[v] / (MaxItem+1.0)) *
					   (LogItemNo[MaxItem+1] - LogItemNo[ValFreq[v]]);
				}
			}
			delete ValFreq;

			BranchBits[Att] = Sum;
		}
		else
		{
			Quicksort(0, MaxItem, Att, 1);

			PossibleCuts = 1;
			ForEach(i, 1, MaxItem)
			{
				if ( CVal(Item[i],Att) > CVal(Item[i-1],Att) )
				{
					PossibleCuts++;
				}
			}

			BranchBits[Att] = PossibleCuts > 1 ?
					  1 + LogItemNo[PossibleCuts] / 2 : 0 ;
		}

		SumBranches += BranchBits[Att];
	}

    AttTestBits = 0;
    ForEach(Att, 0, MaxAtt)
    {
		if ( (p = BranchBits[Att] / SumBranches) > 0 )
		{
			AttTestBits -= p * log(p) / log(2.0);
		}
    }
}


/*************************************************************************/
/*								  	 */
/*	Form composite ruleset for all trials			  	 */
/*								  	 */
/*************************************************************************/
void  CompositeRuleset()
{
    RuleNo r;
    short t, ri;
    
    InitialiseRules();

    /*  Lump together all the rules from each ruleset  */

    ForEach(t, 0, TRIALS-1)
    {
		ForEach(ri, 1, PRSet[t].SNRules)
		{
			r = PRSet[t].SRuleIndex[ri];
			NewRule(PRSet[t].SRule[r].Lhs, PRSet[t].SRule[r].Size,
				 PRSet[t].SRule[r].Rhs, PRSet[t].SRule[r].Error);
		}
    }

    /*  ... and select a subset in the usual way  */

    ConstructRuleset();

    fprintf(fLog,"\nComposite ruleset:\n");
    PrintIndexedRules();

    PRSet[TRIALS].SNRules    = NRules;
    PRSet[TRIALS].SRule      = Rule;
    PRSet[TRIALS].SRuleIndex = RuleIndex;
    PRSet[TRIALS].SDefaultClass = DefaultClass;
}


/*************************************************************************/
/*								  	 */
/*	Form a set of rules from a decision tree			 */
/*	----------------------------------------			 */
/*								  	 */
/*************************************************************************/
/*************************************************************************/
/*								  	 */
/*	Form a ruleset from decision tree t			  	 */
/*								  	 */
/*************************************************************************/
void FormRules(Tree t)
{
    short i;

    /*  Find essential parameters and allocate storage  */

    MaxDepth = 0;
    MaxDisjuncts = 0;

    TreeParameters(t, 0);

    Actual = (float *) calloc(MaxDepth+2, sizeof(float));
    Total = (ItemNo *) calloc(MaxDepth+2, sizeof(ItemNo));
    Errors = (ItemNo *) calloc(MaxDepth+2, sizeof(ItemNo));
    Pessimistic = (float *) calloc(MaxDepth+2, sizeof(float));

    CondSigLevel = (float *) calloc(MaxDepth+2, sizeof(float));

    TargetClassFreq = (ItemNo *) calloc(2, sizeof(ItemNo));

    Deleted = (Boolean *) calloc(MaxDepth+2, sizeof(Boolean));
    CondSatisfiedBy = (char **) calloc(MaxDepth+2, sizeof(char *));
    Stack = (Condition *) calloc(MaxDepth+2, sizeof(Condition));

    ForEach(i, 0, MaxDepth+1)
    {
		CondSatisfiedBy[i] = (char *) calloc(MaxItem+1, sizeof(char));
		Stack[i] = (Condition) malloc(sizeof(struct CondRec));
    }

    SingleValue = (DiscrValue *) calloc(MaxAtt+1, sizeof(DiscrValue));

    InitialiseRules();

    /*  Extract and prune disjuncts  */

    Scan(t, 0);

    /*  Deallocate storage  */

    ForEach(i, 0, MaxDepth+1)
    {
		delete CondSatisfiedBy[i];
		delete Stack[i];
    }
    delete Deleted;
    delete CondSatisfiedBy;
    delete Stack;

    delete Actual;
    delete Total;
    delete Errors;
    delete Pessimistic;

    delete CondSigLevel;

    delete TargetClassFreq;

}



/*************************************************************************/
/*                                                                	 */
/*  Find the maximum depth and the number of leaves in tree t	  	 */
/*  with initial depth d					  	 */
/*                                                                	 */
/*************************************************************************/
void TreeParameters(Tree t, short d)
{
    DiscrValue v;

    if ( t->NodeType )
    {
		ForEach(v, 1, t->Forks)
		{
			TreeParameters(t->Branch[v], d+1);
		}
    }
    else
    {
		/*  This is a leaf  */

		if ( d > MaxDepth ) 
			MaxDepth = d;
		MaxDisjuncts++;
    }
}



/*************************************************************************/
/*								  	 */
/*  Extract disjuncts from tree t at depth d, and process them	  	 */
/*								  	 */
/*************************************************************************/
void Scan(Tree t,short d)
{
    DiscrValue v;
    short i;
    Condition *Term;
    Test x;

    if ( t->NodeType )
    {
		d++;

		x = (Test) malloc(sizeof(struct TestRec));
		x->NodeType = t->NodeType;
		x->Tested = t->Tested;
		x->Forks = t->Forks;
		x->Cut = ( t->NodeType == ThreshContin ? t->Cut : 0 );
		if ( t->NodeType == BrSubset )
		{
			x->Subset = (Set *) calloc(t->Forks + 1, sizeof(Set));
			ForEach(v, 1, t->Forks)
			{
				x->Subset[v] = t->Subset[v];
			}
		}

		Stack[d]->CondTest = FindTest(x);

		ForEach(v, 1, t->Forks)
		{
			Stack[d]->TestValue = v;
			Scan(t->Branch[v], d);
		}
	} 
	else if ( t->Items >= 1 )
	{
		/*  Leaf of decision tree - construct the set of
 			conditions associated with this leaf and prune  */

		Term = (Condition *) calloc(d+1, sizeof(Condition));
		ForEach(i, 1, d)
		{
			Term[i] = (Condition) malloc(sizeof(struct CondRec));
			Term[i]->CondTest = Stack[i]->CondTest;
			Term[i]->TestValue = Stack[i]->TestValue;
		}

		PruneRule(Term, d, t->Leaf);

		delete Term;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -