📄 genrules.cpp
字号:
/*************************************************************************/
/* */
/* Generate all rulesets from the decision trees */
/* --------------------------------------------- */
/* */
/*************************************************************************/
#include "stdafx.h"
#include "MyBase.h"
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
extern FILE *fLog;
extern CProgressCtrl mProgCtrl;
extern PR *Rule; /* production rules */
extern RuleNo NRules, /* number of production rules */
*RuleIndex; /* index to production rules */
extern short RuleSpace; /* space currently allocated for rules */
extern RuleSet *PRSet; /* set of rulesets */
extern ClassNo DefaultClass; /* default class associated with ruleset */
extern Boolean SIGTEST, /* use Fisher's test in rule pruning */
SIMANNEAL; /* use simulated annealing */
extern float SIGTHRESH, /* sig level used in rule pruning */
REDUNDANCY, /* factor governing encoding tradeoff
between rules and exceptions */
AttTestBits, /* average bits to encode tested attribute */
*BranchBits; /* ditto attribute value */
extern float *LogItemNo; /* LogItemNo[i] = log2(i) */
extern double *LogFact; /* LogFact[i] = log2(i!) */
extern ItemNo *TargetClassFreq; /* [Boolean] */
ItemNo *Errors, /* [Condition] */
*Total; /* [Condition] */
float *Pessimistic, /* [Condition] */
*Actual, /* [Condition] */
*CondSigLevel; /* [Condition] */
Boolean **CondSatisfiedBy, /* [Condition][ItemNo] */
*Deleted; /* [Condition] */
DiscrValue *SingleValue; /* [Attribute] */
Condition *Stack;
short MaxDisjuncts,
MaxDepth;
/*************************************************************************/
/* */
/* For each tree, form a set of rules and process them, then form a */
/* composite set of rules from all of these sets. */
/* If there is only one tree, then no composite set is formed. */
/* */
/* Rulesets are stored in PRSet[0] to PRSet[TRIALS], where */
/* PRSet[TRIALS] contains the composite ruleset. */
/* */
/* On completion, the current ruleset is the composite ruleset (if one */
/* has been made), otherwise the ruleset from the single tree. */
/* */
/*************************************************************************/
void GenerateRules()
{
Tree DecisionTree;
short t=0, RuleSetSpace=0, r;
/* Find bits to encode attributes and branches */
FindTestCodes();
/* Now process each decision tree */
while ( DecisionTree = GetTree(".unpruned") )
{
fprintf(fLog,"\n------------------\n");
fprintf(fLog,"Processing tree %d\n", t);
/* Form a set of rules from the next tree */
FormRules(DecisionTree);
/* Process the set of rules for this trial */
ConstructRuleset();
fprintf(fLog,"\nFinal rules from tree %d:\n", t);
PrintIndexedRules();
/* Make sure there is enough room for the new ruleset */
if ( t + 1 >= RuleSetSpace )
{
RuleSetSpace += 10;
if ( RuleSetSpace > 10 )
{
PRSet = (RuleSet *) realloc(PRSet, RuleSetSpace * sizeof(RuleSet));
}
else
{
PRSet = (RuleSet *) malloc(RuleSetSpace * sizeof(RuleSet));
}
}
PRSet[t].SNRules = NRules;
PRSet[t].SRule = Rule;
PRSet[t].SRuleIndex = RuleIndex;
PRSet[t].SDefaultClass = DefaultClass;
++t;
}
if ( ! t )
{
Error(0,"ERROR: can't find any decision trees","");
}
TRIALS = t;
/* If there is more than one tree in the trees file,
make a composite ruleset of the rules from all trees */
if ( TRIALS > 1 )
{
CompositeRuleset();
}
}
/*************************************************************************/
/* */
/* Determine code lengths for attributes and branches */
/* */
/*************************************************************************/
void FindTestCodes()
{
Attribute Att;
DiscrValue v, V;
ItemNo i, *ValFreq;
int PossibleCuts;
float Sum, SumBranches=0, p;
BranchBits = (float *) malloc((MaxAtt+1) * sizeof(float));
ForEach(Att, 0, MaxAtt)
{
if ( (V = MaxAttVal[Att]) )
{
ValFreq = (ItemNo *) calloc(V+1, sizeof(ItemNo));
ForEach(i, 0, MaxItem)
{
ValFreq[DVal(Item[i],Att)]++;
}
Sum = 0;
ForEach(v, 1, V)
{
if ( ValFreq[v] )
{
Sum += (ValFreq[v] / (MaxItem+1.0)) *
(LogItemNo[MaxItem+1] - LogItemNo[ValFreq[v]]);
}
}
delete ValFreq;
BranchBits[Att] = Sum;
}
else
{
Quicksort(0, MaxItem, Att, 1);
PossibleCuts = 1;
ForEach(i, 1, MaxItem)
{
if ( CVal(Item[i],Att) > CVal(Item[i-1],Att) )
{
PossibleCuts++;
}
}
BranchBits[Att] = PossibleCuts > 1 ?
1 + LogItemNo[PossibleCuts] / 2 : 0 ;
}
SumBranches += BranchBits[Att];
}
AttTestBits = 0;
ForEach(Att, 0, MaxAtt)
{
if ( (p = BranchBits[Att] / SumBranches) > 0 )
{
AttTestBits -= p * log(p) / log(2.0);
}
}
}
/*************************************************************************/
/* */
/* Form composite ruleset for all trials */
/* */
/*************************************************************************/
void CompositeRuleset()
{
RuleNo r;
short t, ri;
InitialiseRules();
/* Lump together all the rules from each ruleset */
ForEach(t, 0, TRIALS-1)
{
ForEach(ri, 1, PRSet[t].SNRules)
{
r = PRSet[t].SRuleIndex[ri];
NewRule(PRSet[t].SRule[r].Lhs, PRSet[t].SRule[r].Size,
PRSet[t].SRule[r].Rhs, PRSet[t].SRule[r].Error);
}
}
/* ... and select a subset in the usual way */
ConstructRuleset();
fprintf(fLog,"\nComposite ruleset:\n");
PrintIndexedRules();
PRSet[TRIALS].SNRules = NRules;
PRSet[TRIALS].SRule = Rule;
PRSet[TRIALS].SRuleIndex = RuleIndex;
PRSet[TRIALS].SDefaultClass = DefaultClass;
}
/*************************************************************************/
/* */
/* Form a set of rules from a decision tree */
/* ---------------------------------------- */
/* */
/*************************************************************************/
/*************************************************************************/
/* */
/* Form a ruleset from decision tree t */
/* */
/*************************************************************************/
void FormRules(Tree t)
{
short i;
/* Find essential parameters and allocate storage */
MaxDepth = 0;
MaxDisjuncts = 0;
TreeParameters(t, 0);
Actual = (float *) calloc(MaxDepth+2, sizeof(float));
Total = (ItemNo *) calloc(MaxDepth+2, sizeof(ItemNo));
Errors = (ItemNo *) calloc(MaxDepth+2, sizeof(ItemNo));
Pessimistic = (float *) calloc(MaxDepth+2, sizeof(float));
CondSigLevel = (float *) calloc(MaxDepth+2, sizeof(float));
TargetClassFreq = (ItemNo *) calloc(2, sizeof(ItemNo));
Deleted = (Boolean *) calloc(MaxDepth+2, sizeof(Boolean));
CondSatisfiedBy = (char **) calloc(MaxDepth+2, sizeof(char *));
Stack = (Condition *) calloc(MaxDepth+2, sizeof(Condition));
ForEach(i, 0, MaxDepth+1)
{
CondSatisfiedBy[i] = (char *) calloc(MaxItem+1, sizeof(char));
Stack[i] = (Condition) malloc(sizeof(struct CondRec));
}
SingleValue = (DiscrValue *) calloc(MaxAtt+1, sizeof(DiscrValue));
InitialiseRules();
/* Extract and prune disjuncts */
Scan(t, 0);
/* Deallocate storage */
ForEach(i, 0, MaxDepth+1)
{
delete CondSatisfiedBy[i];
delete Stack[i];
}
delete Deleted;
delete CondSatisfiedBy;
delete Stack;
delete Actual;
delete Total;
delete Errors;
delete Pessimistic;
delete CondSigLevel;
delete TargetClassFreq;
}
/*************************************************************************/
/* */
/* Find the maximum depth and the number of leaves in tree t */
/* with initial depth d */
/* */
/*************************************************************************/
void TreeParameters(Tree t, short d)
{
DiscrValue v;
if ( t->NodeType )
{
ForEach(v, 1, t->Forks)
{
TreeParameters(t->Branch[v], d+1);
}
}
else
{
/* This is a leaf */
if ( d > MaxDepth )
MaxDepth = d;
MaxDisjuncts++;
}
}
/*************************************************************************/
/* */
/* Extract disjuncts from tree t at depth d, and process them */
/* */
/*************************************************************************/
void Scan(Tree t,short d)
{
DiscrValue v;
short i;
Condition *Term;
Test x;
if ( t->NodeType )
{
d++;
x = (Test) malloc(sizeof(struct TestRec));
x->NodeType = t->NodeType;
x->Tested = t->Tested;
x->Forks = t->Forks;
x->Cut = ( t->NodeType == ThreshContin ? t->Cut : 0 );
if ( t->NodeType == BrSubset )
{
x->Subset = (Set *) calloc(t->Forks + 1, sizeof(Set));
ForEach(v, 1, t->Forks)
{
x->Subset[v] = t->Subset[v];
}
}
Stack[d]->CondTest = FindTest(x);
ForEach(v, 1, t->Forks)
{
Stack[d]->TestValue = v;
Scan(t->Branch[v], d);
}
}
else if ( t->Items >= 1 )
{
/* Leaf of decision tree - construct the set of
conditions associated with this leaf and prune */
Term = (Condition *) calloc(d+1, sizeof(Condition));
ForEach(i, 1, d)
{
Term[i] = (Condition) malloc(sizeof(struct CondRec));
Term[i]->CondTest = Stack[i]->CondTest;
Term[i]->TestValue = Stack[i]->TestValue;
}
PruneRule(Term, d, t->Leaf);
delete Term;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -