📄 c4.5rulesgt.c
字号:
if ( RuleSpace > 100 ) { Rule = (PR *) realloc(Rule, RuleSpace * sizeof(PR)); } else { Rule = (PR *) malloc(RuleSpace * sizeof(PR)); } } /* Form the new rule */ Rule[NRules].Size = NConds; Rule[NRules].Lhs = (Condition *) calloc(NConds+1, sizeof(Condition)); ForEach(d, 1, NConds) { Rule[NRules].Lhs[d] = (Condition) malloc(sizeof(struct CondRec)); Rule[NRules].Lhs[d]->CondTest = Cond[d]->CondTest; Rule[NRules].Lhs[d]->TestValue = Cond[d]->TestValue; } Rule[NRules].Rhs = TargetClass; Rule[NRules].Error = Err; Verbosity(1) PrintRule(NRules); return true;}/*************************************************************************//* *//* Decide whether the given rule duplicates rule r *//* *//*************************************************************************/Boolean SameRule(r, Cond, NConds, TargetClass)/* -------- */ RuleNo r; Condition Cond[]; short NConds; ClassNo TargetClass;{ short d, i; Test SubTest1, SubTest2; if ( Rule[r].Size != NConds || Rule[r].Rhs != TargetClass ) { return false; } ForEach(d, 1, NConds) { if ( Rule[r].Lhs[d]->CondTest->NodeType != Cond[d]->CondTest->NodeType || Rule[r].Lhs[d]->CondTest->Tested != Cond[d]->CondTest->Tested ) { return false; } switch ( Cond[d]->CondTest->NodeType ) { case BrDiscr: if ( Rule[r].Lhs[d]->TestValue != Cond[d]->TestValue ) { return false; } break; case ThreshContin: if ( Rule[r].Lhs[d]->CondTest->Cut != Cond[d]->CondTest->Cut ) { return false; } break; case BrSubset: SubTest1 = Rule[r].Lhs[d]->CondTest; SubTest2 = Cond[d]->CondTest; ForEach(i, 1, SubTest1->Forks) { if ( SubTest1->Subset[i] != SubTest2->Subset[i] ) { return false; } } } } return true;}/*************************************************************************//* *//* Print the current indexed ruleset *//* *//*************************************************************************/ PrintIndexedRules()/* ----------------- */{ short ri; ForEach(ri, 1, NRules ) { PrintRule(RuleIndex[ri]); } printf("\nDefault class: %s\n", ClassName[DefaultClass]);}/*************************************************************************//* *//* Print the rule r *//* *//*************************************************************************/ PrintRule(r)/* --------- */ RuleNo r;{ short d; printf("\nRule %d:\n", r); ForEach(d, 1, Rule[r].Size) { printf(" "); PrintCondition(Rule[r].Lhs[d]); } printf("\t-> class %s [%.1f%%]\n", ClassName[Rule[r].Rhs], 100 * (1 - Rule[r].Error));}/*************************************************************************//* *//* Print a condition c of a production rule *//* *//*************************************************************************/ PrintCondition(c)/* -------------- */ Condition c;{ Test tp; DiscrValue v, pv, Last, Values=0; Boolean First=true; Attribute Att; tp = c->CondTest; v = c->TestValue; Att = tp->Tested; printf("\t%s", AttName[Att]); if ( v < 0 ) { printf(" is unknown\n"); return; } switch ( tp->NodeType ) { case BrDiscr: printf(" = %s\n", AttValName[Att][v]); break; case ThreshContin: printf(" %s %g\n", ( v == 1 ? "<=" : ">" ), tp->Cut); break; case BrSubset: /* Count values at this branch */ for ( pv=1 ; Values <= 1 && pv <= MaxAttVal[Att] ; pv++ ) { if ( In(pv, tp->Subset[v]) ) { Last = pv; Values++; } } if ( Values == 1 ) { printf(" = %s\n", AttValName[Att][Last]); break; } printf(" in "); ForEach(pv, 1, MaxAttVal[Att]) { if ( In(pv, tp->Subset[v]) ) { if ( First ) { printf("{"); First = false; } else { printf(", "); } printf("%s", AttValName[Att][pv]); } } printf("}\n"); }}/*************************************************************************//* *//* Tabluate logs and log factorials (to improve speed) *//* -------------------------------- *//* *//*************************************************************************/float *LogItemNo;double *LogFact;/*************************************************************************//* *//* Set up the array LogItemNo to contain the logs of integers and *//* the array LogFact to contain logs of factorials (all to base 2) *//* *//*************************************************************************/ GenerateLogs()/* ------------ */{ ItemNo i; LogItemNo = (float *) malloc((MaxItem+100) * sizeof(float)); LogFact = (double *) malloc((MaxItem+100) * sizeof(double)); LogItemNo[0] = -1E38; LogItemNo[1] = 0; LogFact[0] = LogFact[1] = 0; ForEach(i, 2, MaxItem+99) { LogItemNo[i] = log((float) i) / Log2; LogFact[i] = LogFact[i-1] + LogItemNo[i]; }}/*************************************************************************//* *//* Generate all rulesets from the decision trees *//* --------------------------------------------- *//* *//*************************************************************************//*************************************************************************//* *//* For each tree, form a set of rules and process them, then form a *//* composite set of rules from all of these sets. *//* If there is only one tree, then no composite set is formed. *//* *//* Rulesets are stored in PRSet[0] to PRSet[TRIALS], where *//* PRSet[TRIALS] contains the composite ruleset. *//* *//* On completion, the current ruleset is the composite ruleset (if one *//* has been made), otherwise the ruleset from the single tree. *//* *//*************************************************************************/ GenerateRules()/* ------------- */{ Tree DecisionTree, GetTree(); short t=0, RuleSetSpace=0, r; /* Find bits to encode attributes and branches */ FindTestCodes(); /* Now process each decision tree */ while ( DecisionTree = GetTree(".unpruned") ) { printf("\n------------------\n"); printf("Processing tree %d\n", t); /* Form a set of rules from the next tree */ FormRules(DecisionTree); /* Process the set of rules for this trial */ ConstructRuleset(); printf("\nFinal rules from tree %d:\n", t); PrintIndexedRules(); /* Make sure there is enough room for the new ruleset */ if ( t + 1 >= RuleSetSpace ) { RuleSetSpace += 10; if ( RuleSetSpace > 10 ) { PRSet = (RuleSet *) realloc(PRSet, RuleSetSpace * sizeof(RuleSet)); } else { PRSet = (RuleSet *) malloc(RuleSetSpace * sizeof(RuleSet)); } } PRSet[t].SNRules = NRules; PRSet[t].SRule = Rule; PRSet[t].SRuleIndex = RuleIndex; PRSet[t].SDefaultClass = DefaultClass; ++t; } if ( ! t ) { printf("\nERROR: can't find any decision trees\n"); exit(1); } TRIALS = t; /* If there is more than one tree in the trees file, make a composite ruleset of the rules from all trees */ if ( TRIALS > 1 ) { CompositeRuleset(); }}/*************************************************************************//* *//* Determine code lengths for attributes and branches *//* *//*************************************************************************/ FindTestCodes()/* ------------- */{ Attribute Att; DiscrValue v, V; ItemNo i, *ValFreq; int PossibleCuts; float Sum, SumBranches=0, p; void SwapUnweighted(); BranchBits = (float *) malloc((MaxAtt+1) * sizeof(float)); ForEach(Att, 0, MaxAtt) { if ( (V = MaxAttVal[Att]) ) { ValFreq = (ItemNo *) calloc(V+1, sizeof(ItemNo)); ForEach(i, 0, MaxItem) { ValFreq[DVal(Item[i],Att)]++; } Sum = 0; ForEach(v, 1, V) { if ( ValFreq[v] ) { Sum += (ValFreq[v] / (MaxItem+1.0)) * (LogItemNo[MaxItem+1] - LogItemNo[ValFreq[v]]); } } free(ValFreq); BranchBits[Att] = Sum; } else { Quicksort(0, MaxItem, Att, SwapUnweighted); PossibleCuts = 1; ForEach(i, 1, MaxItem) { if ( CVal(Item[i],Att) > CVal(Item[i-1],Att) ) { PossibleCuts++; } } BranchBits[Att] = PossibleCuts > 1 ? 1 + LogItemNo[PossibleCuts] / 2 : 0 ; } SumBranches += BranchBits[Att]; } AttTestBits = 0; ForEach(Att, 0, MaxAtt) { if ( (p = BranchBits[Att] / SumBranches) > 0 ) { AttTestBits -= p * log(p) / log(2.0); } }}/*************************************************************************//* *//* Exchange items at a and b. Note: unlike the similar routine in *//* buildtree, this does not assume that items have a Weight to be *//* swapped as well! *//* *//*************************************************************************/void SwapUnweighted(a, b)/* -------------- */ ItemNo a, b;{ Description Hold; Hold = Item[a]; Item[a] = Item[b]; Item[b] = Hold;}/*************************************************************************//* *//* Form composite ruleset for all trials *//* *//*************************************************************************/ CompositeRuleset()/* ---------------- */{ RuleNo r; short t, ri; Boolean NewRule(); InitialiseRules(); /* Lump together all the rules from each ruleset */ ForEach(t, 0, TRIALS-1) { ForEach(ri, 1, PRSet[t].SNRules) { r = PRSet[t].SRuleIndex[ri]; NewRule(PRSet[t].SRule[r].Lhs, PRSet[t].SRule[r].Size, PRSet[t].SRule[r].Rhs, PRSet[t].SRule[r].Error); } } /* ... and select a subset in the usual way */ ConstructRuleset(); printf("\nComposite ruleset:\n"); PrintIndexedRules(); PRSet[TRIALS].SNRules = NRules; PRSet[TRIALS].SRule = Rule; PRSet[TRIALS].SRuleIndex = RuleIndex; PRSet[TRIALS].SDefaultClass = DefaultClass;}/*************************************************************************//* *//* Form a set of rules from a decision tree *//* ---------------------------------------- *//* *//*************************************************************************/ItemNo *TargetClassFreq, /* [Boolean] */ *Errors, /* [Condition] */ *Total; /* [Condition] */float *Pessimistic, /* [Condition] */ *Actual, /* [Condition] */ *CondSigLevel; /* [Condition] */Boolean **CondSatisfiedBy, /* [Condition][ItemNo] */ *Deleted; /* [Condition] */DiscrValue *SingleValue; /* [Attribute] */Condition *Stack;short MaxDisjuncts, MaxDepth;/*************************************************************************//* *//* Form a ruleset from decision tree t *//* *//*************************************************************************/ FormRules(t)/* ---------- */ Tree t;{ short i; /* Find essential parameters and allocate storage */ MaxDepth = 0; MaxDisjuncts = 0; TreeParameters(t, 0); Actual = (float *) calloc(MaxDepth+2, sizeof(float)); Total = (ItemNo *) calloc(MaxDepth+2, sizeof(ItemNo)); Errors = (ItemNo *) calloc(MaxDepth+2, sizeof(ItemNo)); Pessimistic = (float *) calloc(MaxDepth+2, sizeof(float)); CondSigLevel = (float *) calloc(MaxDepth+2, sizeof(float)); TargetClassFreq = (ItemNo *) calloc(2, sizeof(ItemNo)); Deleted = (Boolean *) calloc(MaxDepth+2, sizeof(Boolean)); CondSatisfiedBy = (char **) calloc(MaxDepth+2, sizeof(char *)); Stack = (Condition *) calloc(MaxDepth+2, sizeof(Condition)); ForEach(i, 0, MaxDepth+1) { CondSatisfiedBy[i] = (char *) calloc(MaxItem+1, sizeof(char)); Stack[i] = (Condition) malloc(sizeof(struct CondRec)); } SingleValue = (DiscrValue *) calloc(MaxAtt+1, sizeof(DiscrValue)); InitialiseRules(); /* Extract and prune disjuncts */ Scan(t, 0); /* Deallocate storage */ ForEach(i, 0, MaxDepth+1) { free(CondSatisfiedBy[i]); free(Stack[i]); } free(Deleted); free(CondSatisfiedBy); free(Stack); free(Actual); free(Total); free(Errors); free(Pessimistic); free(CondSigLevel); free(TargetClassFreq);}/*************************************************************************//* *//* Find the maximum depth and the number of leaves in tree t *//* with initial depth d *//* *//*************************************************************************/ TreeParameters(t, d)/* --------------- */ Tree t; short d;{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -