📄 c4.5rulesgt.c
字号:
/* ---------- */ RuleNo r;{ ItemNo i; RuleIn[r] = false; ForEach(i, 0, MaxItem) { if ( Match[r][i] ) { Covered[i]--; } } Verbosity(1) printf("%5d- %6.1f", r, -Value[r]);}/*************************************************************************//* *//* Make an index of included rules in RuleIndex. Select first those *//* classes whose rules have the fewest false positives. Within a *//* class, put rules with higher accuracy ahead. *//* *//*************************************************************************/ MakeIndex()/* --------- */{ ClassNo c, BestC, Pass; RuleNo r, BestR, NewNRules = 0; ItemNo i; Boolean *Included; Included = (Boolean *) calloc(MaxClass+1, sizeof(Boolean)); RuleIndex = (RuleNo *) calloc(NRules+1, sizeof(RuleNo)); Verbosity(1) printf("\nFalsePos Class\n"); ForEach(i, 0, MaxItem) { Covered[i] = 0; } /* Select the best class to put next */ ForEach(Pass, 0, MaxClass) { ForEach(c, 0, MaxClass) { if ( Included[c] ) continue; FalsePos[c] = 0; ForEach(i, 0, MaxItem) { if ( Covered[i] || Class(Item[i]) == c ) continue; ForEach(r, 1, NRules) { if ( Rule[r].Rhs == c && RuleIn[r] && Match[r][i] ) { FalsePos[c]++; break; } } } } BestC = -1; ForEach(c, 0, MaxClass) { if ( ! Included[c] && ( BestC < 0 || FalsePos[c] < FalsePos[BestC] ) ) { BestC = c; } } Included[BestC] = true; Verbosity(1) printf("%5d %s\n", FalsePos[BestC], ClassName[BestC]); /* Now grab the rules for this class */ do { BestR = 0; /* Find the best rule to put next */ ForEach(r, 1, NRules) { if ( RuleIn[r] && Rule[r].Rhs == BestC && ( ! BestR || Rule[r].Error < Rule[BestR].Error ) ) { BestR = r; } } if ( BestR ) { RuleIndex[++NewNRules] = BestR; RuleIn[BestR] = false; ForEach(i, 0, MaxItem) { Covered[i] |= Match[BestR][i]; } } } while ( BestR ); } NRules = NewNRules; free(Included);}/*************************************************************************//* *//* Find the default class as the one with most items not covered by *//* any rule. Resolve ties in favour of more frequent classes. *//* (Note: Covered has been set by MakeIndex.) *//* *//*************************************************************************/ FindDefault()/* ----------- */{ ClassNo c; ItemNo i; /* Determine uncovered items */ ForEach(c, 0, MaxClass) { NoRule[c] = 0; } ForEach(i, 0, MaxItem) { if ( ! Covered[i] ) { NoRule[Class(Item[i])]++; } } Verbosity(1) { printf("\nItems: Uncovered Class\n"); ForEach(c, 0, MaxClass) { printf("%5d %7d %s\n", ClassFreq[c], NoRule[c], ClassName[c]); } printf("\n"); } DefaultClass = 0; ForEach(c, 1, MaxClass) { if ( NoRule[c] > NoRule[DefaultClass] || NoRule[c] == NoRule[DefaultClass] && ClassFreq[c] > ClassFreq[DefaultClass] ) { DefaultClass = c; } }}/*************************************************************************//* *//* Given a rule and a case, determine the strength with which we can *//* conclude that the case belongs to the class specified by the rule's *//* right-hand side. *//* *//* If the case doesn't satisfy all the conditions of the rule, *//* then this is 0. *//* *//*************************************************************************/float Strength(ThisRule, Case)/* -------- */ PR ThisRule; Description Case;{ short d; Boolean Satisfies(); if ( ThisRule.Error > 0.7 ) return 0.0; ForEach(d, 1, ThisRule.Size) { if ( ! Satisfies(Case, ThisRule.Lhs[d]) ) { return 0.0; } } return ( 1 - ThisRule.Error );}/*************************************************************************//* *//* Determine the number of bits to encode exceptions. Unlike the *//* version in the book, this uses an approximate encoding that *//* penalizes unbalanced numbers of false positives and false negatives *//* as described in my paper at 1995 International Machine Learning *//* Conference (published by Morgan Kaufmann). *//* *//*************************************************************************/float Biased(N, E, ExpE)/* ------ */ int N, E; float ExpE;{ float Rate; if ( ExpE <= 1E-6 ) { return ( E == 0 ? 0.0 : 1E6 ); } else if ( ExpE >= N-1E-6 ) { return ( E == N ? 0.0 : 1E6 ); } Rate = ExpE / N; return -E * Log(Rate) - (N-E) * Log(1-Rate);}float ExceptionBits(Fires, FP, FN)/* ------------- */ int Fires, FP, FN;{ if ( Fires > 0.5 * (MaxItem+1) ) { return Log(MaxItem+1) + Biased(Fires, FP, 0.5 * (FP+FN)) + Biased(MaxItem+1-Fires, FN, (float) FN); } else { return Log(MaxItem+1) + Biased(Fires, FP, (float) FP) + Biased(MaxItem+1-Fires, FN, 0.5 * (FP+FN)); }}/*************************************************************************//* *//* Find encoding lengths for all rules *//* *//*************************************************************************/ FindRuleCodes()/* ------------- */{ RuleNo r; short d, NCond; float Bits, CondBits(); ForEach(r, 1, NRules) { NCond = Rule[r].Size; Bits = 0; ForEach(d, 1, NCond) { Bits += CondBits(Rule[r].Lhs[d]); } /* Must encode the number of conditions, but credit the total encoding by the ways conditions can be reordered */ Rule[r].Bits = Bits + LogItemNo[NCond] - LogFact[NCond]; }}/*************************************************************************//* *//* Determine the number of bits required to encode a condition *//* *//*************************************************************************/float CondBits(C)/* -------- */ Condition C;{ Test t; Attribute a; t = C->CondTest; a = t->Tested; switch ( t->NodeType ) { case BrDiscr: /* test of discrete attribute */ case ThreshContin: /* test of continuous attribute */ return AttTestBits/REDUNDANCY + BranchBits[a]; case BrSubset: /* subset test on discrete attribute */ return AttTestBits/REDUNDANCY + MaxAttVal[a]; } }/*************************************************************************//* *//* Evaluatation of rulesets *//* ------------------------ *//* *//*************************************************************************//*************************************************************************//* *//* Evaluate all rulesets *//* *//*************************************************************************/ EvaluateRulesets(DeleteRules)/* ---------------- */ Boolean DeleteRules;{ short t; ItemNo *Errors, Interpret(); float AvSize=0, AvErrs=0; Boolean Final; if ( TRIALS == 1 ) { /* Evaluate current ruleset as there is no composite ruleset */ Interpret(0, MaxItem, DeleteRules, true, true); return; } Errors = (ItemNo *) malloc((TRIALS+1) * sizeof(ItemNo)); ForEach(t, 0, TRIALS) { NRules = PRSet[t].SNRules; Rule = PRSet[t].SRule; RuleIndex = PRSet[t].SRuleIndex; DefaultClass = PRSet[t].SDefaultClass; if ( t < TRIALS ) { printf("\nRuleset %d:\n", t); } else { printf("\nComposite ruleset:\n"); } Final = (t == TRIALS); Errors[t] = Interpret(0, MaxItem, DeleteRules, Final, Final); AvSize += NRules; AvErrs += Errors[t]; if ( DeleteRules ) { PRSet[t].SNRules = NRules; } } /* Print report */ printf("\n"); printf("Trial Size Errors\n"); printf("----- ---- ------\n"); ForEach(t, 0, TRIALS) { if ( t < TRIALS ) { printf("%4d", t); } else { printf(" **"); } printf(" %4d %3d(%4.1f%%)\n", PRSet[t].SNRules, Errors[t], 100 * Errors[t] / (MaxItem+1.0)); } AvSize /= TRIALS + 1; AvErrs /= TRIALS + 1; printf("\t\t\t\tAv size = %.1f, av errors = %.1f (%.1f%%)\n", AvSize, AvErrs, 100 * AvErrs / (MaxItem+1.0));}/*************************************************************************//* *//* Evaluate current ruleset *//* *//*************************************************************************/float Confidence; /* certainty factor of fired rule */ /* (set by BestRuleIndex) */ItemNo Interpret(Fp, Lp, DeleteRules, CMInfo, Arrow)/* --------- */ ItemNo Fp, Lp; Boolean DeleteRules, CMInfo, Arrow;{ ItemNo i, Tested=0, Errors=0, *Better, *Worse, *ConfusionMat; Boolean FoundRule; ClassNo AssignedClass, AltClass; Attribute Att; RuleNo p, Bestr, ri, ri2, riDrop=0, BestRuleIndex(); float ErrorRate, BestRuleConfidence; if ( CMInfo ) { ConfusionMat = (ItemNo *) calloc((MaxClass+1)*(MaxClass+1), sizeof(ItemNo)); } ForEach(ri, 1, NRules) { p = RuleIndex[ri]; Rule[p].Used = Rule[p].Incorrect = 0; } Better = (ItemNo *) calloc(NRules+1, sizeof(ItemNo)); Worse = (ItemNo *) calloc(NRules+1, sizeof(ItemNo)); ForEach(i, Fp, Lp) { /* Find first choice for rule for this item */ ri = BestRuleIndex(Item[i], 1); Bestr = ( ri ? RuleIndex[ri] : 0 ); FoundRule = Bestr > 0; if ( FoundRule ) { Rule[Bestr].Used++; AssignedClass = Rule[Bestr].Rhs; BestRuleConfidence = Confidence; /* Now find second choice */ ri2 = BestRuleIndex(Item[i], ri+1); AltClass = ( ri2 ? Rule[RuleIndex[ri2]].Rhs : DefaultClass ); if ( AltClass != AssignedClass ) { if ( AssignedClass == Class(Item[i]) ) { Better[ri]++; } else if ( AltClass == Class(Item[i]) ) { Worse[ri]++; } } } else { AssignedClass = DefaultClass; } if ( CMInfo ) { ConfusionMat[Class(Item[i])*(MaxClass+1)+AssignedClass]++; } Tested++; if ( AssignedClass != Class(Item[i]) ) { Errors++; if ( FoundRule ) Rule[Bestr].Incorrect++; Verbosity(3) { printf("\n"); ForEach(Att, 0, MaxAtt) { printf("\t%s: ", AttName[Att]); if ( MaxAttVal[Att] ) { if ( DVal(Item[i],Att) ) { printf("%s\n", AttValName[Att][DVal(Item[i],Att)]); } else { printf("?\n"); } } else { if ( CVal(Item[i],Att) != Unknown ) { printf("%g\n", CVal(Item[i],Att)); } else { printf("?\n"); } } } printf("\t%4d:\tGiven class %s,", i, ClassName[Class(Item[i])]); if ( FoundRule ) { printf(" rule %d [%.1f%%] gives class ", Bestr, 100 * BestRuleConfidence); } else { printf(" default class "); } printf("%s\n", ClassName[AssignedClass]); } } } printf("\nRule Size Error Used Wrong\t Advantage\n"); printf( "---- ---- ----- ---- -----\t ---------\n"); ForEach(ri, 1, NRules) { p = RuleIndex[ri]; if ( Rule[p].Used > 0 ) { ErrorRate = Rule[p].Incorrect / (float) Rule[p].Used; printf("%4d%6d%6.1f%%%6d%7d (%.1f%%)\t%6d (%d|%d) \t%s\n", p, Rule[p].Size, 100 * Rule[p].Error, Rule[p].Used, Rule[p].Incorrect, 100 * ErrorRate, Better[ri]-Worse[ri], Better[ri], Worse[ri], ClassName[Rule[p].Rhs]); /* See whether this rule should be dropped. Note: can only drop one rule at a time, because Better and Worse are affected */ if ( DeleteRules && ! riDrop && Worse[ri] > Better[ri] ) { riDrop = ri; } } } free(Better); free(Worse); if ( riDrop ) { printf("\nDrop rule %d\n", RuleIndex[riDrop]); ForEach(ri, riDrop+1, NRules) { RuleIndex[ri-1] = RuleIndex[ri]; } NRules--; if ( CMInfo ) free(ConfusionMat); return Interpret(Fp, Lp, DeleteRules, true, Arrow); } else { printf("\nTested %d, errors %d (%.1f%%)%s\n", Tested, Errors, 100 * Errors / (float) Tested, ( Arrow ? " <<" : "" )); } if ( CMInfo ) { PrintConfusionMatrix(ConfusionMat); free(ConfusionMat); } return Errors;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -