/* File: info.c */
/* (code-viewer page residue, not part of the program: "Font size:") */
/*************************************************************************/
/*                                                                       */
/*      Calculate information, information gain, and print dists         */
/*      --------------------------------------------------------         */
/*                                                                       */
/*************************************************************************/


#include "buildex.i"


/*
 *  NOTE: the functions below are deliberately kept as old-style (K&R)
 *  definitions.  Arguments of calls made without a prototype in scope
 *  undergo the default argument promotions (e.g. float -> double), and
 *  ComputeGain() below declares TotalInfo() without a prototype.
 *  Rewriting only the definitions with prototypes would change what they
 *  expect to receive; convert definitions and all call sites together.
 */


/*************************************************************************/
/*                                                                       */
/*  Determine the worth of a particular split according to the           */
/*  operative criterion                                                  */
/*                                                                       */
/*          Parameters:                                                  */
/*              ThisInfo:       potential info of the split              */
/*              ThisGain:       gain in info of the split                */
/*              MinGain:        gain above which the Gain Ratio          */
/*                              may be used                              */
/*                                                                       */
/*  If the Gain criterion is being used, the information gain of         */
/*  the split is returned, but if the Gain Ratio criterion is            */
/*  being used, the ratio of the information gain of the split to        */
/*  its potential information is returned.                               */
/*                                                                       */
/*  In either mode -Epsilon is returned when the split is rejected:      */
/*    - Gain Ratio: ThisGain < MinGain - Epsilon or ThisInfo <= Epsilon  */
/*    - Gain:       ThisInfo <= 0 or ThisGain <= -Epsilon                */
/*                                                                       */
/*************************************************************************/


float Worth(ThisInfo, ThisGain, MinGain)
/*    -----  */
    float ThisInfo, ThisGain, MinGain;
{
    if ( GAINRATIO )
    {
        /*  The ThisInfo > Epsilon test also guards the division below  */

        if ( ThisGain >= MinGain - Epsilon && ThisInfo > Epsilon )
        {
            return ThisGain / ThisInfo;
        }
        else
        {
            return -Epsilon;
        }
    }
    else
    {
        return ( ThisInfo > 0 && ThisGain > -Epsilon ? ThisGain : -Epsilon );
    }
}



/*************************************************************************/
/*                                                                       */
/*  Zero the frequency tables Freq[][] and ValFreq[] up to MaxVal        */
/*                                                                       */
/*  For every value index v from 0 to MaxVal, Freq[v][c] is cleared      */
/*  for every class c from 0 to MaxClass, and ValFreq[v] is cleared.     */
/*  (Loop bounds are whatever the ForEach macro, defined elsewhere,      */
/*  makes of them -- presumably inclusive; confirm against its           */
/*  definition.)                                                         */
/*                                                                       */
/*  NOTE(review): no return type is written, so this is an implicit-int  */
/*  function that returns no value.  Left as is so that existing         */
/*  callers/declarations elsewhere are unaffected.                       */
/*                                                                       */
/*************************************************************************/


    ResetFreq(MaxVal)
/*  ---------  */
    DiscrValue MaxVal;
{
    DiscrValue v;
    ClassNo c;

    ForEach(v, 0, MaxVal)
    {
        ForEach(c, 0, MaxClass)
        {
            Freq[v][c] = 0;
        }
        ValFreq[v] = 0;
    }
}



/*************************************************************************/
/*                                                                       */
/*  Given tables Freq[][] and ValFreq[], compute the information gain.   */
/*                                                                       */
/*          Parameters:                                                  */
/*              BaseInfo:       average information for all items with   */
/*                              known values of the test attribute       */
/*              UnknFrac:       fraction of items with unknown ditto     */
/*              MaxVal:         number of forks                          */
/*              TotalItems:     number of items with known values of     */
/*                              test att                                 */
/*                                                                       */
/*  where Freq[x][y] contains the no. of cases with value x for a        */
/*  particular attribute that are members of class y,                    */
/*  and ValFreq[x] contains the no. of cases with value x for a          */
/*  particular attribute                                                 */
/*                                                                       */
/*  Returns -Epsilon when TotalItems is zero or when fewer than two      */
/*  of the values 1..MaxVal have at least MINOBJS cases; otherwise       */
/*      (1 - UnknFrac) * (BaseInfo - ThisInfo / TotalItems)              */
/*  where ThisInfo is the sum of TotalInfo() over values 1..MaxVal.      */
/*                                                                       */
/*************************************************************************/


float ComputeGain(BaseInfo, UnknFrac, MaxVal, TotalItems)
/*    -----------  */
    float BaseInfo, UnknFrac;
    DiscrValue MaxVal;
    ItemCount TotalItems;
{
    DiscrValue v;
    float ThisInfo=0.0, ThisGain, TotalInfo();
    short ReasonableSubsets=0;

    /*  Check whether all values are unknown or the same  */

    if ( ! TotalItems ) return -Epsilon;

    /*  There must be at least two subsets with MINOBJS items.
        Index 0 is skipped here and in the sum below  */

    ForEach(v, 1, MaxVal)
    {
        if ( ValFreq[v] >= MINOBJS ) ReasonableSubsets++;
    }
    if ( ReasonableSubsets < 2 ) return -Epsilon;

    /*  Compute total info after split, by summing the info of each of the
        subsets formed by the test  */

    ForEach(v, 1, MaxVal)
    {
        ThisInfo += TotalInfo(Freq[v], 0, MaxClass);
    }

    /*  Set the gain in information for all items, adjusted for unknowns  */

    ThisGain = (1 - UnknFrac) * (BaseInfo - ThisInfo / TotalItems);

    /*  Diagnostic trace; Verbosity() is a macro defined elsewhere that
        presumably makes the following statement conditional on the
        verbosity level -- confirm against its definition  */

    Verbosity(5)
        printf("ComputeThisGain: items %.1f info %.3f base %.3f unkn %.3f result %.3f\n",
               TotalItems + ValFreq[0], ThisInfo, BaseInfo, UnknFrac, ThisGain);

    return ThisGain;
}



/*************************************************************************/
/*                                                                       */
/*  Compute the total information in V[ MinVal..MaxVal ]                 */
/*                                                                       */
/*  With N = V[v] and T = sum of all N, the value returned is            */
/*      T * Log(T)  -  sum over v of ( N * Log(N) )                      */
/*  (Log is a macro defined elsewhere.)                                  */
/*                                                                       */
/*************************************************************************/


float TotalInfo(V, MinVal, MaxVal)
/*    ---------  */
    ItemCount V[];
    DiscrValue MinVal, MaxVal;
{
    DiscrValue v;
    float Sum=0.0;
    ItemCount N, TotalItems=0;

    ForEach(v, MinVal, MaxVal)
    {
        N = V[v];

        Sum += N * Log(N);
        TotalItems += N;
    }

    return TotalItems * Log(TotalItems) - Sum;
}



/*************************************************************************/
/*                                                                       */
/*  Print distribution table for given attribute                         */
/*                                                                       */
/*          Parameters:                                                  */
/*              Att:            attribute whose table is printed         */
/*              MaxVal:         last value index (row) to print          */
/*              ShowNames:      if true, label rows with value names,    */
/*                              otherwise with the numeric index         */
/*                                                                       */
/*  Prints a header row of class names followed by one row of            */
/*  Freq[v][c] counts for each value index v from 0 to MaxVal.           */
/*                                                                       */
/*  NOTE(review): no return type is written, so this is an implicit-int  */
/*  function that returns no value.  Left as is so that existing         */
/*  callers/declarations elsewhere are unaffected.                       */
/*                                                                       */
/*************************************************************************/


    PrintDistribution(Att, MaxVal, ShowNames)
/*  -----------------  */
    Attribute Att;
    DiscrValue MaxVal;
    Boolean ShowNames;
{
    DiscrValue v;
    ClassNo c;
    String Val;

    printf("\n\t\t\t ");
    ForEach(c, 0, MaxClass)
    {
        printf("%7.6s", ClassName[c]);
    }
    printf("\n");

    ForEach(v, 0, MaxVal)
    {
        if ( ShowNames )
        {
            /*  Row 0 is labelled "unknown"; when MaxAttVal[Att] is
                non-zero the attribute's own value names are used,
                otherwise rows 1 and above are "below" / "above"  */

            Val = ( !v ? "unknown" :
                    MaxAttVal[Att] ? AttValName[Att][v] :
                    v == 1 ? "below" : "above" );
            printf("\t\t[%-7.7s:", Val);
        }
        else
        {
            printf("\t\t[%-7d:", v);
        }

        ForEach(c, 0, MaxClass)
        {
            printf(" %6.1f", Freq[v][c]);
        }

        printf("]\n");
    }
}
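/*
 *  Code-viewer page residue (not part of info.c): keyboard shortcut help
 *
 *      Copy code:            Ctrl + C
 *      Search code:          Ctrl + F
 *      Full-screen mode:     F11
 *      Toggle theme:         Ctrl + Shift + D
 *      Show shortcuts:       ?
 *      Increase font size:   Ctrl + =
 *      Decrease font size:   Ctrl + -
 */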