📄 info.c
字号:
/*************************************************************************//* *//* Calculate information, information gain, and print dists *//* -------------------------------------------------------- *//* *//*************************************************************************/#include "buildex.i"/*************************************************************************//* *//* Determine the worth of a particular split according to the *//* operative criterion *//* *//* Parameters: *//* SplitInfo: potential info of the split *//* SplitGain: gain in info of the split *//* MinGain: gain above which the Gain Ratio *//* may be used *//* *//* If the Gain criterion is being used, the information gain of *//* the split is returned, but if the Gain Ratio criterion is *//* being used, the ratio of the information gain of the split to *//* its potential information is returned. *//* *//*************************************************************************/float TotalInfo(ItemCount V[],DiscrValue MinVal,DiscrValue MaxVal);
float Worth(float ThisInfo,float ThisGain,float MinGain)/* ----- */{ if ( GAINRATIO ) { if ( ThisGain >= MinGain - Epsilon && ThisInfo > Epsilon ) { return ThisGain / ThisInfo; } else { return -(float)Epsilon; } } else { return ( ThisInfo > 0 && ThisGain > -(float)Epsilon ? ThisGain : -(float)Epsilon ); }}/*************************************************************************//* *//* Zero the frequency tables Freq[][] and ValFreq[] up to MaxVal *//* *//*************************************************************************/void ResetFreq(DiscrValue MaxVal)/* --------- */{ DiscrValue v; ClassNo c; ForEach(v, 0, MaxVal) { ForEach(c, 0, MaxClass) { Freq[v][c] = 0; } ValFreq[v] = 0; } }/*************************************************************************//* *//* Given tables Freq[][] and ValFreq[], compute the information gain. *//* *//* Parameters: *//* BaseInfo: average information for all items with *//* known values of the test attribute *//* UnknownRate: fraction of items with unknown ditto *//* MaxVal: number of forks *//* TotalItems: number of items with known values of *//* test att *//* *//* where Freq[x][y] contains the no. of cases with value x for a *//* particular attribute that are members of class y, *//* and ValFreq[x] contains the no. of cases with value x for a *//* particular attribute *//* *//*************************************************************************/float ComputeGain(float BaseInfo,float UnknFrac,DiscrValue MaxVal,ItemCount TotalItems)/* ----------- */{ DiscrValue v; float ThisInfo=0.0, ThisGain; short ReasonableSubsets=0; /* Check whether all values are unknown or the same */ if ( ! TotalItems ) return -(float)Epsilon; /* There must be at least two subsets with MINOBJS items */ ForEach(v, 1, MaxVal) { if ( ValFreq[v] >= MINOBJS ) ReasonableSubsets++; } if ( ReasonableSubsets < 2 ) return -(float)Epsilon; /* Compute total info after split, by summing the info of each of the subsets formed by the test */ ForEach(v, 1, MaxVal) { ThisInfo += TotalInfo(Freq[v], 0, MaxClass); } /* Set the gain in information for all items, adjusted for unknowns */ ThisGain = (1 - UnknFrac) * (BaseInfo - ThisInfo / TotalItems); Verbosity(5) printf("ComputeThisGain: items %.1f info %.3f base %.3f unkn %.3f result %.3f\n", TotalItems + ValFreq[0], ThisInfo, BaseInfo, UnknFrac, ThisGain); return ThisGain;}/*************************************************************************//* *//* Compute the total information in V[ MinVal..MaxVal ] *//* *//*************************************************************************/float TotalInfo(ItemCount V[],DiscrValue MinVal,DiscrValue MaxVal)/* --------- */{ DiscrValue v; float Sum=0.0; ItemCount N, TotalItems=0; ForEach(v, MinVal, MaxVal)
{ N = V[v]; Sum += N * (float) Log(N); TotalItems += N; } return TotalItems *(float) Log(TotalItems) - Sum;}/*************************************************************************//* *//* Print distribution table for given attribute *//* *//*************************************************************************/void PrintDistribution(Attribute Att,DiscrValue MaxVal,Boolean ShowNames)/* ----------------- */{ DiscrValue v; ClassNo c; String Val; printf("\n\t\t\t "); ForEach(c, 0, MaxClass) { printf("%7.6s", ClassName[c]); } printf("\n"); ForEach(v, 0, MaxVal) { if ( ShowNames ) { Val = ( !v ? "unknown" : MaxAttVal[Att] ? AttValName[Att][v] : v == 1 ? "below" : "above" ); printf("\t\t[%-7.7s:", Val); } else { printf("\t\t[%-7d:", v); } ForEach(c, 0, MaxClass) { printf(" %6.1f", Freq[v][c]); } printf("]\n"); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -