/* info.c */
/*************************************************************************/
/* */
/* Calculate information, information gain, and print dists */
/* -------------------------------------------------------- */
/* */
/*************************************************************************/
#include "buildex.i"
/*************************************************************************/
/* */
/* Determine the worth of a particular split according to the */
/* operative criterion */
/* */
/* Parameters: */
/* ThisInfo: potential info of the split */
/* ThisGain: gain in info of the split */
/* MinGain: gain above which the Gain Ratio */
/* may be used */
/* */
/* If the Gain criterion is being used, the information gain of */
/* the split is returned, but if the Gain Ratio criterion is */
/* being used, the ratio of the information gain of the split to */
/* its potential information is returned. */
/* */
/*************************************************************************/
float Worth(ThisInfo, ThisGain, MinGain)
    float ThisInfo, ThisGain, MinGain;
{
    /*  Plain gain criterion: the gain itself is the worth, provided the
	split has positive potential info and the gain is not below
	-Epsilon; otherwise the split is rejected with -Epsilon  */

    if ( ! GAINRATIO )
    {
	if ( ThisInfo > 0 && ThisGain > -Epsilon )
	{
	    return ThisGain;
	}

	return -Epsilon;
    }

    /*  Gain ratio criterion: the ratio is only used when the gain
	reaches MinGain (within Epsilon) and the potential info is
	large enough to divide by  */

    if ( ThisGain >= MinGain - Epsilon && ThisInfo > Epsilon )
    {
	return ThisGain / ThisInfo;
    }

    /*  Split rejected under the gain ratio criterion  */

    return -Epsilon;
}
/*************************************************************************/
/* */
/* Zero the frequency tables Freq[][] and ValFreq[] up to MaxVal */
/* */
/*************************************************************************/
ResetFreq(MaxVal)
    DiscrValue MaxVal;
{
    DiscrValue Val;
    ClassNo Cls;

    /*  Clear row Val of Freq[][] (classes 0..MaxClass) and the matching
	ValFreq[] entry, for every Val in 0..MaxVal  */

    ForEach(Val, 0, MaxVal)
    {
	ValFreq[Val] = 0;

	ForEach(Cls, 0, MaxClass)
	{
	    Freq[Val][Cls] = 0;
	}
    }
}
/*************************************************************************/
/* */
/* Given tables Freq[][] and ValFreq[], compute the information gain. */
/* */
/* Parameters: */
/* BaseInfo: average information for all items with */
/* known values of the test attribute */
/* UnknFrac: fraction of items with unknown values */
/* of the test attribute */
/* MaxVal: number of forks */
/* TotalItems: number of items with known values of */
/* test att */
/* */
/* where Freq[x][y] contains the no. of cases with value x for a */
/* particular attribute that are members of class y, */
/* and ValFreq[x] contains the no. of cases with value x for a */
/* particular attribute */
/* */
/*************************************************************************/
float ComputeGain(BaseInfo, UnknFrac, MaxVal, TotalItems)
    float BaseInfo, UnknFrac;
    DiscrValue MaxVal;
    ItemCount TotalItems;
{
    float TotalInfo();
    DiscrValue Branch;
    short UsableBranches=0;
    float SplitInfo=0.0, Gain;

    /*  Nothing to gain if no item has a known value  */

    if ( TotalItems == 0 ) return -Epsilon;

    /*  Count the branches 1..MaxVal that hold at least MINOBJS items;
	a split needs two or more such branches to be acceptable  */

    ForEach(Branch, 1, MaxVal)
    {
	if ( ValFreq[Branch] >= MINOBJS )
	{
	    UsableBranches++;
	}
    }

    if ( UsableBranches < 2 ) return -Epsilon;

    /*  Accumulate the info of each subset produced by the test,
	taken over classes 0..MaxClass  */

    ForEach(Branch, 1, MaxVal)
    {
	SplitInfo += TotalInfo(Freq[Branch], 0, MaxClass);
    }

    /*  Gain is the drop from BaseInfo to the per-item info after the
	split, scaled down by the fraction of items with known values  */

    Gain = (1 - UnknFrac) * (BaseInfo - SplitInfo / TotalItems);

    /*  Diagnostic output; ValFreq[0] is presumably the count of items
	whose value is unknown -- confirm against the callers  */

    Verbosity(5)
	printf("ComputeThisGain: items %.1f info %.3f base %.3f unkn %.3f result %.3f\n",
	       TotalItems + ValFreq[0], SplitInfo, BaseInfo, UnknFrac, Gain);

    return Gain;
}
/*************************************************************************/
/* */
/* Compute the total information in V[ MinVal..MaxVal ] */
/* */
/*************************************************************************/
float TotalInfo(V, MinVal, MaxVal)
    ItemCount *V;
    DiscrValue MinVal, MaxVal;
{
    DiscrValue i;
    ItemCount Count, Total=0;
    float WeightedLogs=0.0;

    /*  Collect the total count and the sum of Count * Log(Count)
	over V[MinVal..MaxVal]  */

    ForEach(i, MinVal, MaxVal)
    {
	Count = V[i];
	Total += Count;
	WeightedLogs += Count * Log(Count);
    }

    /*  Result is Total * Log(Total) minus the per-entry sum.  Log is
	defined elsewhere; it presumably copes with zero counts --
	confirm  */

    return Total * Log(Total) - WeightedLogs;
}
/*************************************************************************/
/* */
/* Print distribution table for given attribute */
/* */
/*************************************************************************/
PrintDistribution(Att, MaxVal, ShowNames)
    Attribute Att;
    DiscrValue MaxVal;
    Boolean ShowNames;
{
    DiscrValue Row;
    ClassNo Col;
    String Label;

    /*  Heading line: one column per class, names cut to six characters  */

    printf("\n\t\t\t ");
    ForEach(Col, 0, MaxClass)
    {
	printf("%7.6s", ClassName[Col]);
    }
    printf("\n");

    /*  One line per value 0..MaxVal, holding the Freq[][] row  */

    ForEach(Row, 0, MaxVal)
    {
	if ( ! ShowNames )
	{
	    /*  Rows are labelled with the value number only  */

	    printf("\t\t[%-7d:", Row);
	}
	else
	{
	    /*  Value 0 is shown as "unknown"; an attribute with a
		non-zero MaxAttVal uses its value names; otherwise
		value 1 is "below" and any other value is "above"  */

	    if ( Row == 0 )
	    {
		Label = "unknown";
	    }
	    else
	    if ( MaxAttVal[Att] )
	    {
		Label = AttValName[Att][Row];
	    }
	    else
	    if ( Row == 1 )
	    {
		Label = "below";
	    }
	    else
	    {
		Label = "above";
	    }

	    printf("\t\t[%-7.7s:", Label);
	}

	ForEach(Col, 0, MaxClass)
	{
	    printf(" %6.1f", Freq[Row][Col]);
	}

	printf("]\n");
    }
}
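/* Code-viewer residue (not part of info.c) -- keyboard shortcut help:   */
/*   copy code: Ctrl + C          search code: Ctrl + F                  */
/*   full screen: F11             toggle theme: Ctrl + Shift + D         */
/*   show shortcuts: ?                                                    */
/*   increase font size: Ctrl + = decrease font size: Ctrl + -           */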