/* info.cpp */
/*************************************************************************/
/* */
/* Calculate information, information gain, and print dists */
/* -------------------------------------------------------- */
/* */
/*************************************************************************/
#include "buildex.h"
#include "c45types.h"
#include "extern.h"
extern FILE *fpScreen;
/*************************************************************************/
/* */
/* Determine the worth of a particular split according to the */
/* operative criterion */
/* */
/* Parameters: */
/* ThisInfo: potential info of the split */
/* ThisGain: gain in info of the split */
/* MinGain: gain above which the Gain Ratio */
/* may be used */
/* */
/* If the Gain criterion is being used, the information gain of */
/* the split is returned, but if the Gain Ratio criterion is */
/* being used, the ratio of the information gain of the split to */
/* its potential information is returned. */
/* */
/*************************************************************************/
double Worth(double ThisInfo, double ThisGain, double MinGain)
{
    /* Gain criterion: the worth is the gain itself, but only when both
       the split information and the gain are strictly positive;
       otherwise the split is rejected with the sentinel -Epsilon.
       MinGain is not consulted on this path. */
    if ( ! GAINRATIO )
    {
        if ( ThisInfo > 0 && ThisGain > 0 )
        {
            return ThisGain;
        }
        return -Epsilon;
    }

    /* Gain ratio criterion: the ratio is only returned when the gain
       reaches MinGain (within a tolerance of Epsilon) and the split
       information exceeds Epsilon, which also keeps the divisor away
       from zero. */
    if ( ThisGain >= MinGain - Epsilon && ThisInfo > Epsilon )
    {
        return ThisGain / ThisInfo;
    }

    return -Epsilon;
}
/*************************************************************************/
/* */
/* Zero the frequency tables Freq[][] and ValFreq[] up to MaxVal */
/* */
/*************************************************************************/
void ResetFreq(DiscrValue MaxVal)
{
    DiscrValue Val;
    ClassNo Cls;

    /* For every attribute value visited by ForEach from 0 to MaxVal,
       clear its total count and then its per-class counts. */
    ForEach(Val, 0, MaxVal)
    {
        ValFreq[Val] = 0;

        ForEach(Cls, 0, MaxClass)
        {
            Freq[Val][Cls] = 0;
        }
    }
}
/*************************************************************************/
/* */
/* Given tables Freq[][] and ValFreq[], compute the information gain. */
/* */
/* Parameters: */
/* BaseInfo: average information for all items with */
/* known values of the test attribute */
/* UnknFrac: fraction of items with unknown value of test att */
/* MaxVal: number of forks */
/* TotalItems: number of items with known values of */
/* test att */
/* */
/* where Freq[x][y] contains the no. of cases with value x for a */
/* particular attribute that are members of class y, */
/* and ValFreq[x] contains the no. of cases with value x for a */
/* particular attribute */
/* */
/*************************************************************************/
double ComputeGain(double BaseInfo, double UnknFrac, DiscrValue MaxVal, ItemCount TotalItems)
{
    DiscrValue Fork;
    int UsableForks = 0;
    double SplitInfoSum = 0.0, Gain;

    /* With no items of known value there is nothing to evaluate */
    if ( TotalItems == 0 ) return -Epsilon;

    /* Reject the split unless at least two forks (values 1..MaxVal;
       value 0 is not counted here) hold MINOBJS items or more */
    ForEach(Fork, 1, MaxVal)
    {
        if ( ValFreq[Fork] >= MINOBJS ) UsableForks += 1;
    }

    if ( UsableForks < 2 ) return -Epsilon;

    /* Accumulate the information of each subset produced by the test */
    ForEach(Fork, 1, MaxVal)
    {
        SplitInfoSum += TotalInfo(Freq[Fork], 0, MaxClass);
    }

    /* Gain = base info minus average info after the split, scaled by the
       fraction of items whose value is known.  Kept as one expression so
       the floating-point evaluation is unchanged. */
    Gain = (1 - UnknFrac) * (BaseInfo - SplitInfoSum / TotalItems);

    /* Diagnostic trace, written to both fpScreen and stdout */
    Verbosity(5)
    {
        fprintf(fpScreen,"ComputeThisGain: items %.1f info %.3f base %.3f unkn %.3f result %.3f\n",
                TotalItems + ValFreq[0], SplitInfoSum, BaseInfo, UnknFrac, Gain);
        printf("ComputeThisGain: items %.1f info %.3f base %.3f unkn %.3f result %.3f\n",
               TotalItems + ValFreq[0], SplitInfoSum, BaseInfo, UnknFrac, Gain);
    }

    return Gain;
}
/*************************************************************************/
/* */
/* Compute the total information in V[ MinVal..MaxVal ] */
/* */
/*************************************************************************/
double TotalInfo(ItemCount V[], DiscrValue MinVal, DiscrValue MaxVal)
{
    DiscrValue Idx;
    ItemCount Count, Total = 0;
    double WeightedLogs = 0.0;

    /* Gather the overall count and the sum of Count * Log(Count)
       over the entries V[MinVal..MaxVal] */
    ForEach(Idx, MinVal, MaxVal)
    {
        Count = V[Idx];
        Total += Count;
        WeightedLogs += Count * Log(Count);
    }

    /* Result is Total * Log(Total) minus the per-entry sum.
       NOTE(review): the unit depends on the base used by Log, which is
       defined outside this file - confirm. */
    return Total * Log(Total) - WeightedLogs;
}
/*************************************************************************/
/* */
/* Print distribution table for given attribute */
/* */
/*************************************************************************/
void PrintDistribution(Attribute Att, DiscrValue MaxVal,Boolean ShowNames)
{
    DiscrValue Row;
    ClassNo Col;
    String Label;

    /* Every piece of output goes to both fpScreen and stdout */

    /* Header line: one column per class, names cut to 6 characters */
    fprintf(fpScreen,"\n\t\t\t ");
    printf("\n\t\t\t ");

    ForEach(Col, 0, MaxClass)
    {
        fprintf(fpScreen,"%7.6s", ClassName[Col]);
        printf("%7.6s", ClassName[Col]);
    }

    fprintf(fpScreen,"\n");
    printf("\n");

    /* One line per attribute value, starting with value 0 */
    ForEach(Row, 0, MaxVal)
    {
        if ( ! ShowNames )
        {
            /* Label the row with the numeric value */
            fprintf(fpScreen,"\t\t[%-7d:", Row);
            printf("\t\t[%-7d:", Row);
        }
        else
        {
            /* Value 0 is shown as "unknown".  If MaxAttVal[Att] is
               non-zero the value's own name is used; otherwise value 1
               is shown as "below" and any other value as "above". */
            if ( Row == 0 )
            {
                Label = "unknown";
            }
            else if ( MaxAttVal[Att] )
            {
                Label = AttValName[Att][Row];
            }
            else if ( Row == 1 )
            {
                Label = "below";
            }
            else
            {
                Label = "above";
            }

            fprintf(fpScreen,"\t\t[%-7.7s:", Label);
            printf("\t\t[%-7.7s:", Label);
        }

        /* Class frequencies for this value */
        ForEach(Col, 0, MaxClass)
        {
            fprintf(fpScreen," %6.1f", Freq[Row][Col]);
            printf(" %6.1f", Freq[Row][Col]);
        }

        fprintf(fpScreen,"]\n");
        printf("]\n");
    }
}
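/* Web code-viewer residue (not part of the program) - keyboard shortcuts:
   copy code: Ctrl + C; search code: Ctrl + F; full-screen mode: F11;
   toggle theme: Ctrl + Shift + D; show shortcuts: ?;
   increase font size: Ctrl + =; decrease font size: Ctrl + -  */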