⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 info.cpp

📁 实现决策树分类训练试验。 源自c4.5
💻 CPP
字号:
/*************************************************************************/
/*									 */
/*	Calculate information, information gain, and print dists	 */
/*	--------------------------------------------------------	 */
/*									 */
/*************************************************************************/
#include "stdafx.h"
#include "MyBase.h"

/*  Debug builds only: every use of `new` in this file is rewritten to
    DEBUG_NEW, and THIS_FILE holds this source file's name (__FILE__).
    NOTE(review): presumably the usual MFC allocation-tracking
    boilerplate; DEBUG_NEW itself is defined outside this file.  */
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

/*  Everything below is declared here and defined in another module.  */

/*  Stream that receives the diagnostic output written by this file  */
extern FILE *fLog;

/*  Item-count tables; Freq and ValFreq are filled by callers and read
    by ComputeGain and PrintDistribution  */
extern ItemCount
	*Weight,	/* Weight[i]  = current fraction of item i */
	**Freq,		/* Freq[x][c] = no. items of class c with outcome x */
	*ValFreq;	/* ValFreq[x] = no. items with outcome x (outcome 0 is reported as "unknown") */

/*  Per-attribute results, indexed by attribute number a  */
extern float
	*Gain,		/* Gain[a] = info gain by split on att a */
	*Info,		/* Info[a] = potential info from split on att a */
	*Bar,		/* Bar[a]  = best threshold for contin att a */
	*UnknownRate;	/* UnknownRate[a] = current unknown rate for att a */

extern char
	*Tested;	/* Tested[a] = true if att a already tested */

/*************************************************************************
 *
 *  Worth: score a candidate split under the operative criterion.
 *
 *      Parameters:
 *          ThisInfo:   potential info of the split
 *          ThisGain:   gain in info of the split
 *          MinGain:    gain the split must reach (within Epsilon)
 *                      before the gain ratio is used
 *
 *  When GAINRATIO is set the result is ThisGain / ThisInfo, provided
 *  ThisInfo > Epsilon and ThisGain >= MinGain - Epsilon.
 *  When GAINRATIO is not set the result is ThisGain itself, provided
 *  ThisInfo > 0 and ThisGain > -Epsilon.
 *  In every other case -Epsilon is returned to mark the split as
 *  not worth considering.
 *
 *************************************************************************/
float Worth(float ThisInfo, float ThisGain, float MinGain)
{
    if ( !( GAINRATIO ) )
    {
        /*  Plain gain criterion  */
        if ( ThisInfo > 0 && ThisGain > -Epsilon )
        {
            return ThisGain;
        }
        return -Epsilon;
    }

    /*  Gain ratio criterion: needs a usable denominator and enough gain  */
    if ( ThisInfo > Epsilon && ThisGain >= MinGain - Epsilon )
    {
        return ThisGain / ThisInfo;
    }
    return -Epsilon;
}



/*************************************************************************
 *
 *  ResetFreq: clear the frequency tables.
 *
 *  For every outcome 0..MaxVal, sets ValFreq[outcome] to zero and
 *  sets Freq[outcome][class] to zero for every class 0..MaxClass.
 *
 *************************************************************************/
void ResetFreq(DiscrValue MaxVal)
{
    DiscrValue val;
    ClassNo cls;

    ForEach(val, 0, MaxVal)
    {
        ValFreq[val] = 0;

        ForEach(cls, 0, MaxClass)
        {
            Freq[val][cls] = 0;
        }
    }
}



/*************************************************************************/
/*									 */
/*  Given tables Freq[][] and ValFreq[], compute the information gain.	 */
/*									 */
/*	    Parameters:							 */
/*		BaseInfo:	average information for all items with	 */
/*				known values of the test attribute	 */
/*		UnknownRate:	fraction of items with unknown ditto	 */
/*		MaxVal:		number of forks				 */
/*		TotalItems:	number of items with known values of	 */
/*				test att				 */
/*									 */
/*  where Freq[x][y] contains the no. of cases with value x for a	 */
/*  particular attribute that are members of class y,			 */
/*  and ValFreq[x] contains the no. of cases with value x for a		 */
/*  particular attribute						 */
/*									 */
/*************************************************************************/
float ComputeGain(float BaseInfo, float UnknFrac, DiscrValue MaxVal, ItemCount TotalItems)
{
    DiscrValue v;
    float ThisInfo=0.0, ThisGain;
    short ReasonableSubsets=0;

    /*  Check whether all values are unknown or the same  */

    if ( ! TotalItems ) return -Epsilon;

    /*  There must be at least two subsets with MINOBJS items  */

    ForEach(v, 1, MaxVal)
    {
		if ( ValFreq[v] >= MINOBJS ) ReasonableSubsets++;
    }
    if ( ReasonableSubsets < 2 ) return -Epsilon;

    /*  Compute total info after split, by summing the
	info of each of the subsets formed by the test  */

    ForEach(v, 1, MaxVal)
    {
		ThisInfo += TotalInfo(Freq[v], 0, MaxClass);
    }

    /*  Set the gain in information for all items, adjusted for unknowns  */

    ThisGain = (1 - UnknFrac) * (BaseInfo - ThisInfo / TotalItems);

    Verbosity(5)
        fprintf(fLog,"ComputeThisGain: items %.1f info %.3f base %.3f unkn %.3f result %.3f\n",
    		TotalItems + ValFreq[0], ThisInfo, BaseInfo, UnknFrac, ThisGain);

    return ThisGain;
}



/*************************************************************************
 *
 *  TotalInfo: total information in V[ MinVal..MaxVal ].
 *
 *  With T the sum of the entries V[MinVal..MaxVal], the result is
 *      T * Log(T)  -  sum over entries of V[i] * Log(V[i])
 *  where Log is the project's logarithm helper (defined elsewhere).
 *
 *************************************************************************/
float TotalInfo(ItemCount V[], DiscrValue MinVal, DiscrValue MaxVal)
{
    DiscrValue idx;
    ItemCount count, total = 0;
    float weighted = 0.0;

    ForEach(idx, MinVal, MaxVal)
    {
        count = V[idx];

        total += count;
        weighted += (float)(count * Log(count));
    }

    return (float)(total * Log(total) - weighted);
}



/*************************************************************************
 *
 *  PrintDistribution: write the Freq[][] table for an attribute to fLog.
 *
 *      Parameters:
 *          Att:        attribute whose value names label the rows
 *          MaxVal:     last outcome to print (rows 0..MaxVal)
 *          ShowNames:  if set, rows are labelled by name, otherwise
 *                      by outcome number
 *
 *  Output is a header row of class names (classes 0..MaxClass)
 *  followed by one row per outcome holding Freq[outcome][class].
 *  With ShowNames set, outcome 0 is labelled "unknown"; other outcomes
 *  use AttValName[Att][outcome] when MaxAttVal[Att] is non-zero, and
 *  otherwise "below" for outcome 1 and "above" for the rest.
 *
 *************************************************************************/
void PrintDistribution(Attribute Att, DiscrValue MaxVal, Boolean ShowNames)
{
    DiscrValue v;
    ClassNo c;
    /*  Pointer-to-const because the label may be a string literal;
        converting a literal to a non-const char pointer is not valid
        ISO C++ (removed in C++11)  */
    const char *Label;

    /*  Header row: one column per class  */
    fprintf(fLog,"\n\t\t\t ");
    ForEach(c, 0, MaxClass)
    {
        fprintf(fLog,"%7.6s", ClassName[c]);
    }
    fprintf(fLog,"\n");

    /*  One row per outcome, outcome 0 included  */
    ForEach(v, 0, MaxVal)
    {
        if ( ShowNames )
        {
            if ( v == 0 )
            {
                Label = "unknown";
            }
            else if ( MaxAttVal[Att] )
            {
                Label = AttValName[Att][v];
            }
            else if ( v == 1 )
            {
                Label = "below";
            }
            else
            {
                Label = "above";
            }
            fprintf(fLog,"\t\t[%-7.7s:", Label);
        }
        else
        {
            fprintf(fLog,"\t\t[%-7d:", v);
        }

        ForEach(c, 0, MaxClass)
        {
            fprintf(fLog," %6.1f", Freq[v][c]);
        }

        fprintf(fLog,"]\n");
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -