⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 info.cpp

📁 计算机人工智能方面的决策树方法 c4.5
💻 CPP
字号:
/*************************************************************************/
/*                                                                       */
/*      Calculate information, information gain, and print dists         */
/*      --------------------------------------------------------         */
/*                                                                       */
/*************************************************************************/


#include "buildex.h"
#include "c45types.h"
#include "extern.h"
extern FILE *fpScreen;


/*************************************************************************/
/*                                                                       */
/*  Determine the worth of a particular split according to the           */
/*  operative criterion                                                  */
/*                                                                       */
/*          Parameters:                                                  */
/*              ThisInfo:       potential info of the split              */
/*              ThisGain:       gain in info of the split                */
/*              MinGain:        gain above which the Gain Ratio          */
/*                              may be used                              */
/*                                                                       */
/*  If the Gain criterion is being used, the information gain of         */
/*  the split is returned, but if the Gain Ratio criterion is            */
/*  being used, the ratio of the information gain of the split to        */
/*  its potential information is returned.                               */
/*                                                                       */
/*************************************************************************/


double Worth(double ThisInfo, double ThisGain, double MinGain)
{
    /*  Score a candidate split.  Every rejected split scores -Epsilon.

	ThisInfo:  potential (split) information of the candidate
	ThisGain:  information gain of the candidate
	MinGain:   gain threshold consulted only under the ratio criterion  */

    if ( GAINRATIO )
    {
	/*  Ratio criterion: the gain must reach MinGain (within Epsilon)
	    and the split info must exceed Epsilon so that the division
	    is meaningful  */

	if ( ThisGain >= MinGain - Epsilon && ThisInfo > Epsilon )
	{
	    return ThisGain / ThisInfo;
	}

	return -Epsilon;
    }

    /*  Plain gain criterion: accept only when both the split info and
	the gain are strictly positive  */

    if ( ThisInfo > 0 && ThisGain > 0 )
    {
	return ThisGain;
    }

    return -Epsilon;
}



/*************************************************************************/
/*                                                                       */
/*  Zero the frequency tables Freq[][] and ValFreq[] up to MaxVal        */
/*                                                                       */
/*************************************************************************/


void    ResetFreq(DiscrValue MaxVal)
{
    /*  Clear the working frequency tables for attribute values
	0 .. MaxVal (as iterated by ForEach):
	    ValFreq[value]        per-value count
	    Freq[value][class]    per-value, per-class count for every
				  class index ForEach visits up to MaxClass  */

    DiscrValue value;
    ClassNo cls;

    ForEach(value, 0, MaxVal)
    {
	ValFreq[value] = 0;

	ForEach(cls, 0, MaxClass)
	{
	    Freq[value][cls] = 0;
	}
    }
}



/*************************************************************************/
/*                                                                       */
/*  Given tables Freq[][] and ValFreq[], compute the information gain.   */
/*                                                                       */
/*          Parameters:                                                  */
/*              BaseInfo:       average information for all items with   */
/*                              known values of the test attribute       */
/*              UnknFrac:       fraction of items with unknown values    */
/*              MaxVal:         number of forks                          */
/*              TotalItems:     number of items with known values of     */
/*                              test att                                 */
/*                                                                       */
/*  where Freq[x][y] contains the no. of cases with value x for a        */
/*  particular attribute that are members of class y,                    */
/*  and ValFreq[x] contains the no. of cases with value x for a          */
/*  particular attribute                                                 */
/*                                                                       */
/*************************************************************************/


double ComputeGain(double BaseInfo, double UnknFrac, DiscrValue MaxVal, ItemCount TotalItems)
{
    /*  Compute the information gain of the split currently described by
	Freq[][] and ValFreq[], looking at forks 1 .. MaxVal.

	Returns -Epsilon when the split is rejected: either TotalItems is
	zero, or fewer than two forks hold at least MINOBJS items.
	Otherwise returns
	    (1 - UnknFrac) * (BaseInfo - info after split / TotalItems)  */

    DiscrValue fork_v;
    double AfterSplit=0.0, Result;
    int UsableForks=0;

    /*  No items to split on  */

    if ( TotalItems == 0 )
    {
	return -Epsilon;
    }

    /*  Count the forks that are large enough; a useful test needs two  */

    ForEach(fork_v, 1, MaxVal)
    {
	UsableForks += ( ValFreq[fork_v] >= MINOBJS ? 1 : 0 );
    }

    if ( UsableForks < 2 )
    {
	return -Epsilon;
    }

    /*  Accumulate the information of the class distribution in each fork  */

    ForEach(fork_v, 1, MaxVal)
    {
	AfterSplit += TotalInfo(Freq[fork_v], 0, MaxClass);
    }

    /*  Scale the gain by the fraction of items covered, i.e. 1 - UnknFrac  */

    Result = (1 - UnknFrac) * (BaseInfo - AfterSplit / TotalItems);

    /*  Diagnostic trace, written to both fpScreen and stdout.  The item
	count shown adds ValFreq[0] to TotalItems (presumably the items
	with unknown value -- confirm against the callers)  */

    Verbosity(5)
    {
	fprintf(fpScreen,"ComputeThisGain: items %.1f info %.3f base %.3f unkn %.3f result %.3f\n",
		TotalItems + ValFreq[0], AfterSplit, BaseInfo, UnknFrac, Result);
	printf("ComputeThisGain: items %.1f info %.3f base %.3f unkn %.3f result %.3f\n",
		TotalItems + ValFreq[0], AfterSplit, BaseInfo, UnknFrac, Result);
    }

    return Result;
}



/*************************************************************************/
/*                                                                       */
/*  Compute the total information in V[ MinVal..MaxVal ]                 */
/*                                                                       */
/*************************************************************************/


double TotalInfo(ItemCount V[], DiscrValue MinVal, DiscrValue MaxVal)
{
    /*  Total information of the counts V[MinVal] .. V[MaxVal]:

	    T * Log(T)  -  sum over entries of  V[i] * Log(V[i])

	where T is the sum of the entries.  Log is defined outside this
	file; its base and its treatment of zero are whatever that
	definition provides.  */

    DiscrValue idx;
    ItemCount Entry, Whole=0;
    double Weighted=0.0;

    ForEach(idx, MinVal, MaxVal)
    {
	Entry = V[idx];

	Whole += Entry;
	Weighted += Entry * Log(Entry);
    }

    return Whole * Log(Whole) - Weighted;
}



/*************************************************************************/
/*                                                                       */
/*      Print distribution table for given attribute                     */
/*                                                                       */
/*************************************************************************/


void    PrintDistribution(Attribute Att, DiscrValue MaxVal,Boolean ShowNames)
{
    /*  Print the table Freq[0..MaxVal][0..MaxClass] for attribute Att,
	one row per attribute value, to both fpScreen and stdout.

	ShowNames:  when set, each row is labelled with a name; otherwise
		    with the numeric value index.  */

    DiscrValue row;
    ClassNo col;
    String Label;

    /*  Heading line: one column per class name  */

    fprintf(fpScreen,"\n\t\t\t ");
    printf("\n\t\t\t ");

    ForEach(col, 0, MaxClass)
    {
	fprintf(fpScreen,"%7.6s", ClassName[col]);
	printf("%7.6s", ClassName[col]);
    }

    fprintf(fpScreen,"\n");
    printf("\n");

    /*  One line per attribute value  */

    ForEach(row, 0, MaxVal)
    {
	if ( ! ShowNames )
	{
	    fprintf(fpScreen,"\t\t[%-7d:", row);
	    printf("\t\t[%-7d:", row);
	}
	else
	{
	    /*  Row 0 is labelled "unknown".  Other rows use the value
		name when MaxAttVal[Att] is non-zero; otherwise row 1 is
		"below" and every later row is "above".  */

	    if ( row == 0 )
	    {
		Label = "unknown";
	    }
	    else if ( MaxAttVal[Att] )
	    {
		Label = AttValName[Att][row];
	    }
	    else if ( row == 1 )
	    {
		Label = "below";
	    }
	    else
	    {
		Label = "above";
	    }

	    fprintf(fpScreen,"\t\t[%-7.7s:", Label);
	    printf("\t\t[%-7.7s:", Label);
	}

	ForEach(col, 0, MaxClass)
	{
	    fprintf(fpScreen," %6.1f", Freq[row][col]);
	    printf(" %6.1f", Freq[row][col]);
	}

	fprintf(fpScreen,"]\n");
	printf("]\n");
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -