⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 contin.cpp

📁 实现决策树分类训练试验。 源自c4.5
💻 CPP
字号:
/*************************************************************************/
/*                                                                	 */
/*	Evaluation of a test on a continuous valued attribute	  	 */
/*	-----------------------------------------------------	  	 */
/*								  	 */
/*************************************************************************/
#include "stdafx.h"
#include "MyBase.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

extern FILE *fLog;
extern ItemCount
	*Weight,	/* Weight[i]  = current fraction of item i */
	**Freq,		/* Freq[x][c] = no. items of class c with outcome x */
	*ValFreq;	/* ValFreq[x] = no. items with att value v */

extern float
	*Gain,		/* Gain[a] = info gain by split on att a */
	*Info,		/* Info[a] = potential info from split on att a */
	*Bar,		/* Bar[a]  = best threshold for contin att a */
	*UnknownRate;	/* UnknownRate[a] = current unknown rate for att a */

extern char
	*Tested;	/* Tested[a] = true if att a already tested */

float
	*SplitGain,	/* SplitGain[i] = gain with att value of item i as threshold */
	*SplitInfo;	/* SplitInfo[i] = potential info ditto */



/*************************************************************************/
/*								  	 */
/*  Continuous attributes are treated as if they have possible values	 */
/*	0 (unknown), 1 (less than cut), 2(greater than cut)	  	 */
/*  This routine finds the best cut for items Fp through Lp and sets	 */
/*  Info[], Gain[] and Bar[]						 */
/*								  	 */
/*************************************************************************/
void EvalContinuousAtt(Attribute Att, ItemNo Fp, ItemNo Lp)
{ 
    ItemNo i, BestI, Xp, Tries=0;
    ItemCount Items, KnownItems, LowItems, MinSplit;
    ClassNo c;
    float AvGain=0, Val, BestVal, BaseInfo, ThreshCost;

    //Verbosity(2) fprintf(fLog,"\tAtt %s", AttName[Att]);
    //Verbosity(3) fprintf(fLog,"\n");

    ResetFreq(2);

    /*  Omit and count unknown values */

    Items = CountItems(Fp, Lp);
    Xp = Fp;
    ForEach(i, Fp, Lp)
    {
		if ( CVal(Item[i],Att) == Unknown )
		{
			Freq[ 0 ][ Class(Item[i]) ] += Weight[i];
			Swap(Xp, i);
			Xp++;
		}
    }

    ValFreq[0] = 0;
    ForEach(c, 0, MaxClass)
    {
		ValFreq[0] += Freq[0][c];
    }

    KnownItems = Items - ValFreq[0];
    UnknownRate[Att] = 1.0 - KnownItems / Items;

    /*  Special case when very few known values  */

    if ( KnownItems < 2 * MINOBJS )
    {
		//Verbosity(2) fprintf(fLog,"\tinsufficient cases with known values\n");

		Gain[Att] = -Epsilon;
		Info[Att] = 0.0;
		return;
    }

    Quicksort(Xp, Lp, Att, 0);

    /*  Count base values and determine base information  */

    ForEach(i, Xp, Lp)
    {
		Freq[ 2 ][ Class(Item[i]) ] += Weight[i];
		SplitGain[i] = -Epsilon;
		SplitInfo[i] = 0;
    }

    BaseInfo = TotalInfo(Freq[2], 0, MaxClass) / KnownItems;

    /*  Try possible cuts between items i and i+1, and determine the
	information and gain of the split in each case.  We have to be wary
	of splitting a small number of items off one end, as we can always
	split off a single item, but this has little predictive power.  */

    MinSplit = 0.10 * KnownItems / (MaxClass + 1);
    if ( MinSplit <= MINOBJS )
		MinSplit = MINOBJS;
    else if ( MinSplit > 25 ) 
		MinSplit = 25;

    LowItems = 0;
    ForEach(i, Xp, Lp - 1)
    {
		c = Class(Item[i]);
		LowItems   += Weight[i];
		Freq[1][c] += Weight[i];
		Freq[2][c] -= Weight[i];

		if ( LowItems < MinSplit ) continue;
		else
		if ( LowItems > KnownItems - MinSplit ) break;

		if ( CVal(Item[i],Att) < CVal(Item[i+1],Att) - 1E-5 )
		{
			ValFreq[1] = LowItems;
			ValFreq[2] = KnownItems - LowItems;
			SplitGain[i] = ComputeGain(BaseInfo, UnknownRate[Att], 2, KnownItems);
			SplitInfo[i] = TotalInfo(ValFreq, 0, 2) / Items;
			AvGain += SplitGain[i];
			Tries++;

			Verbosity(3)
			{/**
				fprintf(fLog,"\t\tCut at %.3f  (gain %.3f, val %.3f):",
					   ( CVal(Item[i],Att) + CVal(Item[i+1],Att) ) / 2,
	    			   SplitGain[i],
	    			   Worth(SplitInfo[i], SplitGain[i], Epsilon));
	    			   PrintDistribution(Att, 2, true);/**/
			}
		}
    }

    /*  Find the best attribute according to the given criterion  */

    ThreshCost = Log(Tries) / Items;

    BestVal = 0;
    BestI   = None;
    ForEach(i, Xp, Lp - 1)
    {
		if ( (Val = SplitGain[i] - ThreshCost) > BestVal )
		{
			BestI   = i;
			BestVal = Val;
		}
	}

    /*  If a test on the attribute is able to make a gain,
	set the best break point, gain and information  */ 

    if ( BestI == None )
    {
		Gain[Att] = -Epsilon;
		Info[Att] = 0.0;

		Verbosity(2) fprintf(fLog,"\tno gain\n");
    }
    else
    {
		Bar[Att]  = (CVal(Item[BestI],Att) + CVal(Item[BestI+1],Att)) / 2;
		Gain[Att] = BestVal;
		Info[Att] = SplitInfo[BestI];
/**
		Verbosity(2)
			fprintf(fLog,"\tcut=%.3f, inf %.3f, gain %.3f\n",
			   Bar[Att], Info[Att], Gain[Att]);/**/
    }
} 



/*************************************************************************/
/*                                                                	 */
/*  Change a leaf into a test on a continuous attribute           	 */
/*                                                                	 */
/*************************************************************************/
void ContinTest(Tree Node, Attribute Att)
{
    float Thresh;

    Sprout(Node, 2);

    Thresh = GreatestValueBelow(Att, Bar[Att]);

    Node->NodeType	= ThreshContin;
    Node->Tested	= Att;
    Node->Cut		=
    Node->Lower		=
    Node->Upper		= Thresh;
    Node->Errors        = 0;
}

/*************************************************************************/
/*                                                                	 */
/*  Return the greatest value of attribute Att below threshold t  	 */
/*                                                                	 */
/*************************************************************************/
float GreatestValueBelow(Attribute Att, float t)
{
    ItemNo i;
    float v, Best;
    Boolean NotYet=true;

    ForEach(i, 0, MaxItem)
    {
		v = CVal(Item[i], Att);
		if ( v != Unknown && v <= t && ( NotYet || v > Best ) )
		{
			Best = v;
			NotYet = false;
		}
    }

    return Best;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -