⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 c4.5gt.c

📁 决策树c4.5算法的c++实现 希望对大家有用
💻 C
📖 第 1 页 / 共 5 页
字号:
	    if ( Kp <= Ep )	    {		Factor = CountItems(Kp, Ep) / KnownCases;		ForEach(i, Fp, Kp-1)		{		    Weight[i] *= Factor;		}		Node->Branch[v] = FormTree(Fp, Ep);		Node->Errors += Node->Branch[v]->Errors;		Group(0, Fp, Ep, Node);		ForEach(i, Fp, Kp-1)		{		    Weight[i] /= Factor;		}	    }	    else	    {		Node->Branch[v] = Leaf(Node->ClassDist, BestClass, 0.0, 0.0);	    }	}	--Tested[BestAtt];	AllKnown = PrevAllKnown;	/*  See whether we would have been no worse off with a leaf  */	if ( Node->Errors >= Cases - NoBestClass - Epsilon )	{ 	    Verbosity(1)		printf("Collapse tree for %d items to leaf %s\n",	                Lp - Fp + 1, ClassName[BestClass]);	    Node->NodeType = 0;	}     }    else    { 	Verbosity(1)	    printf("\tno sensible splits  %.1f/%.1f\n",		   Cases, Cases - NoBestClass);    }     return Node; } /*************************************************************************//*								 	 *//*  Group together the items corresponding to branch V of a test 	 *//*  and return the index of the last such			 	 *//*								 	 *//*  Note: if V equals zero, group the unknown values		 	 *//*								 	 *//*************************************************************************/ItemNo Group(V, Fp, Lp, TestNode)/*     -----  */    DiscrValue V;    ItemNo Fp, Lp;    Tree TestNode;{    ItemNo i;    Attribute Att;    float Thresh;    Set SS;    void Swap();    Att = TestNode->Tested;    if ( V )    {	/*  Group items on the value of attribute Att, and depending	    on the type of branch  */	switch ( TestNode->NodeType )	{	    case BrDiscr:		ForEach(i, Fp, Lp)		{		    if ( DVal(Item[i], Att) == V ) Swap(Fp++, i);		}		break;	    case ThreshContin:		Thresh = TestNode->Cut;		ForEach(i, Fp, Lp)		{		    if ( (CVal(Item[i], Att) <= Thresh) == (V == 1) ) Swap(Fp++, i);		}		break;	    case BrSubset:		SS = TestNode->Subset[V];		ForEach(i, Fp, Lp)		{		    if ( In(DVal(Item[i], Att), SS) ) Swap(Fp++, i);		}		break;	}    }    else    {	/*  Group together unknown values  */	switch ( TestNode->NodeType )	{	    case BrDiscr:	    case BrSubset:		ForEach(i, Fp, Lp)		{		    if ( ! DVal(Item[i], Att) ) Swap(Fp++, i);		}		break;	    case ThreshContin:		ForEach(i, Fp, Lp)		{		    if ( CVal(Item[i], Att) == Unknown ) Swap(Fp++, i);		}		break;	}    }    return Fp - 1;}/*************************************************************************//*								 	 *//*	Return the total weight of items from Fp to Lp		 	 *//*								 	 *//*************************************************************************/ItemCount CountItems(Fp, Lp)/*        ----------  */    ItemNo Fp, Lp;{    register ItemCount Sum=0.0, *Wt, *LWt;    if ( AllKnown ) return Lp - Fp + 1;    for ( Wt = Weight + Fp, LWt = Weight + Lp ; Wt <= LWt ; )    {	Sum += *Wt++;    }    return Sum;}/*************************************************************************//*                                                               	 *//*		Exchange items at a and b			 	 *//*									 *//*************************************************************************/void Swap(a,b)/*   ----  */    ItemNo a, b;{    register Description Hold;    register ItemCount HoldW;    Hold = Item[a];    Item[a] = Item[b];    Item[b] = Hold;    HoldW = Weight[a];    Weight[a] = Weight[b];    Weight[b] = HoldW;}/*************************************************************************//*									 *//*	Calculate information, information gain, and print dists	 *//*	--------------------------------------------------------	 *//*									 *//*************************************************************************//*************************************************************************//*									 *//*  Determine the worth of a particular split according to the		 *//*  operative criterion							 *//*									 *//*	    Parameters:							 *//*		SplitInfo:	potential info of the split		 *//*		SplitGain:	gain in info of the split		 *//*		MinGain:	gain above which the Gain Ratio		 *//*				may be used				 *//*									 *//*  If the Gain criterion is being used, the information gain of	 *//*  the split is returned, but if the Gain Ratio criterion is		 *//*  being used, the ratio of the information gain of the split to	 *//*  its potential information is returned.				 *//*									 *//*************************************************************************/float Worth(ThisInfo, ThisGain, MinGain)/*    -----  */    float ThisInfo, ThisGain, MinGain;{    if ( GAINRATIO )    {	if ( ThisGain >= MinGain - Epsilon && ThisInfo > Epsilon )	{	    return ThisGain / ThisInfo;	}	else	{	    return -Epsilon;	}    }    else    {	return ( ThisInfo > 0 && ThisGain > -Epsilon ? ThisGain : -Epsilon );    }}/*************************************************************************//*									 *//*  Zero the frequency tables Freq[][] and ValFreq[] up to MaxVal	 *//*									 *//*************************************************************************/    ResetFreq(MaxVal)/*  ---------  */    DiscrValue MaxVal;{    DiscrValue v;    ClassNo c;    ForEach(v, 0, MaxVal)    { 	ForEach(c, 0, MaxClass)	{	    Freq[v][c] = 0;	}	ValFreq[v] = 0;    } }/*************************************************************************//*									 *//*  Given tables Freq[][] and ValFreq[], compute the information gain.	 *//*									 *//*	    Parameters:							 *//*		BaseInfo:	average information for all items with	 *//*				known values of the test attribute	 *//*		UnknownRate:	fraction of items with unknown ditto	 *//*		MaxVal:		number of forks				 *//*		TotalItems:	number of items with known values of	 *//*				test att				 *//*									 *//*  where Freq[x][y] contains the no. of cases with value x for a	 *//*  particular attribute that are members of class y,			 *//*  and ValFreq[x] contains the no. of cases with value x for a		 *//*  particular attribute						 *//*									 *//*************************************************************************/float ComputeGain(BaseInfo, UnknFrac, MaxVal, TotalItems)/*    -----------  */    float BaseInfo, UnknFrac;    DiscrValue MaxVal;    ItemCount TotalItems;{    DiscrValue v;    float ThisInfo=0.0, ThisGain, TotalInfo();    short ReasonableSubsets=0;    /*  Check whether all values are unknown or the same  */    if ( ! TotalItems ) return -Epsilon;    /*  There must be at least two subsets with MINOBJS items  */    ForEach(v, 1, MaxVal)    {	if ( ValFreq[v] >= MINOBJS ) ReasonableSubsets++;    }    if ( ReasonableSubsets < 2 ) return -Epsilon;    /*  Compute total info after split, by summing the	info of each of the subsets formed by the test  */    ForEach(v, 1, MaxVal)    {	ThisInfo += TotalInfo(Freq[v], 0, MaxClass);    }    /*  Set the gain in information for all items, adjusted for unknowns  */    ThisGain = (1 - UnknFrac) * (BaseInfo - ThisInfo / TotalItems);    Verbosity(5)        printf("ComputeThisGain: items %.1f info %.3f base %.3f unkn %.3f result %.3f\n",    		TotalItems + ValFreq[0], ThisInfo, BaseInfo, UnknFrac, ThisGain);    return ThisGain;}/*************************************************************************//*									 *//*  Compute the total information in V[ MinVal..MaxVal ]		 *//*									 *//*************************************************************************/float TotalInfo(V, MinVal, MaxVal)/*    ---------  */    ItemCount V[];    DiscrValue MinVal, MaxVal;{    DiscrValue v;    float Sum=0.0;    ItemCount N, TotalItems=0;    ForEach(v, MinVal, MaxVal)    {	N = V[v];	Sum += N * Log(N);	TotalItems += N;    }    return TotalItems * Log(TotalItems) - Sum;}/*************************************************************************//*									 *//*	Print distribution table for given attribute			 *//*									 *//*************************************************************************/    PrintDistribution(Att, MaxVal, ShowNames)/*  -----------------  */    Attribute Att;    DiscrValue MaxVal;    Boolean ShowNames;{    DiscrValue v;    ClassNo c;    String Val;    printf("\n\t\t\t ");    ForEach(c, 0, MaxClass)    {	printf("%7.6s", ClassName[c]);    }    printf("\n");    ForEach(v, 0, MaxVal)    {	if ( ShowNames )	{	    Val = ( !v ? "unknown" :		    MaxAttVal[Att] ? AttValName[Att][v] :		    v == 1 ? "below" : "above" );	    printf("\t\t[%-7.7s:", Val);	}	else	{	    printf("\t\t[%-7d:", v);	}	ForEach(c, 0, MaxClass)	{	    printf(" %6.1f", Freq[v][c]);	}	printf("]\n");    }}/*************************************************************************//*									 *//*	Evaluation of a test on a discrete valued attribute		 *//*      ---------------------------------------------------		 *//*									 *//*************************************************************************//*************************************************************************//*									 *//*  Set Info[] and Gain[] for discrete partition of items Fp to Lp	 *//*									 *//*************************************************************************/    EvalDiscreteAtt(Att, Fp, Lp, Items)/*  ---------------  */     Attribute Att;    ItemNo Fp, Lp;     ItemCount Items;{     ItemCount KnownItems;    float DiscrKnownBaseInfo(), ComputeGain(), TotalInfo();    ComputeFrequencies(Att, Fp, Lp);    KnownItems = Items - ValFreq[0];    /*  Special case when no known values of the attribute  */    if ( Items <= ValFreq[0] )    {	Verbosity(2) printf("\tAtt %s: no known values\n", AttName[Att]);	Gain[Att] = -Epsilon;	Info[Att] = 0.0;	return;    }    Gain[Att] = ComputeGain(DiscrKnownBaseInfo(KnownItems, MaxAttVal[Att]),			    UnknownRate[Att], MaxAttVal[Att], KnownItems);    Info[Att] = TotalInfo(ValFreq, 0, MaxAttVal[Att]) / Items;    Verbosity(2)    {    	printf("\tAtt %s", AttName[Att]);    	Verbosity(3) PrintDistribution(Att, MaxAttVal[Att], true);    	printf("\tinf %.3f, gain %.3f\n", Info[Att], Gain[Att]);    }} /*************************************************************************//*									 *//*  Compute frequency tables Freq[][] and ValFreq[] for attribute	 *//*  Att from items Fp to Lp, and set the UnknownRate for Att		 *//*									 *//*************************************************************************/    ComputeFrequencies(Att, Fp, Lp)/*  ------------------  */    Attribute Att;    ItemNo Fp, Lp;{    Description Case;     ClassNo c;    DiscrValue v;    ItemCount CountItems();    ItemNo p;    ResetFreq(MaxAttVal[Att]);    /*  Determine the frequency of each class amongst cases	with each possible value for the given attribute  */    ForEach(p, Fp, Lp)    { 	Case = Item[p];	Freq[ DVal(Case,Att) ][ Class(Case) ] += Weight[p];    }     /*  Determine the frequency of each possible value for the	given attribute  */    ForEach(v, 0, MaxAttVal[Att])     { 	ForEach(c, 0, MaxClass)	{	    ValFreq[v] += Freq[v][c];	}    }    /*  Set the rate of unknown values of the attribute  */     UnknownRate[Att] = ValFreq[0] / CountItems(Fp, Lp);}/*************************************************************************//*									 *//*  Return the base info for items with known values of a discrete	 *//*  attribute, using the frequency table Freq[][]			 *//*	 								 *//*************************************************************************/float DiscrKnownBaseInfo(KnownItems, MaxVal)/*    ------------------  */    DiscrValue MaxVal;    ItemCount KnownItems;{    ClassNo c;    ItemCount ClassCount;    double Sum=0;    DiscrValue v;    ForEach(c, 0, MaxClass)    {	ClassCount = 0;	ForEach(v, 1, MaxVal)	{	    ClassCount += Freq[v][c];	}	Sum += ClassCount * Log(ClassCount);    }    return (KnownItems * Log(KnownItems) - Sum) / KnownItems;}/*************************************************************************//*									 *//*  Construct and return a node for a test on a discrete attribute	 *//*									 *//*************************************************************************/    DiscreteTest(Node, Att)/*  ----------  */    Tree Node;    Attribute Att;{    ItemCount CountItems();    Sprout(Node, MaxAttVal[Att]);    Node->NodeType	= BrDiscr;    Node->Tested	= Att;    Node->Errors	= 0;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -