⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 contin.cpp

📁 One kind of decision-making tree algorithm, can be seen as one kind data mining algorithm ,find the
💻 CPP
字号:
/*************************************************************************/
/*                                                                       */
/*      Evaluation of a test on a continuous valued attribute            */
/*      -----------------------------------------------------            */
/*                                                                       */
/*************************************************************************/


#include "buildex.h"
#include "extern.h"


double
*SplitGain,     /* SplitGain[i] = gain with att value of item i as threshold */
*SplitInfo;     /* SplitInfo[i] = potential info ditto */

extern FILE *fpScreen;

/*************************************************************************/
/*                                                                       */
/*  Continuous attributes are treated as if they have possible values    */
/*      0 (unknown), 1 (less than cut), 2(greater than cut)              */
/*  This routine finds the best cut for items Fp through Lp and sets     */
/*  Info[], Gain[] and Bar[]                                             */
/*                                                                       */
/*************************************************************************/


void    EvalContinuousAtt(Attribute Att, ItemNo Fp, ItemNo Lp)
/*  -----------------  */ 
{ 
    ItemNo i, BestI, Xp, Tries=0;
    ItemCount Items, KnownItems, LowItems, MinSplit;
    ClassNo c;
    double AvGain=0, Val, BestVal, BaseInfo;
        
        
    Verbosity(2) 
        {
                fprintf(fpScreen,"\tAtt %s", AttName[Att]);
                printf("\tAtt %s", AttName[Att]);
        }
    Verbosity(3) 
        {
                fprintf(fpScreen,"\n");
                printf("\n");
        }
        
    ResetFreq(2);
        
    /*  Omit and count unknown values */
        
    Items = CountItems(Fp, Lp);
    Xp = Fp;
    ForEach(i, Fp, Lp)
    {
                if ( CVal(Item[i],Att) == Unknown )
                {
                        Freq[ 0 ][ Class(Item[i]) ] += Weight[i];
                        Swap(Xp, i);
                        Xp++;
                }
    }
        
    ValFreq[0] = 0;
    ForEach(c, 0, MaxClass)
    {
                ValFreq[0] += Freq[0][c];
    }
        
    KnownItems = Items - ValFreq[0];
    UnknownRate[Att] = 1.0 - KnownItems / Items;
        
    /*  Special case when very few known values  */
        
    if ( KnownItems < 2 * MINOBJS )
    {
                Verbosity(2)
                {
                        fprintf(fpScreen,"\tinsufficient cases with known values\n");
                        printf("\tinsufficient cases with known values\n");
                }
                
                Gain[Att] = -Epsilon;
                Info[Att] = 0.0;
                return;
    }
        
    Quicksort(Xp, Lp, Att, Swap);
        
    /*  Count base values and determine base information  */
        
    ForEach(i, Xp, Lp)
    {
                Freq[ 2 ][ Class(Item[i]) ] += Weight[i];
                SplitGain[i] = -Epsilon;
                SplitInfo[i] = 0;
    }
        
    BaseInfo = TotalInfo(Freq[2], 0, MaxClass) / KnownItems;
        
    /*  Try possible cuts between items i and i+1, and determine the
        information and gain of the split in each case.  We have to be wary
        of splitting a small number of items off one end, as we can always
        split off a single item, but this has little predictive power.  */
        
    MinSplit = 0.10 * KnownItems / (MaxClass + 1);
    if ( MinSplit <= MINOBJS ) MinSplit = MINOBJS;
    else
                if ( MinSplit > 25 ) MinSplit = 25;
                
                LowItems = 0;
                ForEach(i, Xp, Lp - 1)
                {
                        c = Class(Item[i]);
                        LowItems   += Weight[i];
                        Freq[1][c] += Weight[i];
                        Freq[2][c] -= Weight[i];
                        
                        if ( LowItems < MinSplit ) continue;
                        else
                                if ( LowItems > KnownItems - MinSplit ) break;
                                
                                if ( CVal(Item[i],Att) < CVal(Item[i+1],Att) - 1E-5 )
                                {
                                        ValFreq[1] = LowItems;
                                        ValFreq[2] = KnownItems - LowItems;
                                        SplitGain[i] = ComputeGain(BaseInfo, UnknownRate[Att], 2, KnownItems);
                                        SplitInfo[i] = TotalInfo(ValFreq, 0, 2) / Items;
                                        AvGain += SplitGain[i];
                                        Tries++;
                                        
                                        Verbosity(3)
                                        {   fprintf(fpScreen,"\t\tCut at %.3f  (gain %.3f, val %.3f):",
                                        ( CVal(Item[i],Att) + CVal(Item[i+1],Att) ) / 2,
                                        SplitGain[i],
                                        Worth(SplitInfo[i], SplitGain[i], Epsilon));
                                        PrintDistribution(Att, 2, true);
                                        printf("\t\tCut at %.3f  (gain %.3f, val %.3f):",
                                                ( CVal(Item[i],Att) + CVal(Item[i+1],Att) ) / 2,
                                                SplitGain[i],
                                                Worth(SplitInfo[i], SplitGain[i], Epsilon));
                                        PrintDistribution(Att, 2, true);
                                        }
                                }
                }
                
                /*  Find the best attribute according to the given criterion  */
                
                BestVal = -Epsilon;
                BestI   = None;
                AvGain  = ( Tries ? AvGain / Tries : 1E6 );
                ForEach(i, Xp, Lp - 1)
                {
                        Val = Worth(SplitInfo[i], SplitGain[i], AvGain);
                        if ( SplitGain[i] >= 0 && Val >= BestVal )
                        {
                                BestI   = i;
                                BestVal = Val;
                        }
                }
                
                /*  If a test on the attribute is able to make a gain,
                set the best break point, gain and information  */ 
                
                if ( BestI == None )
                {
                        Gain[Att] = -Epsilon;
                        Info[Att] = 0.0;
                        
                        Verbosity(2) 
                        {
                                fprintf(fpScreen,"\tno gain\n");
                                printf("\tno gain\n");
                        }       
                }
                else
                {
                        Bar[Att]  = (CVal(Item[BestI],Att) + CVal(Item[BestI+1],Att)) / 2;
                        Gain[Att] = SplitGain[BestI];
                        Info[Att] = SplitInfo[BestI];
                        
                        Verbosity(2)
                        {
                                fprintf(fpScreen,"\tcut=%.3f, inf %.3f, gain %.3f\n",
                                Bar[Att], Info[Att], Gain[Att]);
                                printf("\tcut=%.3f, inf %.3f, gain %.3f\n",
                                Bar[Att], Info[Att], Gain[Att]);
                        }
                }
} 



/*************************************************************************/
/*                                                                       */
/*  Change a leaf into a test on a continuous attribute                  */
/*                                                                       */
/*************************************************************************/


void ContinTest(Tree Node, Attribute Att)
/*  ----------  */
{
    double Thresh;
    
        
    Sprout(Node, 2);
        
    Thresh = GreatestValueBelow(Att, Bar[Att]);
        
    Node->NodeType      = ThreshContin;
    Node->Tested        = Att;
    Node->Cut           =
                Node->Lower         =
                Node->Upper         = Thresh;
    Node->Errors        = 0;
}



/*************************************************************************/
/*                                                                       */
/*  Return the greatest value of attribute Att below threshold t         */
/*                                                                       */
/*************************************************************************/


double GreatestValueBelow(Attribute Att, double t)
/*    ------------------  */
{
    ItemNo i;
    double v, Best;
        
    Best = -1E20;
        
    ForEach(i, 0, MaxItem)
    {
                v = CVal(Item[i], Att);
                if ( v != Unknown && v <= t && v > Best ) Best = v;
    }

    return Best;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -