⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 c4.5gt.c

📁 决策树c4.5算法的c++实现 希望对大家有用
💻 C
📖 第 1 页 / 共 5 页
字号:
/*************************************************************************//*									 *//*		Definitions used in C4.5				 *//*              ------------------------				 *//*									 *//*************************************************************************/#include <stdio.h>#include <math.h>#define	 Eof			EOF             /*char read on end of file*/#define	 Nil			0               /*null pointer*/#define	 false			0 #define	 true			1 #define	 None			-1#define	 Epsilon                1E-3//long	 random();#define	 Random			((rand()&2147483647) / 2147483648.0)#define	 Max(a,b)               ((a)>(b) ? a : b) #define	 Min(a,b)               ((a)<(b) ? a : b) #define	 Round(x)		((int) (x+0.5))#define	 Log2			0.69314718055994530942#define	 Log(x)			((x) <= 0 ? 0.0 : log((float)x) / Log2)#define	 Bit(b)			(1 << (b))#define	 In(b,s)		((s[(b) >> 3]) & Bit((b) & 07))#define	 ClearBits(n,s)		memset(s,0,n)#define	 CopyBits(n,f,t)	memcpy(t,f,n)#define	 SetBit(b,s)		(s[(b) >> 3] |= Bit((b) & 07))#define	 ForEach(v,f,l)		for(v=f ; v<=l ; ++v) #define	 Verbosity(d)		if(VERBOSITY >= d)#define	 Check(v,l,h)\	     if ( v<l||v>h ) {printf("\t** illegal value **\n"); exit(1);}/*************************************************************************//*									 *//*		Type definitions for C4.5				 *//*              -------------------------				 *//*									 *//*************************************************************************/typedef  char	Boolean, *String, *Set;typedef  int	ItemNo;		/* data item number */typedef  float	ItemCount;	/* count of (partial) items */typedef  short	ClassNo,	/* class number, 0..MaxClass */		DiscrValue;	/* discrete attribute value (0 = ?) */typedef  short	Attribute;	/* attribute number, 0..MaxAtt */typedef  union  _attribute_value	 {	    DiscrValue	_discr_val;	    float	_cont_val;	 }	 	AttValue, *Description;#define  CVal(Case,Attribute)   Case[Attribute]._cont_val#define  DVal(Case,Attribute)   Case[Attribute]._discr_val#define  Class(Case)		Case[MaxAtt+1]._discr_val#define  Unknown  -999		/* unknown value for continuous attribute */#define  BrDiscr	1	/* node types:	branch */#define  ThreshContin	2	/*		threshold cut */#define  BrSubset	3	/*		subset test */typedef  struct _tree_record *Tree;typedef  struct _tree_record	 {	    short	NodeType;	/* 0=leaf 1=branch 2=cut 3=subset */	    ClassNo	Leaf;		/* most frequent class at this node */	    ItemCount	Items,		/* no of items at this node */			*ClassDist,	/* class distribution of items */	    		Errors;		/* no of errors at this node */	    Attribute	Tested; 	/* attribute referenced in test */	    short	Forks;		/* number of branches at this node */	    float	Cut,		/* threshold for continuous attribute */		  	Lower,		/* lower limit of soft threshold */		  	Upper;		/* upper limit ditto */	    Set         *Subset;	/* subsets of discrete values  */	    Tree	*Branch;	/* Branch[x] = (sub)tree for outcome x */	 }		TreeRec;#define  IGNORE		1	/* special attribute status: do not use */#define  DISCRETE	2	/* ditto: collect values as data read */typedef short	RuleNo;			/* rule number */typedef struct TestRec *Test;struct TestRec       {	   short	NodeType;	/* test type (see tree nodes) */	   Attribute	Tested;		/* attribute tested */	   short	Forks;		/* possible branches */	   float	Cut;		/* threshold (if relevant) */	   Set		*Subset;	/* subset (if relevant) */       };typedef struct CondRec *Condition;struct CondRec       {	   Test		CondTest;	/* test part of condition */	   short	TestValue;	/* specified outcome of test */       };typedef struct ProdRuleRec PR;struct ProdRuleRec       {	   short	Size;		/* number of conditions */	   Condition	*Lhs;		/* conditions themselves */	   ClassNo	Rhs;		/* class given by rule */	   float	Error,		/* estimated error rate */			Bits;		/* bits to encode rule */	   ItemNo	Used,		/* times rule used */			Incorrect;	/* times rule incorrect */       };typedef struct RuleSetRec RuleSet;struct RuleSetRec       {	   PR		*SRule;		/* rules */	   RuleNo	SNRules,	/* number of rules */			*SRuleIndex;	/* ranking of rules */	   ClassNo	SDefaultClass;	/* default class for this ruleset */    };/*************************************************************************//*									 *//*	Main routine, c4.5						 *//*	------------------						 *//*									 *//*************************************************************************/short		MaxAtt, MaxClass, MaxDiscrVal = 2;ItemNo		MaxItem;Description	*Item;DiscrValue	*MaxAttVal;char		*SpecialStatus;String		*ClassName,		*AttName,		**AttValName,		FileName = "DF";short		VERBOSITY = 0,		TRIALS    = 10;Boolean		GAINRATIO  = true,		SUBSET     = false,		BATCH      = true,		UNSEENS    = false,		PROBTHRESH = false;ItemNo		MINOBJS   = 2,		WINDOW    = 0,		INCREMENT = 0;float		CF = 0.25;Tree		*Pruned;Boolean		AllKnown = true;int optind = 1;
char *optarg;getopt(Argc, Argv, Str)
/*  ------  */
    int Argc;
    char **Argv, *Str;
{
    int Optchar;
    char *Option;

    if ( optind >= Argc ) return EOF;

    Option = Argv[optind++];

    if ( *Option++ != '-' ) return '?';

    Optchar = *Option++;

    while ( *Str && *Str != Optchar ) Str++;
    if ( ! *Str ) return '?';

    if ( *++Str == ':' )
    {
	if ( *Option ) optarg = Option;
	else
	if ( optind < Argc ) optarg = Argv[optind++];
	else
	Optchar = '?';
    }

    return Optchar;
}
    main(Argc, Argv)/*  ----  */    int Argc;    char *Argv[];{    int o;    extern char *optarg;    Boolean FirstTime=true;    short Best, BestTree();    PrintHeader("decision tree generator");
	scanf("%s",Argv[2]);    /*  Process options  */    while ( (o = getopt(Argc, Argv, "f:bupv:t:w:i:gsm:c:")) != EOF )    {	if ( FirstTime )	{	    printf("\n    Options:\n");	    FirstTime = false;	}	switch (o)	{	case 'f':   FileName = optarg;		    printf("\tFile stem <%s>\n", FileName);		    break;	case 'b':   BATCH = true;		    printf("\tWindowing disabled (now the default)\n");		    break;	case 'u':   UNSEENS = true;		    printf("\tTrees evaluated on unseen cases\n");		    break;	case 'p':   PROBTHRESH = true;		    printf("\tProbability thresholds used\n");		    break;	case 'v':   VERBOSITY = atoi(optarg);		    printf("\tVerbosity level %d\n", VERBOSITY);		    break;	case 't':   TRIALS = atoi(optarg);		    printf("\tWindowing enabled with %d trials\n", TRIALS);		    Check(TRIALS, 1, 10000);		    BATCH = false;		    break;	case 'w':   WINDOW = atoi(optarg);		    printf("\tInitial window size of %d items\n", WINDOW);		    Check(WINDOW, 1, 1000000);		    BATCH = false;		    break;	case 'i':   INCREMENT = atoi(optarg);		    printf("\tMaximum window increment of %d items\n",			   INCREMENT);		    Check(INCREMENT, 1, 1000000);		    BATCH = false;		    break;	case 'g':   GAINRATIO = false;		    printf("\tGain criterion used\n");		    break;	case 's':   SUBSET = true;		    printf("\tTests on discrete attribute groups\n");		    break;	case 'm':   MINOBJS = atoi(optarg);		    printf("\tSensible test requires 2 branches with >=%d cases\n",			    MINOBJS);		    Check(MINOBJS, 1, 1000000);		    break;	case 'c':   CF = atof(optarg);		    printf("\tPruning confidence level %g%%\n", CF);		    Check(CF, Epsilon, 100);		    CF /= 100;		    break;	case '?':   printf("unrecognised option\n");		    exit(1);	}    }    /*  Initialise  */    GetNames();    GetData(".data");    printf("\nRead %d cases (%d attributes) from %s.data\n",	   MaxItem+1, MaxAtt+1, FileName);    /*  Build decision trees  */    if ( BATCH )    {	TRIALS = 1;	OneTree();	Best = 0;    }    else    {	Best = BestTree();    }    /*  Soften thresholds in best tree  */    if ( PROBTHRESH )    {	printf("Softening thresholds");	if ( ! BATCH ) printf(" for best tree from trial %d", Best);	printf("\n");	SoftenThresh(Pruned[Best]);	printf("\n");	PrintTree(Pruned[Best]);    }    /*  Save best tree  */    if ( BATCH || TRIALS == 1 )    {	printf("\nTree saved\n");    }    else    {	printf("\nBest tree from trial %d saved\n", Best);    }    SaveTree(Pruned[Best], ".tree");    /*  Evaluation  */    printf("\n\nEvaluation on training data (%d items):\n", MaxItem+1);    Evaluate(false, Best);    if ( UNSEENS )    {           GetData(".test");        printf("\nEvaluation on test data (%d items):\n", MaxItem+1);        Evaluate(true, Best);    }    exit(0);}/*************************************************************************//*									 *//*	Routines to manage tree growth, pruning and evaluation		 *//*	------------------------------------------------------		 *//*									 *//*************************************************************************/ItemNo		*TargetClassFreq;Tree		*Raw;extern Tree	*Pruned;/*************************************************************************//*									 *//*	Grow and prune a single tree from all data			 *//*									 *//*************************************************************************/    OneTree()/*  ---------  */{    Tree FormTree(), CopyTree();    Boolean Prune();    InitialiseTreeData();    InitialiseWeights();    Raw = (Tree *) calloc(1, sizeof(Tree));    Pruned = (Tree *) calloc(1, sizeof(Tree));    AllKnown = true;    Raw[0] = FormTree(0, MaxItem);    printf("\n");    PrintTree(Raw[0]);    SaveTree(Raw[0], ".unpruned");    Pruned[0] = CopyTree(Raw[0]);    if ( Prune(Pruned[0]) )    {	printf("\nSimplified ");	PrintTree(Pruned[0]);    }}/*************************************************************************//*									 *//*	Grow and prune TRIALS trees and select the best of them		 *//*									 *//*************************************************************************/short BestTree()/*    --------  */{    Tree CopyTree(), Iterate();    Boolean Prune();    short t, Best=0;    InitialiseTreeData();    TargetClassFreq = (ItemNo *) calloc(MaxClass+1, sizeof(ItemNo));    Raw    = (Tree *) calloc(TRIALS, sizeof(Tree));    Pruned = (Tree *) calloc(TRIALS, sizeof(Tree));    /*  If necessary, set initial size of window to 20% (or twice	the sqrt, if this is larger) of the number of data items,	and the maximum number of items that can be added to the	window at each iteration to 20% of the initial window size  */    if ( ! WINDOW )    {	WINDOW = Max(2 * sqrt(MaxItem+1.0), (MaxItem+1) / 5);    }    if ( ! INCREMENT )    {	INCREMENT = Max(WINDOW / 5, 1);    }    FormTarget(WINDOW);    /*  Form set of trees by iteration and prune  */    ForEach(t, 0, TRIALS-1 )    {        FormInitialWindow();	printf("\n--------\nTrial %d\n--------\n\n", t);	Raw[t] = Iterate(WINDOW, INCREMENT);	printf("\n");	PrintTree(Raw[t]);	SaveTree(Raw[t], ".unpruned");	Pruned[t] = CopyTree(Raw[t]);	if ( Prune(Pruned[t]) )	{	    printf("\nSimplified ");	    PrintTree(Pruned[t]);	}	if ( Pruned[t]->Errors < Pruned[Best]->Errors )	{	    Best = t;	}    }    printf("\n--------\n");    return Best;}/*************************************************************************//*									 *//*  The windowing approach seems to work best when the class		 *//*  distribution of the initial window is as close to uniform as	 *//*  possible.  FormTarget generates this initial target distribution,	 *//*  setting up a TargetClassFreq value for each class.			 *//*									 *//*************************************************************************/    FormTarget(Size)/*  -----------  */    ItemNo Size;{    ItemNo i, *ClassFreq;    ClassNo c, Smallest, ClassesLeft=0;    ClassFreq = (ItemNo *) calloc(MaxClass+1, sizeof(ItemNo));    /*  Generate the class frequency distribution  */    ForEach(i, 0, MaxItem)    {	ClassFreq[ Class(Item[i]) ]++;    }    /*  Calculate the no. of classes of which there are items  */    ForEach(c, 0, MaxClass)    {	if ( ClassFreq[c] )	{	    ClassesLeft++;	}	else	{	    TargetClassFreq[c] = 0;	}    }    while ( ClassesLeft )    {	/*  Find least common class of which there are some items  */	Smallest = -1;	ForEach(c, 0, MaxClass)	{	    if ( ClassFreq[c] &&		 ( Smallest < 0 || ClassFreq[c] < ClassFreq[Smallest] ) )	    {		Smallest = c;	    }	}	/*  Allocate the no. of items of this class to use in the window  */	TargetClassFreq[Smallest] = Min(ClassFreq[Smallest], Round(Size/ClassesLeft));	ClassFreq[Smallest] = 0;	Size -= TargetClassFreq[Smallest];	ClassesLeft--;    }    free(ClassFreq);}/*************************************************************************//*									 *//*  Form initial window, attempting to obtain the target class profile	 *//*  in TargetClassFreq.  This is done by placing the targeted number     *//*  of items of each class at the beginning of the set of data items.	 *//*									 *//*************************************************************************/    FormInitialWindow()/*  -------------------  */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -