⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 consult.c

📁 c4.5的源码决策树最全面最经典的版本
💻 C
字号:
/*************************************************************************//*								   	 *//*	Classify items interactively using a decision tree	   	 *//*	--------------------------------------------------   		 *//*								   	 *//*************************************************************************/#include "defns.i"#include "types.i"		/*  External data  -- see c4.5.c for meanings  */short		MaxAtt, MaxClass, MaxDiscrVal;ItemNo		MaxItem;Description	*Item;DiscrValue	*MaxAttVal;String		*ClassName,		*AttName,		**AttValName,		FileName = "DF";char		*SpecialStatus;short		VERBOSITY = 0,		TRACE     = 0;	/*  The interview module uses a more complex description of an	    case called a "Range Description".   The value of an	    attribute is given by	    - lower and upper bounds (continuous attribute)	    - probability of each possible value (discrete attribute)  */typedef	struct ValRange *RangeDescRec;struct ValRange	{	    Boolean	Known,		/* is range known? */			Asked;		/* has it been asked? */	    float	LowerBound,	/* lower bound given */			UpperBound,	/* upper ditto */			*Probability;	/* prior prob of each discr value */	};RangeDescRec RangeDesc;Tree	DecisionTree,			/* tree being used */	GetTree();float	*LowClassSum,			/* accumulated lower estimates */	*ClassSum = Nil;		/* accumulated central estimates */#define Fuzz	0.01			/* minimum weight *//*************************************************************************//*								   	 *//*  Classify the extended case description in RangeDesc using the	 *//*  given subtree, by adjusting the values ClassSum and LowClassSum	 *//*  for each class, indicating the likelihood of the case being  	 *//*  of that class.						   	 *//*								   	 *//*************************************************************************/    ClassifyCase(Subtree, Weight)/*  ------------ 	 */    Tree Subtree;    float Weight;{    DiscrValue v;    float BranchWeight, Area(), Interpolate();    Attribute a;    short s;    ClassNo c;    /*  A leaf  */    if ( ! Subtree->NodeType )    {	Verbosity(1)	    printf("\tClass %s weight %g cases %g\n", 		    ClassName[Subtree->Leaf], Weight, Subtree->Items);	if ( Subtree->Items > 0 )	{	    /*  Adjust class sum of ALL classes, but adjust low class sum		of leaf class only  */	    ForEach(c, 0, MaxClass)	    {		ClassSum[c] += Weight * Subtree->ClassDist[c] / Subtree->Items;	    }	    LowClassSum[Subtree->Leaf] +=		Weight * (1 - Subtree->Errors / Subtree->Items);	}	else	{	    ClassSum[Subtree->Leaf] += Weight;	}	return;    }    a = Subtree->Tested;    CheckValue(a, Subtree);    /*  Unknown value  */    if ( ! RangeDesc[a].Known )    {	ForEach(v, 1, Subtree->Forks)	{	    ClassifyCase(Subtree->Branch[v],		     (Weight * Subtree->Branch[v]->Items) / Subtree->Items);	}	return;    }    /*  Known value  */    switch ( Subtree->NodeType )    {	case BrDiscr:  /* test of discrete attribute */	    ForEach(v, 1, MaxAttVal[a])	    {		BranchWeight = RangeDesc[a].Probability[v];		if ( BranchWeight > 0 )		{		    Verbosity(1)		    	printf("\tWeight %g: test att %s (val %s = %g)\n",		    	       Weight, AttName[a], AttValName[a][v],			       BranchWeight);		    ClassifyCase(Subtree->Branch[v], Weight * BranchWeight);		}	    }	    break;	case ThreshContin:  /* test of continuous attribute */	    BranchWeight = 		RangeDesc[a].UpperBound <= Subtree->Lower ? 1.0 :		RangeDesc[a].LowerBound > Subtree->Upper ? 0.0 :		RangeDesc[a].LowerBound != RangeDesc[a].UpperBound ?		    (Area(Subtree, RangeDesc[a].LowerBound) -		     Area(Subtree, RangeDesc[a].UpperBound)) /		    (RangeDesc[a].UpperBound - RangeDesc[a].LowerBound) :		Interpolate(Subtree, RangeDesc[a].LowerBound) ;	    Verbosity(1)	        printf("\tWeight %g: test att %s (branch weight=%g)\n",	    	       Weight, AttName[a], BranchWeight);	    if ( BranchWeight > Fuzz )	    {		ClassifyCase(Subtree->Branch[1], Weight * BranchWeight);	    }	    if ( BranchWeight < 1-Fuzz )	    {		ClassifyCase(Subtree->Branch[2], Weight * (1 - BranchWeight));	    }	    break;	case BrSubset:  /* subset test on discrete attribute  */	    ForEach(s, 1, Subtree->Forks)	    {		BranchWeight = 0.0;		ForEach(v, 1, MaxAttVal[a])		{		    if ( In(v, Subtree->Subset[s]) )		    {			BranchWeight += RangeDesc[a].Probability[v];		    }		}		if ( BranchWeight > 0 )		{		    Verbosity(1)		    	printf("\tWeight %g: test att %s (val %s = %g)\n",		    	       Weight, AttName[a], AttValName[a][v],			       BranchWeight);		    ClassifyCase(Subtree->Branch[s], Weight * BranchWeight);		}	    }	    break;    }}/*************************************************************************//*								   	 *//*  Interpolate a single value between Lower, Cut and Upper		 *//*								   	 *//*************************************************************************/float Interpolate(t, v)/*    ---- 	 */    Tree t;    float v;{    float Sum=Epsilon;    if ( v <= t->Lower )    {	return 1.0;    }    if ( v <= t->Cut )    {	return 1 - 0.5 * (v - t->Lower) / (t->Cut - t->Lower + Epsilon);    }    if ( v < t->Upper )    {	return 0.5 - 0.5 * (v - t->Cut) / (t->Upper - t->Cut + Epsilon);    }    return 0.0;}/*************************************************************************//*								   	 *//*  Compute the area under a soft threshold curve to the right of a	 *//*  given value.							 *//*								   	 *//*************************************************************************/float Area(t, v)/*    ---- 	 */    Tree t;    float v;{    float Sum=Epsilon, F;    if ( v < t->Lower )    {	Sum += t->Lower - v;	v = t->Lower;    }    if ( v < t->Cut )    {	F = (t->Cut - v ) / (t->Cut - t->Lower + Epsilon);	Sum += 0.5 * (t->Cut - v) + 0.25 * F * (t->Cut - v);	v = t->Cut;    }    if ( v < t->Upper )    {	F = (t->Upper - v ) / (t->Upper - t->Cut + Epsilon);	Sum += 0.25 * (t->Upper - v) * F;    }    Verbosity(1) printf("lower=%g  cut=%g  upper=%g  area=%g\n",    			t->Lower, t->Cut, t->Upper, Sum);    return Sum;}/*************************************************************************//*								  	 *//*		Process a single case				  	 *//*								  	 *//*************************************************************************/    InterpretTree()/*  ------------- 	 */{     ClassNo c, BestClass;    float Uncertainty=1.0;    char Reply;    Attribute a;    /*  Initialise  */    ForEach(a, 0, MaxAtt)    {	RangeDesc[a].Asked = false;    }    if ( ! ClassSum )    {	/*  The first time through .. allocate class sums  */	ClassSum = (float *) malloc((MaxClass+1) * sizeof(float));	LowClassSum = (float *) malloc((MaxClass+1) * sizeof(float));	printf("\n");    }    else    {	printf("\n-------------------------------------------\n\n");    }    ForEach(c, 0, MaxClass)    {	LowClassSum[c] = ClassSum[c] = 0;    }    /*  Find the likelihood of an item's being of each class  */    ClassifyCase(DecisionTree, 1.0);    /*  Find the best class and show decision made  */    BestClass = 0;    ForEach(c, 0, MaxClass)    {	Verbosity(1) printf("class %d weight %.2f\n", c, ClassSum[c]);	Uncertainty -= LowClassSum[c];	if ( ClassSum[c] > ClassSum[BestClass] ) BestClass = c;    }    printf("\nDecision:\n");    Decision(BestClass, ClassSum[BestClass],	     LowClassSum[BestClass],	     Uncertainty + LowClassSum[BestClass]);    /*  Show the other significant classes, if more than two classes  */    if ( MaxClass > 1 )    {	while ( true )	{	    ClassSum[BestClass] = 0;	    BestClass = 0;	    ForEach(c, 0, MaxClass)	    {		if ( ClassSum[c] > ClassSum[BestClass] ) BestClass = c;	    }	    if ( ClassSum[BestClass] < Fuzz ) break;	    Decision(BestClass, ClassSum[BestClass],		     LowClassSum[BestClass],		     Uncertainty + LowClassSum[BestClass]);	}    }    /*  Prompt for what to do next  */    while ( true )    {	printf("\nRetry, new case or quit [r,n,q]: ");	Reply = getchar();	SkipLine(Reply);	switch ( Reply )	{	  case 'r':  return;	  case 'n':  Clear(); return;	  case 'q':  exit(0);	  default:   printf("Please enter 'r', 'n' or 'q'");	}    }}/*************************************************************************//*								  	 *//*  Print the chosen class with certainty factor and range	  	 *//*								  	 *//*************************************************************************/    Decision(c, p, lb, ub)/*  -------- 	 */    ClassNo c;    float p, lb, ub;{    printf("\t%s", ClassName[c]);    if ( p < 1-Fuzz || lb < ub - Fuzz )    {	printf("  CF = %.2f", p);	if ( lb < ub - Fuzz )	{	    printf("  [ %.2f - %.2f ]", lb, ub);	}    }    printf("\n");}/*************************************************************************//*								  	 *//*  Main routine for classifying items using a decision tree	  	 *//*								  	 *//*************************************************************************/    main(Argc, Argv)/*  ---- 	 */    int Argc;    char *Argv[];{    int o;    extern char *optarg;    extern int optind;    Attribute a;    PrintHeader("decision tree interpreter");    /*  Process options  */    while ( (o = getopt(Argc, Argv, "tvf:")) != EOF )    {	switch (o)	{	    case 't':	TRACE = 1;			break;	    case 'v':	VERBOSITY = 1;			break;	    case 'f':	FileName = optarg;			break;	    case '?':	printf("unrecognised option\n");			exit(1);	}    }    /*  Initialise  */    GetNames();    DecisionTree = GetTree(".tree");    if ( TRACE ) PrintTree(DecisionTree);    /*  Allocate value ranges  */    RangeDesc = (struct ValRange *) calloc(MaxAtt+1, sizeof(struct ValRange));    ForEach(a, 0, MaxAtt)    {	if ( MaxAttVal[a] )	{	    RangeDesc[a].Probability =		(float *) calloc(MaxAttVal[a]+1, sizeof(float));	}    }    /*  Consult  */    Clear();    while ( true )    {	InterpretTree();    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -