⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 consult.c

📁 这是一个决策树实现的算法
💻 C
字号:
/*************************************************************************/
/*								   	 */
/*	Classify items interactively using a decision tree	   	 */
/*	--------------------------------------------------   		 */
/*								   	 */
/*************************************************************************/


#include "defns.i"
#include "types.i"


		/*  External data  -- see c4.5.c for meanings  */

short		MaxAtt, MaxClass, MaxDiscrVal;

ItemNo		MaxItem;

Description	*Item;

DiscrValue	*MaxAttVal;

String		*ClassName,
		*AttName,
		**AttValName,
		FileName = "DF";

char		*SpecialStatus;

short		VERBOSITY = 0,
		TRACE     = 0;


	/*  The interview module uses a more complex description of an
	    case called a "Range Description".   The value of an
	    attribute is given by
	    - lower and upper bounds (continuous attribute)
	    - probability of each possible value (discrete attribute)  */


typedef	struct ValRange *RangeDescRec;

struct ValRange
	{
	    Boolean	Known,		/* is range known? */
			Asked;		/* has it been asked? */
	    float	LowerBound,	/* lower bound given */
			UpperBound,	/* upper ditto */
			*Probability;	/* prior prob of each discr value */
	};

RangeDescRec RangeDesc;

Tree	DecisionTree,			/* tree being used */
	GetTree();

float	*LowClassSum,			/* accumulated lower estimates */
	*ClassSum = Nil;		/* accumulated central estimates */

#define Fuzz	0.01			/* minimum weight */



/*************************************************************************/
/*								   	 */
/*  Classify the extended case description in RangeDesc using the	 */
/*  given subtree, by adjusting the values ClassSum and LowClassSum	 */
/*  for each class, indicating the likelihood of the case being  	 */
/*  of that class.						   	 */
/*								   	 */
/*************************************************************************/


    ClassifyCase(Subtree, Weight)
/*  ------------ 	 */
    Tree Subtree;
    float Weight;
{
    DiscrValue v;
    float BranchWeight, Area(), Interpolate();
    Attribute a;
    short s;
    ClassNo c;

    /*  A leaf  */

    if ( ! Subtree->NodeType )
    {
	Verbosity(1)
	    printf("\tClass %s weight %g cases %g\n", 
		    ClassName[Subtree->Leaf], Weight, Subtree->Items);

	if ( Subtree->Items > 0 )
	{
	    /*  Adjust class sum of ALL classes, but adjust low class sum
		of leaf class only  */

	    ForEach(c, 0, MaxClass)
	    {
		ClassSum[c] += Weight * Subtree->ClassDist[c] / Subtree->Items;
	    }

	    LowClassSum[Subtree->Leaf] +=
		Weight * (1 - Subtree->Errors / Subtree->Items);
	}
	else
	{
	    ClassSum[Subtree->Leaf] += Weight;
	}

	return;
    }

    a = Subtree->Tested;

    CheckValue(a, Subtree);

    /*  Unknown value  */

    if ( ! RangeDesc[a].Known )
    {
	ForEach(v, 1, Subtree->Forks)
	{
	    ClassifyCase(Subtree->Branch[v],
		     (Weight * Subtree->Branch[v]->Items) / Subtree->Items);
	}
	return;
    }

    /*  Known value  */

    switch ( Subtree->NodeType )
    {
	case BrDiscr:  /* test of discrete attribute */

	    ForEach(v, 1, MaxAttVal[a])
	    {
		BranchWeight = RangeDesc[a].Probability[v];
		if ( BranchWeight > 0 )
		{
		    Verbosity(1)
		    	printf("\tWeight %g: test att %s (val %s = %g)\n",
		    	       Weight, AttName[a], AttValName[a][v],
			       BranchWeight);

		    ClassifyCase(Subtree->Branch[v], Weight * BranchWeight);
		}
	    }
	    break;

	case ThreshContin:  /* test of continuous attribute */

	    BranchWeight = 
		RangeDesc[a].UpperBound <= Subtree->Lower ? 1.0 :
		RangeDesc[a].LowerBound > Subtree->Upper ? 0.0 :
		RangeDesc[a].LowerBound != RangeDesc[a].UpperBound ?
		    (Area(Subtree, RangeDesc[a].LowerBound) -
		     Area(Subtree, RangeDesc[a].UpperBound)) /
		    (RangeDesc[a].UpperBound - RangeDesc[a].LowerBound) :
		Interpolate(Subtree, RangeDesc[a].LowerBound) ;

	    Verbosity(1)
	        printf("\tWeight %g: test att %s (branch weight=%g)\n",
	    	       Weight, AttName[a], BranchWeight);

	    if ( BranchWeight > Fuzz )
	    {
		ClassifyCase(Subtree->Branch[1], Weight * BranchWeight);
	    }
	    if ( BranchWeight < 1-Fuzz )
	    {
		ClassifyCase(Subtree->Branch[2], Weight * (1 - BranchWeight));
	    }
	    break;

	case BrSubset:  /* subset test on discrete attribute  */

	    ForEach(s, 1, Subtree->Forks)
	    {
		BranchWeight = 0.0;
		ForEach(v, 1, MaxAttVal[a])
		{
		    if ( In(v, Subtree->Subset[s]) )
		    {
			BranchWeight += RangeDesc[a].Probability[v];
		    }
		}
		if ( BranchWeight > 0 )
		{
		    Verbosity(1)
		    	printf("\tWeight %g: test att %s (val %s = %g)\n",
		    	       Weight, AttName[a], AttValName[a][v],
			       BranchWeight);

		    ClassifyCase(Subtree->Branch[s], Weight * BranchWeight);
		}
	    }
	    break;
    }
}



/*************************************************************************/
/*								   	 */
/*  Interpolate a single value between Lower, Cut and Upper		 */
/*								   	 */
/*************************************************************************/


float Interpolate(t, v)
/*    ---- 	 */
    Tree t;
    float v;
{
    float Sum=Epsilon;

    if ( v <= t->Lower )
    {
	return 1.0;
    }

    if ( v <= t->Cut )
    {
	return 1 - 0.5 * (v - t->Lower) / (t->Cut - t->Lower + Epsilon);
    }

    if ( v < t->Upper )
    {
	return 0.5 - 0.5 * (v - t->Cut) / (t->Upper - t->Cut + Epsilon);
    }

    return 0.0;
}



/*************************************************************************/
/*								   	 */
/*  Compute the area under a soft threshold curve to the right of a	 */
/*  given value.							 */
/*								   	 */
/*************************************************************************/


float Area(t, v)
/*    ---- 	 */
    Tree t;
    float v;
{
    float Sum=Epsilon, F;

    if ( v < t->Lower )
    {
	Sum += t->Lower - v;
	v = t->Lower;
    }

    if ( v < t->Cut )
    {
	F = (t->Cut - v ) / (t->Cut - t->Lower + Epsilon);

	Sum += 0.5 * (t->Cut - v) + 0.25 * F * (t->Cut - v);
	v = t->Cut;
    }

    if ( v < t->Upper )
    {
	F = (t->Upper - v ) / (t->Upper - t->Cut + Epsilon);

	Sum += 0.25 * (t->Upper - v) * F;
    }

    Verbosity(1) printf("lower=%g  cut=%g  upper=%g  area=%g\n",
    			t->Lower, t->Cut, t->Upper, Sum);

    return Sum;
}



/*************************************************************************/
/*								  	 */
/*		Process a single case				  	 */
/*								  	 */
/*************************************************************************/


    InterpretTree()
/*  ------------- 	 */
{ 
    ClassNo c, BestClass;
    float Uncertainty=1.0;
    char Reply;
    Attribute a;

    /*  Initialise  */

    ForEach(a, 0, MaxAtt)
    {
	RangeDesc[a].Asked = false;
    }

    if ( ! ClassSum )
    {
	/*  The first time through .. allocate class sums  */

	ClassSum = (float *) malloc((MaxClass+1) * sizeof(float));
	LowClassSum = (float *) malloc((MaxClass+1) * sizeof(float));

	printf("\n");
    }
    else
    {
	printf("\n-------------------------------------------\n\n");
    }

    ForEach(c, 0, MaxClass)
    {
	LowClassSum[c] = ClassSum[c] = 0;
    }

    /*  Find the likelihood of an item's being of each class  */

    ClassifyCase(DecisionTree, 1.0);

    /*  Find the best class and show decision made  */

    BestClass = 0;
    ForEach(c, 0, MaxClass)
    {
	Verbosity(1) printf("class %d weight %.2f\n", c, ClassSum[c]);

	Uncertainty -= LowClassSum[c];
	if ( ClassSum[c] > ClassSum[BestClass] ) BestClass = c;
    }

    printf("\nDecision:\n");
    Decision(BestClass, ClassSum[BestClass],
	     LowClassSum[BestClass],
	     Uncertainty + LowClassSum[BestClass]);

    /*  Show the other significant classes, if more than two classes  */

    if ( MaxClass > 1 )
    {
	while ( true )
	{
	    ClassSum[BestClass] = 0;
	    BestClass = 0;
	    ForEach(c, 0, MaxClass)
	    {
		if ( ClassSum[c] > ClassSum[BestClass] ) BestClass = c;
	    }

	    if ( ClassSum[BestClass] < Fuzz ) break;

	    Decision(BestClass, ClassSum[BestClass],
		     LowClassSum[BestClass],
		     Uncertainty + LowClassSum[BestClass]);
	}
    }

    /*  Prompt for what to do next  */

    while ( true )
    {
	printf("\nRetry, new case or quit [r,n,q]: ");
	Reply = getchar();
	SkipLine(Reply);
	switch ( Reply )
	{
	  case 'r':  return;
	  case 'n':  Clear(); return;
	  case 'q':  exit(0);
	  default:   printf("Please enter 'r', 'n' or 'q'");
	}
    }
}



/*************************************************************************/
/*								  	 */
/*  Print the chosen class with certainty factor and range	  	 */
/*								  	 */
/*************************************************************************/


    Decision(c, p, lb, ub)
/*  -------- 	 */
    ClassNo c;
    float p, lb, ub;
{
    printf("\t%s", ClassName[c]);

    if ( p < 1-Fuzz || lb < ub - Fuzz )
    {
	printf("  CF = %.2f", p);
	if ( lb < ub - Fuzz )
	{
	    printf("  [ %.2f - %.2f ]", lb, ub);
	}
    }

    printf("\n");
}



/*************************************************************************/
/*								  	 */
/*  Main routine for classifying items using a decision tree	  	 */
/*								  	 */
/*************************************************************************/


    main(Argc, Argv)
/*  ---- 	 */
    int Argc;
    char *Argv[];
{
    int o;
    extern char *optarg;
    extern int optind;
    Attribute a;

    PrintHeader("decision tree interpreter");

    /*  Process options  */

    while ( (o = getopt(Argc, Argv, "tvf:")) != EOF )
    {
	switch (o)
	{
	    case 't':	TRACE = 1;
			break;
	    case 'v':	VERBOSITY = 1;
			break;
	    case 'f':	FileName = optarg;
			break;
	    case '?':	printf("unrecognised option\n");
			exit(1);
	}
    }

    /*  Initialise  */

    GetNames();

    DecisionTree = GetTree(".tree");
    if ( TRACE ) PrintTree(DecisionTree);

    /*  Allocate value ranges  */

    RangeDesc = (struct ValRange *) calloc(MaxAtt+1, sizeof(struct ValRange));

    ForEach(a, 0, MaxAtt)
    {
	if ( MaxAttVal[a] )
	{
	    RangeDesc[a].Probability =
		(float *) calloc(MaxAttVal[a]+1, sizeof(float));
	}
    }

    /*  Consult  */

    Clear();
    while ( true )
    {
	InterpretTree();
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -