⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 consult.cpp

📁 实现决策树分类训练试验。 源自c4.5
💻 CPP
字号:
/*************************************************************************/
/*								   	 */
/*	Classify items interactively using a decision tree	   	 */
/*	--------------------------------------------------   		 */
/*								   	 */
/*************************************************************************/
#include "stdafx.h"
#include "MyBase.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

extern FILE *fLog;
extern short MaxAtt, MaxClass, MaxDiscrVal;
extern ItemNo MaxItem;

extern Description	*Item;

extern DiscrValue	*MaxAttVal;

extern String *ClassName,
		*AttName,
		**AttValName;
extern char FileName[100];

extern char * SpecialStatus;

extern short VERBOSITY;
short MYTRACE;

extern RangeDescRec RangeDesc;

extern Tree	DecisionTree;			/* tree being used */

extern float *ClassSum;		/* accumulated central estimates */

float	*LowClassSum;	/* accumulated lower estimates */

/*************************************************************************/
/*								   	 */
/*  Classify the extended case description in RangeDesc using the	 */
/*  given subtree, by adjusting the values ClassSum and LowClassSum	 */
/*  for each class, indicating the likelihood of the case being  	 */
/*  of that class.						   	 */
/*								   	 */
/*************************************************************************/
void ClassifyCase(Tree Subtree, float Weight)
{
    DiscrValue v;
    float BranchWeight;
    Attribute a;
    short s;
    ClassNo c;

    /*  A leaf  */

    if ( ! Subtree->NodeType )
    {
		Verbosity(1)
			fprintf(fLog,"\tClass %s weight %g cases %g\n", 
				ClassName[Subtree->Leaf], Weight, Subtree->Items);

		if ( Subtree->Items > 0 )
		{
			/*  Adjust class sum of ALL classes, but adjust low class sum
			of leaf class only  */

			ForEach(c, 0, MaxClass)
			{
				ClassSum[c] += Weight * Subtree->ClassDist[c] / Subtree->Items;
			}

			LowClassSum[Subtree->Leaf] +=
				Weight * (1 - Subtree->Errors / Subtree->Items);
		}
		else
		{
			ClassSum[Subtree->Leaf] += Weight;
		}

		return;
    }

    a = Subtree->Tested;

    CheckValue(a, Subtree);

    /*  Unknown value  */

    if ( ! RangeDesc[a].Known )
    {
		ForEach(v, 1, Subtree->Forks)
		{
			ClassifyCase(Subtree->Branch[v],
				 (Weight * Subtree->Branch[v]->Items) / Subtree->Items);
		}
		return;
    }

    /*  Known value  */

    switch ( Subtree->NodeType )
    {
	case BrDiscr:  /* test of discrete attribute */
		ForEach(v, 1, MaxAttVal[a])
		{
			BranchWeight = RangeDesc[a].Probability[v];
			if ( BranchWeight > 0 )
			{
				Verbosity(1)
		    		printf("\tWeight %g: test att %s (val %s = %g)\n",
		    			   Weight, AttName[a], AttValName[a][v],
					   BranchWeight);

				ClassifyCase(Subtree->Branch[v], Weight * BranchWeight);
			}
		}
		break;
	case ThreshContin:  /* test of continuous attribute */
		BranchWeight = 	RangeDesc[a].UpperBound <= Subtree->Lower ? 1.0 : 
		RangeDesc[a].LowerBound > Subtree->Upper ? 0.0 :
		RangeDesc[a].LowerBound != RangeDesc[a].UpperBound ?
			(Area(Subtree, RangeDesc[a].LowerBound) -
			Area(Subtree, RangeDesc[a].UpperBound)) /
			(RangeDesc[a].UpperBound - RangeDesc[a].LowerBound) :
		Interpolate(Subtree, RangeDesc[a].LowerBound) ;
		
		Verbosity(1)
			fprintf(fLog,"\tWeight %g: test att %s (branch weight=%g)\n",
			Weight, AttName[a], BranchWeight);
		if ( BranchWeight > Fuzz )
		{
			ClassifyCase(Subtree->Branch[1], Weight * BranchWeight);
		}
		if ( BranchWeight < 1-Fuzz )
		{
			ClassifyCase(Subtree->Branch[2], Weight * (1 - BranchWeight));
		}
		break;
	case BrSubset:  /* subset test on discrete attribute  */
		ForEach(s, 1, Subtree->Forks)
		{
			BranchWeight = 0.0;
			ForEach(v, 1, MaxAttVal[a])
			{
				if ( In(v, Subtree->Subset[s]) )
				{
					BranchWeight += RangeDesc[a].Probability[v];
				}
			}
			if ( BranchWeight > 0 )
			{
				Verbosity(1)
		    		printf("\tWeight %g: test att %s (val %s = %g)\n",
		    			   Weight, AttName[a], AttValName[a][v],
					   BranchWeight);

				ClassifyCase(Subtree->Branch[s], Weight * BranchWeight);
			}
		}
		break;
    }
}

/*************************************************************************/
/*								   	 */
/*  Interpolate a single value between Lower, Cut and Upper		 */
/*								   	 */
/*************************************************************************/
float Interpolate(Tree t,float v)
{
    float Sum=Epsilon;

    if ( v <= t->Lower )
    {
		return 1.0;
    }

    if ( v <= t->Cut )
    {
		return 1 - 0.5 * (v - t->Lower) / (t->Cut - t->Lower + Epsilon);
    }

    if ( v < t->Upper )
    {
		return 0.5 - 0.5 * (v - t->Cut) / (t->Upper - t->Cut + Epsilon);
    }

    return 0.0;
}



/*************************************************************************/
/*								   	 */
/*  Compute the area under a soft threshold curve to the right of a	 */
/*  given value.							 */
/*								   	 */
/*************************************************************************/


float Area(Tree t, float v)
{
    float Sum=Epsilon, F;

    if ( v < t->Lower )
    {
		Sum += t->Lower - v;
		v = t->Lower;
    }

    if ( v < t->Cut )
    {
		F = (t->Cut - v ) / (t->Cut - t->Lower + Epsilon);

		Sum += 0.5 * (t->Cut - v) + 0.25 * F * (t->Cut - v);
		v = t->Cut;
    }

    if ( v < t->Upper )
    {
		F = (t->Upper - v ) / (t->Upper - t->Cut + Epsilon);

		Sum += 0.25 * (t->Upper - v) * F;
    }

    Verbosity(1) fprintf(fLog,"lower=%g  cut=%g  upper=%g  area=%g\n",
    			t->Lower, t->Cut, t->Upper, Sum);

    return Sum;
}

/*************************************************************************/
/*								  	 */
/*		Process a single case				  	 */
/*								  	 */
/*************************************************************************/
void InterpretTree()
{ 
    ClassNo c, BestClass;
    float Uncertainty=1.0;
    char Reply;
    Attribute a;

    /*  Initialise  */

    ForEach(a, 0, MaxAtt)
    {
		RangeDesc[a].Asked = false;
    }

    if ( ! ClassSum )
    {
		/*  The first time through .. allocate class sums  */

		ClassSum = (float *) malloc((MaxClass+1) * sizeof(float));
		LowClassSum = (float *) malloc((MaxClass+1) * sizeof(float));

		fprintf(fLog,"\n");
    }
    else
    {
		fprintf(fLog,"\n-------------------------------------------\n\n");
    }

    ForEach(c, 0, MaxClass)
    {
		LowClassSum[c] = ClassSum[c] = 0;
    }

    /*  Find the likelihood of an item's being of each class  */

    ClassifyCase(DecisionTree, 1.0);

    /*  Find the best class and show decision made  */

    BestClass = 0;
    ForEach(c, 0, MaxClass)
    {
		Verbosity(1) printf("class %d weight %.2f\n", c, ClassSum[c]);

		Uncertainty -= LowClassSum[c];
		if ( ClassSum[c] > ClassSum[BestClass] ) BestClass = c;
    }

    printf("\nDecision:\n");
    Decision(BestClass, ClassSum[BestClass],LowClassSum[BestClass],
		Uncertainty + LowClassSum[BestClass]);

    /*  Show the other significant classes, if more than two classes  */

    if ( MaxClass > 1 )
    {
		while ( true )
		{
			ClassSum[BestClass] = 0;
			BestClass = 0;
			ForEach(c, 0, MaxClass)
			{
				if ( ClassSum[c] > ClassSum[BestClass] ) BestClass = c;
			}

			if ( ClassSum[BestClass] < Fuzz ) break;

			Decision(BestClass, ClassSum[BestClass],LowClassSum[BestClass],
				Uncertainty + LowClassSum[BestClass]);
		}
    }

    /*  Prompt for what to do next  */

    while ( true )
    {
	//	modify here;

		printf("\nRetry, new case or quit [r,n,q]: ");
		Reply = getchar();
		SkipLine(Reply);
		switch ( Reply )
		{
		  case 'r':  return;
		  case 'n':  Clear(); return;
		  case 'q':  exit(0);
		  default:   printf("Please enter 'r', 'n' or 'q'");
		}
    }
}



/*************************************************************************/
/*								  	 */
/*  Print the chosen class with certainty factor and range	  	 */
/*								  	 */
/*************************************************************************/
void Decision(ClassNo c, float p, float lb, float ub)
{
    fprintf(fLog,"\t%s", ClassName[c]);

    if ( p < 1-Fuzz || lb < ub - Fuzz )
    {
		fprintf(fLog,"  CF = %.2f", p);
		if ( lb < ub - Fuzz )
		{
			fprintf(fLog,"  [ %.2f - %.2f ]", lb, ub);
		}
    }

    fprintf(fLog,"\n");
}


/*************************************************************************/
/*								 	 */
/*	User interface for consulting trees and rulesets	 	 */
/*	------------------------------------------------	 	 */
/*								 	 */
/*************************************************************************/
/*************************************************************************/
/*									 */
/*	Ask for the value of attribute Att if necessary			 */
/*									 */
/*************************************************************************/
void CheckValue(Attribute Att, Tree T)
{
    if ( RangeDesc[Att].Asked ) return;

    printf("%s", AttName[Att]);
    if ( RangeDesc[Att].Known )
    {
		fprintf(fLog," [ ");
		PrintRange(Att);
		fprintf(fLog," ]");
    }
    fprintf(fLog,": ");

    ReadRange(Att, T);
}
/*************************************************************************/
/*									 */
/*	Print the range of values for attribute Att			 */
/*									 */
/*************************************************************************/
void PrintRange(Attribute Att)    
{
    DiscrValue dv;
    Boolean First=true;
    float p;

    if ( MaxAttVal[Att] )  /*  discrete attribute  */
    {
		ForEach(dv, 1, MaxAttVal[Att] )
		{
			if ( (p = RangeDesc[Att].Probability[dv]) > Fuzz )
			{
				if ( ! First )
				{
					fprintf(fLog,", ");
				}
				First = false;

				fprintf(fLog,"%s", AttValName[Att][dv]);
				if ( p < 1-Fuzz )
				{
					fprintf(fLog,": %.2f", p);
				}
			}
		}
    }
    else  /*  continuous attribute  */
    {
		fprintf(fLog,"%g", RangeDesc[Att].LowerBound);
		if ( RangeDesc[Att].UpperBound > RangeDesc[Att].LowerBound + Fuzz )
		{
			fprintf(fLog," - %g", RangeDesc[Att].UpperBound);
		}
    }
}


extern	char		Delimiter;
//#define	SkipSpace	while ( (c = getchar()) == ' ' || c == '\t' )
/*************************************************************************/
/*									 */
/*	Read a range of values for attribute Att or <cr>		 */
/*									 */
/*************************************************************************/
void ReadRange(Attribute Att, Tree T)
{
    char c;

    RangeDesc[Att].Asked=true;

    SkipSpace;

    if ( c == '\n' )
    {
		return;
    }
    if ( c == '?' )
    {
		if ( (c = getchar()) == 't' )
		{
			if ( T ) PrintTree(T);
			SkipLine(c);
			RangeDesc[Att].Asked = false;
			CheckValue(Att, T);
		}
		else
		{
			RangeDesc[Att].Known = false;
			SkipLine(c);
		}
		return;
    }

    ungetc(c, stdin);
    RangeDesc[Att].Known = true;

    if ( MaxAttVal[Att] )
    {
		ReadDiscr(Att, T);
    }
    else
    {
		ReadContin(Att, T);
    }
}



/*************************************************************************/
/*									 */
/*	Read a discrete attribute value or range			 */
/*									 */
/*************************************************************************/
void ReadDiscr(Attribute Att, Tree T)
{
    char Name[500];
    DiscrValue dv, PNo;
    float P, PSum;

    ForEach(dv, 1, MaxAttVal[Att])
    {
		RangeDesc[Att].Probability[dv] = 0.0;
    }

    do
	{
		ReadName(stdin, Name);

//		dv = Which(Name, AttValName[Att], 1, MaxAttVal[Att]);
		if ( ! dv )
		{
			printf("\tPermissible values are %s", AttValName[Att][1]);
			ForEach(dv, 2, MaxAttVal[Att])
			{
				printf(", %s", AttValName[Att][dv]);
			}
			printf("\n");

			SkipLine(Delimiter);
			Retry(Att, T);
			return;
		}

		if ( Delimiter == ':' )
		{
			ReadName(stdin, Name);
			sscanf(Name, "%f", &P);	/* get probability */
		}
		else
		{
			P = 1.0;		/*  only one attribute value  */
		}

		RangeDesc[Att].Probability[dv] = P;
	}while ( Delimiter == ',' );

    /*  Check that sum of probabilities is not > 1  */

    PNo = MaxAttVal[Att];
    PSum = 1.0;
    ForEach(dv, 1, MaxAttVal[Att])
    {
		if ( RangeDesc[Att].Probability[dv] > Fuzz )
		{
			PSum -= RangeDesc[Att].Probability[dv];
			PNo--;
		}
    }

    if ( PSum < 0 || ! PNo && PSum > Fuzz )
    {
		printf("Probability values must sum to 1\n");
		SkipLine(Delimiter);
		Retry(Att, T);
		return;
    }

    /*  Distribute the remaining probability equally among
	the unspecified attribute values  */

    PSum /= PNo;
    ForEach(dv, 1, MaxAttVal[Att])
    {
		if ( RangeDesc[Att].Probability[dv] < Fuzz )
		{
			RangeDesc[Att].Probability[dv] = PSum;
		}
    }
}



/*************************************************************************/
/*									 */
/*	Read a continuous attribute value or range			 */
/*									 */
/*************************************************************************/
void ReadContin(Attribute Att, Tree T)
{
    char c;

    scanf("%f", &RangeDesc[Att].LowerBound);
    SkipSpace;

    if ( c == '-'  )
    {
		scanf("%f", &RangeDesc[Att].UpperBound);
		SkipSpace;
    }
    else
    {
		RangeDesc[Att].UpperBound = RangeDesc[Att].LowerBound;
    }

    if ( c != '\n' )
    {
		printf("Must be a continuous value or range\n");
		SkipLine(c);
		Retry(Att, T);
    }
}



/*************************************************************************/
/*									 */
/*	Try again to obtain a value for attribute Att			 */
/*									 */
/*************************************************************************/
void Retry(Attribute Att, Tree T)
{
    RangeDesc[Att].Asked = false;
    RangeDesc[Att].Known = false;
    CheckValue(Att, T);
}



/*************************************************************************/
/*									 */
/*	Skip to the end of the line of input				 */
/*									 */
/*************************************************************************/
void SkipLine(char c)
{
    while ( c != '\n' ) c = getchar();
}



/*************************************************************************/
/*									 */
/*		Clear the range description				 */
/*									 */
/*************************************************************************/
void Clear()
{
    Attribute Att;

    ForEach(Att, 0, MaxAtt)
    {
		RangeDesc[Att].Known = false;
    }
}


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -