⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 trees.cpp

📁 实现决策树分类训练试验。 源自c4.5
💻 CPP
字号:
/*************************************************************************/
/*									 */
/*	Routines for displaying, building, saving and restoring trees	 */
/*	-------------------------------------------------------------	 */
/*									 */
/*************************************************************************/
#include "stdafx.h"
#include "MyBase.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

extern FILE *fLog;
/*  If lines look like getting too long while a tree is being
	    printed, subtrees are broken off and printed separately after
	    the main tree is finished	 */

short	Subtree;		/* highest subtree to be printed */
Tree	Subdef[100];		/* pointers to subtrees */

FILE	*TRf = NULL;	/* file pointer for tree i/o */
char	Fn[500];		/* file name */



/*************************************************************************/
/*									 */
/*	Display entire decision tree T					 */
/*									 */
/*************************************************************************/
void PrintTree(Tree T)
{
    short s;

    Subtree=0;
    fprintf(fLog,"Decision Tree:\n");
    Show(T, 0);
    fprintf(fLog,"\n");

    ForEach(s, 1, Subtree)
    {
		fprintf(fLog,"\n\nSubtree [S%d]\n", s);
		Show(Subdef[s], 0);
		fprintf(fLog,"\n");
    }
    fprintf(fLog,"\n");
}


/*************************************************************************/
/*									 */
/*	Display the tree T with offset Sh				 */
/*									 */
/*************************************************************************/
void Show(Tree T,short Sh)
{
    DiscrValue v, MaxV;
    if ( T->NodeType )
    {
		/*  See whether separate subtree needed  */

		if ( T != Nil && Sh && Sh * TabSize + MaxLine(T) > Width )
		{
			if ( Subtree < 99 )
			{
				Subdef[++Subtree] = T;
				fprintf(fLog,"[S%d]", Subtree);
			}
			else
			{
				fprintf(fLog,"[S??]");
			}
		}
		else
		{
			MaxV = T->Forks;

			/*  Print simple cases first */

			ForEach(v, 1, MaxV)
			{
				if ( ! T->Branch[v]->NodeType )
				{
					ShowBranch(Sh, T, v);
				}
			}

			/*  Print subtrees  */

			ForEach(v, 1, MaxV)
			{
				if ( T->Branch[v]->NodeType )
				{
					ShowBranch(Sh, T, v);
				}
			}
		}
    }
    else
    {
		fprintf(fLog," %s (%.1f", ClassName[T->Leaf], T->Items);
		if ( T->Errors > 0 ) fprintf(fLog,"/%.1f", T->Errors);
		fprintf(fLog,")");
	}
}



/*************************************************************************/
/*									 */
/*	Print a node T with offset Sh, branch value v, and continue	 */
/*									 */
/*************************************************************************/


void ShowBranch(short Sh,Tree T,DiscrValue v)
{
    DiscrValue Pv, Last;
    Attribute Att;
    Boolean FirstValue;
    short TextWidth, Skip, Values=0, i;
    
    Att = T->Tested;

    switch ( T->NodeType )
    {
	case BrDiscr:
	    Indent(Sh, Tab);

	    fprintf(fLog,"%s = %s:", AttName[Att], AttValName[Att][v]);
	    break;

	case ThreshContin:

	    Indent(Sh, Tab);

	    fprintf(fLog,"%s %s %g ",
		    AttName[Att], ( v == 1 ? "<=" : ">" ), T->Cut);

	    if ( T->Lower != T->Upper )
	    {
			fprintf(fLog,"[%g,%g]", T->Lower, T->Upper);
	    }

	    fprintf(fLog,":");
	    break;

	case BrSubset:

	    /*  Count values at this branch  */

	    ForEach(Pv, 1, MaxAttVal[Att])
	    {
			if ( In(Pv, T->Subset[v]) )
			{
				Last = Pv;
				Values++;
			}
	    }
	    if ( ! Values ) return;

	    Indent(Sh, Tab);

	    if ( Values == 1 )
	    {
			fprintf(fLog,"%s = %s:", AttName[Att], AttValName[Att][Last]);
			break;
	    }

	    fprintf(fLog,"%s in {", AttName[Att]);
	    FirstValue = true;
	    Skip = TextWidth = strlen(AttName[Att]) + 5;

	    ForEach(Pv, 1, MaxAttVal[Att])
	    {
			if ( In(Pv, T->Subset[v]) )
			{
				if ( ! FirstValue &&
				 TextWidth + strlen(AttValName[Att][Pv]) + 11 > Width )
				{
		  			Indent(Sh, Tab);
					ForEach(i, 1, Skip) putchar(' ');

					TextWidth = Skip;
					FirstValue = true;
				}

				fprintf(fLog,"%s%c", AttValName[Att][Pv], Pv == Last ? '}' : ',');
				TextWidth += strlen(AttValName[Att][Pv]) + 1;
				FirstValue = false;
			}
	    }
	    putchar(':');
    }

    Show(T->Branch[v], Sh+1);
}



/*************************************************************************/
/*									 */
/*	Find the maximum single line size for non-leaf subtree St.	 */
/*	The line format is						 */
/*			<attribute> <> X.xx:[ <class (<Items>)], or	 */
/*			<attribute> = <DVal>:[ <class> (<Items>)]	 */
/*									 */
/*************************************************************************/


short MaxLine(Tree St)
{
    Attribute a;
    DiscrValue v, MaxV, Next;
    short Ll, MaxLl=0;

    a = St->Tested;

    MaxV = St->Forks;
    ForEach(v, 1, MaxV)
    {
		Ll = ( St->NodeType == 2 ? 4 : strlen(AttValName[a][v]) ) + 1;

		/*  Find the appropriate branch  */

        Next = v;

		if ( ! St->Branch[Next]->NodeType )
		{
			Ll += strlen(ClassName[St->Branch[Next]->Leaf]) + 6;
		}
		MaxLl = Max(MaxLl, Ll);
	}

    return strlen(AttName[a]) + 4 + MaxLl;
}



/*************************************************************************/
/*								   	 */
/*	Indent Sh columns					  	 */
/*								  	 */
/*************************************************************************/
void Indent(short Sh,char * Mark)
{
    fprintf(fLog,"\n");
    while ( Sh-- ) fprintf(fLog,"%s", Mark);
}



/*************************************************************************/
/*									 */
/*	Save entire decision tree T in file with extension Extension	 */
/*									 */
/*************************************************************************/
void SaveTree(Tree T,String Extension)
{
    static char *LastExt="";

    if ( strcmp(LastExt, Extension) )
    {
		LastExt = Extension;

		if ( TRf ) fclose(TRf);

		strcpy(Fn, FileName);
		strcat(Fn, Extension);
		if ( ! ( TRf = fopen(Fn, "wt") ) )
			Error(0, Fn, " for writing");
    }

    //putc('\n', TRf);
    OutTree(T);

    SaveDiscreteNames();
}



/*************************************************************************/
/*									 */
/*	Save tree T as characters					 */
/*									 */
/*************************************************************************/
void  OutTree(Tree T)
{
    DiscrValue v;
    int Bytes;

	// Added by  Huwx 2002.12
	char Enter='\n';
	StreamOut(&Enter,1);
	// End of Added by  Huwx 2002.12

    StreamOut(&T->NodeType,1);
    StreamOut(&T->Leaf, 1);
    StreamOut(&T->Items, 1);
    StreamOut(&T->Errors, 1);
    StreamOut(T->ClassDist, (MaxClass + 1) );

    if ( T->NodeType )
    {
		StreamOut(&T->Tested, 1);
		StreamOut(&T->Forks,1);

		switch ( T->NodeType )
		{
			case BrDiscr:
			break;

			case ThreshContin:
			StreamOut(&T->Cut, 1);
			StreamOut(&T->Lower, 1);
			StreamOut(&T->Upper, 1);
			break;

			case BrSubset:
			Bytes = (MaxAttVal[T->Tested]>>3) + 1;
			ForEach(v, 1, T->Forks)
			{
				StreamOut(T->Subset[v], Bytes);
			}
			break;
		}

		ForEach(v, 1, T->Forks)
		{
			OutTree(T->Branch[v]);
		}
    }
}



/*************************************************************************/
/*									 */
/*	Retrieve entire decision tree with extension Extension		 */
/*									 */
/*************************************************************************/
Tree GetTree(String Extension)
{
    Tree Hold;
    static char *LastExt="";

    if ( strcmp(LastExt, Extension) )
    {
		LastExt = Extension;

		if ( TRf ) fclose(TRf);

		strcpy(Fn, FileName);
		strcat(Fn, Extension);
		if ( ! ( TRf = fopen(Fn, "r") ) ) Error(0, Fn, "");
    }

    if ( ! TRf || getc(TRf) == EOF ) return Nil;

    Hold = InTree();

    RecoverDiscreteNames();

    return Hold;
}



/*************************************************************************/
/*									 */
/*	Retrieve tree from saved characters				 */
/*									 */
/*************************************************************************/
Tree InTree()
{
    Tree T;
    DiscrValue v;
    int Bytes;

    T = (Tree) malloc(sizeof(TreeRec));


    StreamIn(&T->NodeType,1);
    StreamIn(&T->Leaf, 1);
    StreamIn(&T->Items,1);
    StreamIn(&T->Errors, 1);

    T->ClassDist = (ItemCount *) calloc(MaxClass+1, sizeof(ItemCount));
    StreamIn(T->ClassDist, (MaxClass + 1) );

    if ( T->NodeType )
    {
		StreamIn(&T->Tested,1);
		StreamIn(&T->Forks,1);

		switch ( T->NodeType )
		{
			case BrDiscr:
			break;

			case ThreshContin:
			StreamIn(&T->Cut,1);
			StreamIn(&T->Lower, 1);
			StreamIn(&T->Upper, 1);
			break;

			case BrSubset:
			T->Subset = (Set *) calloc(T->Forks + 1, sizeof(Set));

			Bytes = (MaxAttVal[T->Tested]>>3) + 1;
			ForEach(v, 1, T->Forks)
			{
				T->Subset[v] = (Set) malloc(Bytes);
				StreamIn(T->Subset[v], Bytes);
			}
		}

		T->Branch = (Tree *) calloc(T->Forks + 1, sizeof(Tree));
		ForEach(v, 1, T->Forks)
		{
			T->Branch[v] = InTree();
		}
    }

    return T;
}



/*************************************************************************/
/*									 */
/*	Stream characters to/from file TRf from/to an address		 */
/*									 */
/*************************************************************************/
void StreamOut(char * s,int n)
{
	for(int i=0;i<n;i++)
		fprintf(TRf,"%c",s[i]);
}

void StreamOut(short * s,int n)
{
    //while ( n-- ) putc(*s++, TRf);
	// Modify by Huwx 2002.12
	for(int i=0;i<n;i++)
		fprintf(TRf,"%i ",s[i]);
}

void StreamOut(int * s,int n)
{
	for(int i=0;i<n;i++)
		fprintf(TRf,"%i ",s[i]);
}

void StreamOut(float * s,int n)
{
	for(int i=0;i<n;i++)
		fprintf(TRf,"%f ",s[i]);
}

void StreamIn(char *s,int n)
{
    while ( n-- ) *s++ = getc(TRf);
}

void StreamIn(short *s,int n)
{
	int i;
	for(i=0;i<n;i++)
	{
		fscanf(TRf,"%i ",s);
		s++;
	}
}

void StreamIn(int *s,int n)
{
	int i;
	for(i=0;i<n;i++)
	{
		fscanf(TRf,"%i ",s);
		s++;
	}
}

void StreamIn(float *s,int n)
{
	int i;
	for(i=0;i<n;i++)
	{
		fscanf(TRf,"%f ",s);
		s++;
	}
}


/*************************************************************************/
/*									 */
/*	Free up space taken up by tree Node				 */
/*									 */
/*************************************************************************/
void ReleaseTree(Tree Node)
{
    DiscrValue v;

    if ( Node->NodeType )
    {
		ForEach(v, 1, Node->Forks)
		{
			ReleaseTree(Node->Branch[v]);
		}

		delete Node->Branch;

		if ( Node->NodeType == BrSubset )
		{
			delete Node->Subset;
		}

    }

    delete Node->ClassDist;
    delete Node;
}



/*************************************************************************/
/*									 */
/*	Construct a leaf in a given node				 */
/*									 */
/*************************************************************************/


Tree Leaf(ItemCount *ClassFreq,ClassNo NodeClass,ItemCount  Cases,ItemCount  Errors)
{
    Tree Node;

    Node = (Tree) calloc(1, sizeof(TreeRec));

    Node->ClassDist = (ItemCount *) calloc(MaxClass+1, sizeof(ItemCount));
    memcpy(Node->ClassDist, ClassFreq, (MaxClass+1) * sizeof(ItemCount));
    
    Node->NodeType	= 0; 
    Node->Leaf		= NodeClass;
    Node->Items		= Cases;
    Node->Errors	= Errors;

    return Node; 
}



/*************************************************************************/
/*									 */
/*	Insert branches in a node 	                 		 */
/*									 */
/*************************************************************************/
void Sprout(Tree Node,DiscrValue  Branches)
{
    Node->Forks = Branches;
    
    Node->Branch = (Tree *) calloc(Branches+1, sizeof(Tree));
}



/*************************************************************************/
/*									 */
/*	Count the nodes in a tree					 */
/*									 */
/*************************************************************************/
int TreeSize(Tree Node)
{
    int Sum=0;
    DiscrValue v;

    if ( Node->NodeType )
    {
		ForEach(v, 1, Node->Forks)
		{
			Sum += TreeSize(Node->Branch[v]);
		}
    }

    return Sum + 1;
}



/*************************************************************************/
/*									 */
/*	Return a copy of tree T						 */
/*									 */
/*************************************************************************/
Tree CopyTree(Tree T)
{
    DiscrValue v;
    Tree New;

    New = (Tree) malloc(sizeof(TreeRec));
    memcpy(New, T, sizeof(TreeRec));

    New->ClassDist = (ItemCount *) calloc(MaxClass+1, sizeof(ItemCount));
    memcpy(New->ClassDist, T->ClassDist, (MaxClass + 1) * sizeof(ItemCount));

    if ( T->NodeType )
    {
		New->Branch = (Tree *) calloc(T->Forks + 1, sizeof(Tree));
		ForEach(v, 1, T->Forks)
		{
			New->Branch[v] = CopyTree(T->Branch[v]);
		}
	}

    return New;
}



/*************************************************************************/
/*									 */
/*	Save attribute values read with "discrete N"			 */
/*									 */
/*************************************************************************/
void SaveDiscreteNames()
{
    Attribute Att;
    DiscrValue v;
    int Length;

    ForEach(Att, 0, MaxAtt)
    {
		if ( SpecialStatus[Att] != DISCRETE ) continue;

		StreamOut( &MaxAttVal[Att],1);

		ForEach(v, 1, MaxAttVal[Att])
		{
			Length = strlen(AttValName[Att][v]) + 1;

			StreamOut( &Length, 1);
			StreamOut( AttValName[Att][v], Length);
		}
    }
}



/*************************************************************************/
/*									 */
/*	Recover attribute values read with "discrete N"			 */
/*									 */
/*************************************************************************/
void RecoverDiscreteNames()
{
    Attribute Att;
    DiscrValue v;
    char Length;

    ForEach(Att, 0, MaxAtt)
    {
		if ( SpecialStatus[Att] != DISCRETE ) continue;

		StreamIn((&MaxAttVal[Att]), 1);

		ForEach(v, 1, MaxAttVal[Att])
		{
			StreamIn(&Length, 1);

			AttValName[Att][v] = (char *) malloc(Length);
			StreamIn(AttValName[Att][v], Length);
		}
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -