⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 c45dt.cpp

📁 这是一个改进的C4.5决策数算法C++类
💻 CPP
📖 第 1 页 / 共 5 页
字号:
		switch ( T->NodeType )
		{
			case BrDiscr:
				break;

			case ThreshContin:
				fTreeSwap[iTreeArray]=T->Cut;iTreeArray++;
				fTreeSwap[iTreeArray]=T->Lower;iTreeArray++;
				fTreeSwap[iTreeArray]=T->Upper;iTreeArray++;
				break;

			case BrSubset:
				Bytes = (MaxAttVal[T->Tested]>>3) + 1;
				ForEach(v, 1, T->Forks)
				{
					//fprintf(TRf,"%s,",T->Subset[v]);
				}
				break;
		}

		ForEach(v, 1, T->Forks)
		{
			OutTreeToBuffer(T->Branch[v]);
		}
	}
}


/*************************************************************************/
/*									 */
/*	Retrieve entire decision tree with extension Extension		 */
/*									 */
/*************************************************************************/

Tree C45DT::GetTree(C45_String Extension)
{
    Tree Hold;
    static char *LastExt="";

	if (!TRf) LastExt="";//added by wuxing,2004/10/13

    if ( strcmp(LastExt, Extension) )
    {
	LastExt = Extension;

	if ( TRf ) fclose(TRf);

	strcpy(Fn, FileName);
	strcat(Fn, Extension);
	if ( ! ( TRf = fopen(Fn, "r") ) ) Quinlan_Error(0, Fn, "");
    }

    if ( ! TRf || getc(TRf) == EOF ) return Nil;

    Hold = InTree();

    //RecoverDiscreteNames();

    return Hold;
}

/*************************************************************************/
/*									 */
/*	Retrieve tree from saved characters				 */
/*									 */
/*************************************************************************/



/*************************************************************************/
/*									 */
/*	Retrieve tree from saved characters				 */
/*	modified by wuxing 2003/9/9									 */
/*************************************************************************/
Tree C45DT::InTree()
{
    Tree T;
    DiscrValue v;
    int Bytes,i;

    T = (Tree) malloc(sizeof(TreeRec));
	
	fscanf(TRf,"%d,",&T->NodeType);
    fscanf(TRf,"%d,",&T->Leaf);
	fscanf(TRf,"%f,",&T->Items);
    fscanf(TRf,"%f,",&T->Errors);

	T->ClassDist = (ItemCount *) calloc(MaxClass+1, sizeof(ItemCount));

	for(i=0;i<MaxClass+1;i++)
		fscanf(TRf,"%f,",&T->ClassDist[i]);


    if ( T->NodeType )
    {
		fscanf(TRf,"%d,",&T->Tested);
		fscanf(TRf,"%d,",&T->Forks);

		switch ( T->NodeType )
		{
			case BrDiscr:
				break;

			case ThreshContin:
				fscanf(TRf,"%f,",&T->Cut);
				fscanf(TRf,"%f,",&T->Lower);
				fscanf(TRf,"%f,",&T->Upper);

				break;

			case BrSubset:
				T->Subset = (C45_Set *) calloc(T->Forks + 1, sizeof(C45_Set));

				Bytes = (MaxAttVal[T->Tested]>>3) + 1;
				ForEach(v, 1, T->Forks)
				{
					T->Subset[v] = (C45_Set) malloc(Bytes);
					fscanf(TRf,"%s,",&T->Subset[v]);
				}
		}

		T->Branch = (Tree *) calloc(T->Forks + 1, sizeof(Tree));
		ForEach(v, 1, T->Forks)
		{
			T->Branch[v] = InTree();
		}
    }

    return T;
}

/*************************************************************************/
/*									 */
/*	Save tree T into Buffer					 */
/*	modified by wuxing 2003/9/20								 */
/*************************************************************************/

Tree C45DT::InTreeFromBuffer()
{
    Tree T;
	DiscrValue v;
    int Bytes,i;
	
    T = (Tree) malloc(sizeof(TreeRec));

	T->NodeType=(int)fTreeSwap[iTreeArray];iTreeArray++;
	T->Leaf=(int)fTreeSwap[iTreeArray];iTreeArray++;
	T->Items=fTreeSwap[iTreeArray];iTreeArray++;
	T->Errors=fTreeSwap[iTreeArray];iTreeArray++;
	
	T->ClassDist = (ItemCount *) calloc(MaxClass+1, sizeof(ItemCount));

	for(i=0;i<MaxClass+1;i++)
	{
		T->ClassDist[i]=fTreeSwap[iTreeArray];
		iTreeArray++;
	}

    if ( T->NodeType )
    {
		T->Tested=(int)fTreeSwap[iTreeArray];iTreeArray++;
		T->Forks=(int)fTreeSwap[iTreeArray];iTreeArray++;
	
		switch ( T->NodeType )
		{
			case BrDiscr:
				break;

			case ThreshContin:
				T->Cut=fTreeSwap[iTreeArray];iTreeArray++;
				T->Lower=fTreeSwap[iTreeArray];iTreeArray++;
				T->Upper=fTreeSwap[iTreeArray];iTreeArray++;
				break;

			case BrSubset:
				Bytes = (MaxAttVal[T->Tested]>>3) + 1;
				ForEach(v, 1, T->Forks)
				{
					//fprintf(TRf,"%s,",T->Subset[v]);
				}
				break;
		}
		T->Branch = (Tree *) calloc(T->Forks + 1, sizeof(Tree));
		ForEach(v, 1, T->Forks)
		{
			T->Branch[v] = InTreeFromBuffer();
		}
	}

    return T;
}

/*************************************************************************/
/*									 */
/*	Stream characters to/from file TRf from/to an address		 */
/*									 */
/*************************************************************************/

void C45DT::StreamOut(C45_String s,int n)
{
    while ( n-- ) putc(*s++, TRf);
}



void C45DT::StreamIn(C45_String s,int n)
{
    while ( n-- ) *s++ = getc(TRf);
}



/*************************************************************************/
/*									 */
/*	Free up space taken up by tree Node				 */
/*									 */
/*************************************************************************/

void C45DT::ReleaseTree(Tree Node)
{
    DiscrValue v;

    if ( Node->NodeType )
    {
	ForEach(v, 1, Node->Forks)
	{
	    ReleaseTree(Node->Branch[v]);
	}

	//cfree(Node->Branch);
	free(Node->Branch);

	if ( Node->NodeType == BrSubset )
	{
	    //cfree(Node->Subset);
		free(Node->Subset);
	}
    }

    //cfree(Node->ClassDist);
    //cfree(Node);
	free(Node->ClassDist);
	free(Node);
}

/*************************************************************************/
/*									 */
/*	Construct a leaf in a given node				 */
/*									 */
/*************************************************************************/


Tree C45DT::Leaf(ItemCount *ClassFreq,ClassNo NodeClass,ItemCount Cases,ItemCount Errors)
{
    Tree Node;/*树结构体指针*/

    Node = (Tree) calloc(1, sizeof(TreeRec));
/*
	typedef  struct _tree_record
	 {
	    short	NodeType;	 /* 0=leaf 1=branch 2=cut 3=subset 
	    ClassNo	Leaf;		 /* most frequent class at this node 
	    ItemCount	Items,		/* no of items at this node 
			*ClassDist,	 /* class distribution of items 
	    		Errors;		/* no of errors at this node 
	    Attribute	Tested; 	/* attribute referenced in test 
	    short	Forks;		/* number of branches at this node 
	    float	Cut,		/* threshold for continuous attribute 
		  	Lower,		/* lower limit of soft threshold *
		  	Upper;		/* upper limit ditto *
	    C45_Set         *Subset;	/* subsets of discrete values  *
	    Tree	*Branch;	/* Branch[x] = (sub)tree for outcome x *
	 }
*/

    Node->ClassDist = (ItemCount *) calloc(MaxClass+1, sizeof(ItemCount));
    memcpy(Node->ClassDist, ClassFreq, (MaxClass+1) * sizeof(ItemCount));
    
    Node->NodeType	= 0; 
    Node->Leaf		= NodeClass;
    Node->Items		= Cases;
    Node->Errors	= Errors;

    return Node; 
}



/*************************************************************************/
/*									 */
/*	Insert branches in a node 	                 		 */
/*									 */
/*************************************************************************/


void C45DT::Sprout(Tree Node,DiscrValue Branches)
{
    Node->Forks = Branches;//对于离散属性,有几个值(看MaxAttVal)就有几个分支;
    
    Node->Branch = (Tree *) calloc(Branches+1, sizeof(Tree));//分配空间
}

/*************************************************************************/
/*									 */
/*	Count the nodes in a tree					 */
/*									 */
/*************************************************************************/

	
int C45DT::TreeSize(Tree Node)
{
    int Sum=0;
    DiscrValue v;

    if ( Node->NodeType )
    {
	ForEach(v, 1, Node->Forks)
	{
	    Sum += TreeSize(Node->Branch[v]);
	}
    }

    return Sum + 1;
}

/*************************************************************************/
/*									 */
/*	Return a copy of tree T						 */
/*									 */
/*************************************************************************/

Tree C45DT::CopyTree(Tree T)
{
    DiscrValue v;
    Tree New;

    New = (Tree) malloc(sizeof(TreeRec));
    memcpy(New, T, sizeof(TreeRec));

    New->ClassDist = (ItemCount *) calloc(MaxClass+1, sizeof(ItemCount));
    memcpy(New->ClassDist, T->ClassDist, (MaxClass + 1) * sizeof(ItemCount));

    if ( T->NodeType )
    {
		New->Branch = (Tree *) calloc(T->Forks + 1, sizeof(Tree));
		ForEach(v, 1, T->Forks)
		{
			New->Branch[v] = CopyTree(T->Branch[v]);//递归调用
		}
    }

    return New;
}

/*************************************************************************/
/*									 */
/*	Save attribute values read with "discrete N"			 */
/*									 */
/*************************************************************************/


void C45DT::SaveDiscreteNames()
/*  -----------------  */
{
    Attribute Att;
    DiscrValue v;
    int Length;

    ForEach(Att, 0, MaxAtt)
    {
		if ( SpecialStatus[Att] != DISCRETE ) continue;

		StreamOut((char *) &MaxAttVal[Att], sizeof(int));

		ForEach(v, 1, MaxAttVal[Att])
		{
			Length = strlen(AttValName[Att][v]) + 1;

			StreamOut((char *) &Length, sizeof(int));
			StreamOut((char *) AttValName[Att][v], Length);
		}
    }
}

/*************************************************************************/
/*									 */
/*	Recover attribute values read with "discrete N"			 */
/*									 */
/*************************************************************************/


void C45DT::RecoverDiscreteNames()
/*  --------------------  */
{
    Attribute Att;
    DiscrValue v;
    int Length;

    ForEach(Att, 0, MaxAtt)
    {
		if ( SpecialStatus[Att] != DISCRETE ) continue;

		StreamIn((char *) &MaxAttVal[Att], sizeof(int));

		ForEach(v, 1, MaxAttVal[Att])
		{
			StreamIn((char *) &Length, sizeof(int));

			AttValName[Att][v] = (char *) malloc(Length);
			StreamIn((char *) AttValName[Att][v], Length);
		}
    }
}

/*************************************************************************/
/*								 	 */
/*	User interface for consulting trees and rulesets	 	 */
/*	------------------------------------------------	 	 */
/*	userint.c							 	 */
/*************************************************************************/
/*************************************************************************/
/*									 */
/*	Ask for the value of attribute Att if necessary			 */
/*									 */
/*************************************************************************/

void C45DT::CheckValue(Attribute Att,Tree T)
{
    if ( RangeDesc[Att].Asked ) return;

    printf("%s", AttName[Att]);
	if ( RangeDesc[Att].Known )
    {	//如果属性值范围已知,显示出来
		printf(" [ ");
		PrintRange(Att);
		printf(" ]");
    }
    printf(": ");

    ReadRange(Att, T);
}
 
/*************************************************************************/
/*									 */
/*	Print the range of values for attribute Att			 */
/*									 */
/*************************************************************************/

void C45DT::PrintRange(Attribute Att)
{
    DiscrValue dv;
    Boolean First=DTTrue;
    float p;

    if ( MaxAttVal[Att] )  /*  discrete attribute  */
    {
	ForEach(dv, 1, MaxAttVal[Att] )
	{
	    if ( (p = RangeDesc[Att].Probability[dv]) > Fuzz )
	    {
		if ( ! First )
		{
		    printf(", ");
		}
		First = DTFalse;

		printf("%s", AttValName[Att][dv]);
		if ( p < 1-Fuzz )
		{
		    printf(": %.2f", p);
		}
	    }
	}
    }
    else  /*  continuous attribute  */
    {
	printf("%g", RangeDesc[Att].LowerBound);
	if ( RangeDesc[Att].UpperBound > RangeDesc[Att].LowerBound + Fuzz )
	{
	    printf(" - %g", RangeDesc[Att].UpperBound);
	}
    }
}

/*************************************************************************/
/*									 */
/*	Read a range of values for attribute Att or <cr>		 */
/*									 */
/*************************************************************************/

void C45DT::ReadRange(Attribute Att,Tree T)
{
    char c;

    RangeDesc[Att].Asked=DTTrue;

    SkipSpace;

    if ( c == '\n' )
    {
		return;
    }
    if ( c == '?' )
    {
		if ( (c = getchar()) == 't' )
		{
			if ( T ) PrintTree(T);
			SkipLine(c);
			RangeDesc[Att].Asked = DTFalse;
			CheckValue(Att, T);
		}
		else
		{
			RangeDesc[Att].Known = DTFalse;
			SkipLine(c);
		}
		return;
    }

    ungetc(c, stdin);
    RangeDesc[Att].Known = DTTrue;

    if ( MaxAttVal[Att] )
   

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -