📄 c45dt.cpp
字号:
switch ( T->NodeType )
{
case BrDiscr:
break;
case ThreshContin:
fTreeSwap[iTreeArray]=T->Cut;iTreeArray++;
fTreeSwap[iTreeArray]=T->Lower;iTreeArray++;
fTreeSwap[iTreeArray]=T->Upper;iTreeArray++;
break;
case BrSubset:
Bytes = (MaxAttVal[T->Tested]>>3) + 1;
ForEach(v, 1, T->Forks)
{
//fprintf(TRf,"%s,",T->Subset[v]);
}
break;
}
ForEach(v, 1, T->Forks)
{
OutTreeToBuffer(T->Branch[v]);
}
}
}
/*************************************************************************/
/* */
/* Retrieve entire decision tree with extension Extension */
/* */
/*************************************************************************/
Tree C45DT::GetTree(C45_String Extension)
{
Tree Hold;
static char *LastExt="";
if (!TRf) LastExt="";//added by wuxing,2004/10/13
if ( strcmp(LastExt, Extension) )
{
LastExt = Extension;
if ( TRf ) fclose(TRf);
strcpy(Fn, FileName);
strcat(Fn, Extension);
if ( ! ( TRf = fopen(Fn, "r") ) ) Quinlan_Error(0, Fn, "");
}
if ( ! TRf || getc(TRf) == EOF ) return Nil;
Hold = InTree();
//RecoverDiscreteNames();
return Hold;
}
/*************************************************************************/
/* */
/* Retrieve tree from saved characters */
/* */
/*************************************************************************/
/*************************************************************************/
/* */
/* Retrieve tree from saved characters */
/* modified by wuxing 2003/9/9 */
/*************************************************************************/
Tree C45DT::InTree()
{
Tree T;
DiscrValue v;
int Bytes,i;
T = (Tree) malloc(sizeof(TreeRec));
fscanf(TRf,"%d,",&T->NodeType);
fscanf(TRf,"%d,",&T->Leaf);
fscanf(TRf,"%f,",&T->Items);
fscanf(TRf,"%f,",&T->Errors);
T->ClassDist = (ItemCount *) calloc(MaxClass+1, sizeof(ItemCount));
for(i=0;i<MaxClass+1;i++)
fscanf(TRf,"%f,",&T->ClassDist[i]);
if ( T->NodeType )
{
fscanf(TRf,"%d,",&T->Tested);
fscanf(TRf,"%d,",&T->Forks);
switch ( T->NodeType )
{
case BrDiscr:
break;
case ThreshContin:
fscanf(TRf,"%f,",&T->Cut);
fscanf(TRf,"%f,",&T->Lower);
fscanf(TRf,"%f,",&T->Upper);
break;
case BrSubset:
T->Subset = (C45_Set *) calloc(T->Forks + 1, sizeof(C45_Set));
Bytes = (MaxAttVal[T->Tested]>>3) + 1;
ForEach(v, 1, T->Forks)
{
T->Subset[v] = (C45_Set) malloc(Bytes);
fscanf(TRf,"%s,",&T->Subset[v]);
}
}
T->Branch = (Tree *) calloc(T->Forks + 1, sizeof(Tree));
ForEach(v, 1, T->Forks)
{
T->Branch[v] = InTree();
}
}
return T;
}
/*************************************************************************/
/* */
/* Save tree T into Buffer */
/* modified by wuxing 2003/9/20 */
/*************************************************************************/
Tree C45DT::InTreeFromBuffer()
{
Tree T;
DiscrValue v;
int Bytes,i;
T = (Tree) malloc(sizeof(TreeRec));
T->NodeType=(int)fTreeSwap[iTreeArray];iTreeArray++;
T->Leaf=(int)fTreeSwap[iTreeArray];iTreeArray++;
T->Items=fTreeSwap[iTreeArray];iTreeArray++;
T->Errors=fTreeSwap[iTreeArray];iTreeArray++;
T->ClassDist = (ItemCount *) calloc(MaxClass+1, sizeof(ItemCount));
for(i=0;i<MaxClass+1;i++)
{
T->ClassDist[i]=fTreeSwap[iTreeArray];
iTreeArray++;
}
if ( T->NodeType )
{
T->Tested=(int)fTreeSwap[iTreeArray];iTreeArray++;
T->Forks=(int)fTreeSwap[iTreeArray];iTreeArray++;
switch ( T->NodeType )
{
case BrDiscr:
break;
case ThreshContin:
T->Cut=fTreeSwap[iTreeArray];iTreeArray++;
T->Lower=fTreeSwap[iTreeArray];iTreeArray++;
T->Upper=fTreeSwap[iTreeArray];iTreeArray++;
break;
case BrSubset:
Bytes = (MaxAttVal[T->Tested]>>3) + 1;
ForEach(v, 1, T->Forks)
{
//fprintf(TRf,"%s,",T->Subset[v]);
}
break;
}
T->Branch = (Tree *) calloc(T->Forks + 1, sizeof(Tree));
ForEach(v, 1, T->Forks)
{
T->Branch[v] = InTreeFromBuffer();
}
}
return T;
}
/*************************************************************************/
/* */
/* Stream characters to/from file TRf from/to an address */
/* */
/*************************************************************************/
void C45DT::StreamOut(C45_String s,int n)
{
while ( n-- ) putc(*s++, TRf);
}
void C45DT::StreamIn(C45_String s,int n)
{
while ( n-- ) *s++ = getc(TRf);
}
/*************************************************************************/
/* */
/* Free up space taken up by tree Node */
/* */
/*************************************************************************/
void C45DT::ReleaseTree(Tree Node)
{
DiscrValue v;
if ( Node->NodeType )
{
ForEach(v, 1, Node->Forks)
{
ReleaseTree(Node->Branch[v]);
}
//cfree(Node->Branch);
free(Node->Branch);
if ( Node->NodeType == BrSubset )
{
//cfree(Node->Subset);
free(Node->Subset);
}
}
//cfree(Node->ClassDist);
//cfree(Node);
free(Node->ClassDist);
free(Node);
}
/*************************************************************************/
/* */
/* Construct a leaf in a given node */
/* */
/*************************************************************************/
Tree C45DT::Leaf(ItemCount *ClassFreq,ClassNo NodeClass,ItemCount Cases,ItemCount Errors)
{
Tree Node;/*树结构体指针*/
Node = (Tree) calloc(1, sizeof(TreeRec));
/*
typedef struct _tree_record
{
short NodeType; /* 0=leaf 1=branch 2=cut 3=subset
ClassNo Leaf; /* most frequent class at this node
ItemCount Items, /* no of items at this node
*ClassDist, /* class distribution of items
Errors; /* no of errors at this node
Attribute Tested; /* attribute referenced in test
short Forks; /* number of branches at this node
float Cut, /* threshold for continuous attribute
Lower, /* lower limit of soft threshold *
Upper; /* upper limit ditto *
C45_Set *Subset; /* subsets of discrete values *
Tree *Branch; /* Branch[x] = (sub)tree for outcome x *
}
*/
Node->ClassDist = (ItemCount *) calloc(MaxClass+1, sizeof(ItemCount));
memcpy(Node->ClassDist, ClassFreq, (MaxClass+1) * sizeof(ItemCount));
Node->NodeType = 0;
Node->Leaf = NodeClass;
Node->Items = Cases;
Node->Errors = Errors;
return Node;
}
/*************************************************************************/
/* */
/* Insert branches in a node */
/* */
/*************************************************************************/
void C45DT::Sprout(Tree Node,DiscrValue Branches)
{
Node->Forks = Branches;//对于离散属性,有几个值(看MaxAttVal)就有几个分支;
Node->Branch = (Tree *) calloc(Branches+1, sizeof(Tree));//分配空间
}
/*************************************************************************/
/* */
/* Count the nodes in a tree */
/* */
/*************************************************************************/
int C45DT::TreeSize(Tree Node)
{
int Sum=0;
DiscrValue v;
if ( Node->NodeType )
{
ForEach(v, 1, Node->Forks)
{
Sum += TreeSize(Node->Branch[v]);
}
}
return Sum + 1;
}
/*************************************************************************/
/* */
/* Return a copy of tree T */
/* */
/*************************************************************************/
Tree C45DT::CopyTree(Tree T)
{
DiscrValue v;
Tree New;
New = (Tree) malloc(sizeof(TreeRec));
memcpy(New, T, sizeof(TreeRec));
New->ClassDist = (ItemCount *) calloc(MaxClass+1, sizeof(ItemCount));
memcpy(New->ClassDist, T->ClassDist, (MaxClass + 1) * sizeof(ItemCount));
if ( T->NodeType )
{
New->Branch = (Tree *) calloc(T->Forks + 1, sizeof(Tree));
ForEach(v, 1, T->Forks)
{
New->Branch[v] = CopyTree(T->Branch[v]);//递归调用
}
}
return New;
}
/*************************************************************************/
/* */
/* Save attribute values read with "discrete N" */
/* */
/*************************************************************************/
void C45DT::SaveDiscreteNames()
/* ----------------- */
{
Attribute Att;
DiscrValue v;
int Length;
ForEach(Att, 0, MaxAtt)
{
if ( SpecialStatus[Att] != DISCRETE ) continue;
StreamOut((char *) &MaxAttVal[Att], sizeof(int));
ForEach(v, 1, MaxAttVal[Att])
{
Length = strlen(AttValName[Att][v]) + 1;
StreamOut((char *) &Length, sizeof(int));
StreamOut((char *) AttValName[Att][v], Length);
}
}
}
/*************************************************************************/
/* */
/* Recover attribute values read with "discrete N" */
/* */
/*************************************************************************/
void C45DT::RecoverDiscreteNames()
/* -------------------- */
{
Attribute Att;
DiscrValue v;
int Length;
ForEach(Att, 0, MaxAtt)
{
if ( SpecialStatus[Att] != DISCRETE ) continue;
StreamIn((char *) &MaxAttVal[Att], sizeof(int));
ForEach(v, 1, MaxAttVal[Att])
{
StreamIn((char *) &Length, sizeof(int));
AttValName[Att][v] = (char *) malloc(Length);
StreamIn((char *) AttValName[Att][v], Length);
}
}
}
/*************************************************************************/
/* */
/* User interface for consulting trees and rulesets */
/* ------------------------------------------------ */
/* userint.c */
/*************************************************************************/
/*************************************************************************/
/* */
/* Ask for the value of attribute Att if necessary */
/* */
/*************************************************************************/
void C45DT::CheckValue(Attribute Att,Tree T)
{
if ( RangeDesc[Att].Asked ) return;
printf("%s", AttName[Att]);
if ( RangeDesc[Att].Known )
{ //如果属性值范围已知,显示出来
printf(" [ ");
PrintRange(Att);
printf(" ]");
}
printf(": ");
ReadRange(Att, T);
}
/*************************************************************************/
/* */
/* Print the range of values for attribute Att */
/* */
/*************************************************************************/
void C45DT::PrintRange(Attribute Att)
{
DiscrValue dv;
Boolean First=DTTrue;
float p;
if ( MaxAttVal[Att] ) /* discrete attribute */
{
ForEach(dv, 1, MaxAttVal[Att] )
{
if ( (p = RangeDesc[Att].Probability[dv]) > Fuzz )
{
if ( ! First )
{
printf(", ");
}
First = DTFalse;
printf("%s", AttValName[Att][dv]);
if ( p < 1-Fuzz )
{
printf(": %.2f", p);
}
}
}
}
else /* continuous attribute */
{
printf("%g", RangeDesc[Att].LowerBound);
if ( RangeDesc[Att].UpperBound > RangeDesc[Att].LowerBound + Fuzz )
{
printf(" - %g", RangeDesc[Att].UpperBound);
}
}
}
/*************************************************************************/
/* */
/* Read a range of values for attribute Att or <cr> */
/* */
/*************************************************************************/
void C45DT::ReadRange(Attribute Att,Tree T)
{
char c;
RangeDesc[Att].Asked=DTTrue;
SkipSpace;
if ( c == '\n' )
{
return;
}
if ( c == '?' )
{
if ( (c = getchar()) == 't' )
{
if ( T ) PrintTree(T);
SkipLine(c);
RangeDesc[Att].Asked = DTFalse;
CheckValue(Att, T);
}
else
{
RangeDesc[Att].Known = DTFalse;
SkipLine(c);
}
return;
}
ungetc(c, stdin);
RangeDesc[Att].Known = DTTrue;
if ( MaxAttVal[Att] )
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -