⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hooks.c

📁 数据挖掘中的决策树生成的经典的see5软件的源代码。
💻 C
📖 第 1 页 / 共 5 页
字号:
    return Node;}int DesiredOutcome(CRule R, int TI)/*  --------------  */{    int c;    ForEach(c, 1, R->Size)    {	if ( R->Lhs[c]->TestI == TI )	{	    return R->Lhs[c]->TestValue;	}    }    return 0;}int SelectTest(RuleNo *RR, int RRN, CRule *Rule)/*  ----------  */{    int		c, cc, ri;    RuleNo	r;    /*  Count test occurrences  */    ForEach(c, 0, NTest-1)    {	TestOccur[c] = 0;    }    ForEach(ri, 0, RRN-1)    {	r = RR[ri];	ForEach(c, 1, Rule[r]->Size)	{	    TestOccur[Rule[r]->Lhs[c]->TestI]++;	}    }    /*  Find most frequently-occurring test  */    cc = -1;    ForEach(c, 0, NTest-1)    {	if ( ! TestUsed[c] && ( cc < 0 || TestOccur[c] > TestOccur[cc] ) )	{	    cc = c;	}    }    return cc;}/*************************************************************************//*									 *//*	ASCII reading utilities						 *//*									 *//*************************************************************************/int ReadProp(char *Delim)/*  --------  */{    int		c, i;    char	*p;    Boolean	Quote=false;    for ( p = PropName ; (c = fgetc(TRf)) != '=' ;  )    {	if ( p - PropName >= 19 || c == EOF )	{	    Error(MODELFILE, E_MFEOF, "");	    PropName[0] = PropVal[0] = *Delim = '\00';	    return 0;	}	*p++ = c;    }    *p = '\00';    for ( p = PropVal ; ((c = fgetc(TRf)) != ' ' && c != '\n') || Quote ; )    {	if ( c == EOF )	{	    Error(MODELFILE, E_MFEOF, "");	    PropName[0] = PropVal[0] = '\00';	    return 0;	}	if ( (i = p - PropVal) >= PropValSize )	{	    Realloc(PropVal, (PropValSize += 10000) + 3, char);	    p = PropVal + i;	}	*p++ = c;	if ( c == '\\' )	{	    *p++ = fgetc(TRf);	}	else	if ( c == '"' )	{	    Quote = ! Quote;	}    }    *p = '\00';    *Delim = c;    return Which(PropName, Prop, 1, PROPS);}String RemoveQuotes(String S)/*     ------------  */{    char	*p, *Start;    p = Start = S;        for ( S++ ; *S != '"' ; S++ )    {	if ( *S == '\\' ) S++;	*p++ = *S;	*S = '-';    }    *p = '\00';    return Start;}Set MakeSubset(Attribute Att)/*  ----------  */{    int		Bytes, b;    char	*p;    Set		S;    Bytes = (MaxAttVal[Att]>>3) + 1;    S = AllocZero(Bytes, Byte);    for ( p = PropVal ; *p ; )    {	p = RemoveQuotes(p);	b = Which(p, AttValName[Att], 1, MaxAttVal[Att]);	if ( ! b ) Error(MODELFILE, E_MFATTVAL, p);	SetBit(b, S);	for ( p += strlen(p) ; *p != '"' ; p++ )	    ;	p++;	if ( *p == ',' ) p++;    }    return S;}/*************************************************************************//*									 *//*	Recover attribute values read with "discrete N"			 *//*									 *//*************************************************************************/void BinRecoverDiscreteNames()/*   -----------------------  */{    Attribute	Att;    DiscrValue	v;    int		Length;    ForEach(Att, 1, MaxAtt)    {	if ( ! StatBit(Att, DISCRETE) ) continue;	StreamIn((char *) &MaxAttVal[Att], sizeof(int));	/*  Insert "N/A"  */	AttValName[Att][1] = strdup("N/A");	MaxAttVal[Att]++;	ForEach(v, 2, MaxAttVal[Att])	{	    StreamIn((char *) &Length, sizeof(int));	    AttValName[Att][v] = Alloc(Length, char);	    StreamIn(AttValName[Att][v], Length);	}	/*  Invisible name for undefined values  */	AttValName[Att][MaxAttVal[Att]+1] = "<other>";    }}/*************************************************************************//*									 *//*	Retrieve tree from saved characters				 *//*									 *//*************************************************************************/Tree BinInTree()/*   ---------  */{    Tree	T;    DiscrValue	v, vv;    int		Bytes;    float	XFl;    Set		S;    T = (Tree) AllocZero(1, TreeRec);    StreamIn((char *) &T->NodeType, sizeof(BranchType));    StreamIn((char *) &T->Leaf, sizeof(ClassNo));    StreamIn((char *) &T->Items, sizeof(ItemCount));    StreamIn((char *) &T->Errors, sizeof(ItemCount));    T->ClassDist = AllocZero(MaxClass+1, ItemCount);    StreamIn((char *) T->ClassDist, (MaxClass + 1) * sizeof(ItemCount));    if ( T->NodeType )    {	StreamIn((char *) &T->Tested, sizeof(Attribute));	StreamIn((char *) &T->Forks, sizeof(int));	T->Forks++;	/* for N/A */	switch ( T->NodeType )	{	    case BrDiscr:		break;	    case BrThresh:		StreamIn((char *) &XFl, sizeof(float));	T->Cut = XFl;		StreamIn((char *) &XFl, sizeof(float));	T->Lower = XFl;		StreamIn((char *) &XFl, sizeof(float));	T->Upper = XFl;		StreamIn((char *) &XFl, sizeof(float));	T->Mid = XFl;		break;	    case BrSubset:		T->Subset = (Set *) AllocZero(T->Forks+1, Set);		Bytes = ((MaxAttVal[T->Tested] - 1) >> 3) + 1;		S = AllocZero(Bytes, Byte);		T->Subset[1] = AllocZero(Bytes, Byte);		SetBit(1, T->Subset[1]);		ForEach(v, 2, T->Forks)		{		    T->Subset[v] = AllocZero(Bytes, Byte);		    StreamIn((char *) S, Bytes);		    ForEach(vv, 1, MaxAttVal[T->Tested]-1)		    {			if ( In(vv, S) ) SetBit(vv+1, T->Subset[v]);		    }		}		Free(S);	}	T->Branch = AllocZero(T->Forks+1, Tree);	/*  Allow for N/A branch  */	T->Branch[1] = Leaf(Nil, T->Leaf, 0.0, 0.0);	ForEach(v, 2, T->Forks)	{	    T->Branch[v] = BinInTree();	}    }    return T;}/*************************************************************************//*								  	 *//*	Recover a ruleset						 *//*								  	 *//*************************************************************************/CRuleSet BinInRules()/*       ----------  */{    int		ri, d, Bytes, Dummy;    CRuleSet	RS;    CRule	R;    Condition	C;    float	XFl;    Set		S;    DiscrValue	vv;    RS = Alloc(1, RuleSetRec);    StreamIn((char *) &RS->SNRules, sizeof(RuleNo));    StreamIn((char *) &RS->SDefault, sizeof(ClassNo));    RS->SRule = Alloc(RS->SNRules+1, CRule);    ForEach(ri, 1, RS->SNRules)    {	R = RS->SRule[ri] = Alloc(1, RuleRec);	StreamIn((char *) &R->RNo, sizeof(int));	StreamIn((char *) &R->TNo, sizeof(int));	StreamIn((char *) &R->Size, sizeof(int));	R->Lhs = Alloc(R->Size+1, Condition);	ForEach(d, 1, R->Size)	{	    C = R->Lhs[d] = Alloc(1, CondRec);	    StreamIn((char *) &C->NodeType, sizeof(BranchType));	    StreamIn((char *) &C->Tested, sizeof(Attribute));	    StreamIn((char *) &Dummy, sizeof(int));	    StreamIn((char *) &XFl, sizeof(float));	C->Cut = XFl;	    if ( C->NodeType == BrSubset )	    {		Bytes = ((MaxAttVal[C->Tested] - 1) >> 3) + 1;		S = AllocZero(Bytes, Byte);		C->Subset = AllocZero(Bytes, Byte);		StreamIn((char *) S, Bytes);		ForEach(vv, 1, MaxAttVal[C->Tested]-1)		{		    if ( In(vv, S) ) SetBit(vv+1, C->Subset);		}		Free(S);	    }	    StreamIn((char *) &R->Lhs[d]->TestValue, sizeof(int));	    R->Lhs[d]->TestValue++;	/* to allow for N/A */	}	StreamIn((char *) &R->Rhs, sizeof(ClassNo));	StreamIn((char *) &R->Cover, sizeof(ItemCount));	StreamIn((char *) &R->Correct, sizeof(ItemCount));	StreamIn((char *) &R->Prior, sizeof(float));	if ( R->Correct < 1 )	{	    /*  Prior to Release 1.11  */	    R->Correct = (R->Cover + 2) * (1 - R->Correct) - 1;	    memcpy(&R->Vote, &R->Prior, sizeof(int));	    R->Prior = 1E38;	}	else	{	    R->Vote = 1000 * (R->Correct + 1.0) / (R->Cover + 2.0) + 0.5;	}    }    return RS;}/*************************************************************************//*								  	 *//*	Character stream read for binary routines			 *//*								  	 *//*************************************************************************/void StreamIn(String S, int n)/*   --------  */{    while ( n-- ) *S++ = getc(TRf);}/*************************************************************************//*									 *//*	Construct a leaf in a given node				 *//*									 *//*************************************************************************/Tree Leaf(double *Freq, ClassNo NodeClass, ItemCount Items, ItemCount Errors)/*   ----  */{    Tree	Node;    ClassNo	c;    Node = AllocZero(1, TreeRec);    Node->ClassDist = AllocZero(MaxClass+1, ItemCount);    if ( Freq )    {	ForEach(c, 1, MaxClass)	{	    Node->ClassDist[c] = Freq[c];	}    }    Node->NodeType	= 0;    Node->Leaf		= NodeClass;    Node->Items		= Items;    Node->Errors	= Errors;    return Node;}/*************************************************************************//*									 *//*	Read variable misclassification costs				 *//*									 *//*************************************************************************/void GetMCosts(FILE *Cf)/*   ---------  */{    ClassNo	Pred, Real, p, r;    char	Name[1000];    float	Val;    LineNo = 0;    /*  Read entries from cost file  */    while ( ReadName(Cf, Name, 1000, ':') )    {	if ( ! (Pred = Which(Name, ClassName, 1, MaxClass)) )	{	    Error(BADCOSTCLASS, Name, "");	}	if ( ! ReadName(Cf, Name, 1000, ':') ||	     ! (Real = Which(Name, ClassName, 1, MaxClass)) )	{	    Error(BADCOSTCLASS, Name, "");	}	if ( ! ReadName(Cf, Name, 1000, ':') ||	     sscanf(Name, "%f", &Val) != 1 || Val < 0 )	{	    Error(BADCOST, "", "");	    Val = 1;	}	if ( Pred > 0 && Real > 0 && Pred != Real && Val != 1 )	{	    /*  Have a non-trivial cost entry  */	    if ( ! MCost )	    {		/*  Set up cost matrices  */		MCost = Alloc(MaxClass+1, float *);		ForEach(p, 1, MaxClass)		{		    MCost[p] = Alloc(MaxClass+1, float);		    ForEach(r, 1, MaxClass)		    {			MCost[p][r] = ( p == r ? 0.0 : 1.0 );		    }		}	    }	    MCost[Pred][Real] = Val;	}    }    fclose(Cf);}/*************************************************************************//*                                                              	 *//*	Categorize a case description using the given decision tree	 *//*                                                              	 *//*************************************************************************/ClassNo TreeClassify(Description Case, Tree DecisionTree)/*      ------------  */{    ClassNo	c;    ForEach(c, 0, MaxClass)    {	ClassSum[c] = 0;    }    FindLeaf(Case, DecisionTree, Nil, 1.0);    return SelectClass(1, (Boolean)(MCost != Nil));}/*************************************************************************//*                                                              	 *//*	Classify a case description using the given subtree by		 *//*	adjusting the value ClassSum for each class			 *//*                                                              	 *//*************************************************************************/void FollowAllBranches(Description Case, Tree T, float Fraction)/*   -----------------  */{    DiscrValue	v;    ForEach(v, 1, T->Forks)    {	FindLeaf(Case, T->Branch[v], T,		 (Fraction * T->Branch[v]->Items) / T->Items);    }}/*************************************************************************//*                                                              	 *//*	Classify a case description using the given subtree by		 *//*	adjusting the value ClassSum for each class			 *//*                                                              	 *//*************************************************************************/void FindLeaf(Description Case, Tree T, Tree PT, float Fraction)/*   --------  */{    DiscrValue	v, Dv;    ClassNo	c;    float	NewFrac, BrWt[4];    switch ( T->NodeType )    {	case 0:  /* leaf */	  LeafUpdate:	    /*  Use parent node if effectively no cases at this node  */	    if ( T->Items < Epsilon )	    {		T = PT;	    }	    /*  Update from all classes  */	    ForEach(c, 1, MaxClass)	    {		ClassSum[c] += Fraction * T->ClassDist[c] / T->Items;	    }	    return;	case BrDiscr:  /* test of discrete attribute */	    Dv = DVal(Case, T->Tested);	/* > MaxAttVal if unknown */	    if ( Dv <= T->Forks )	/*  Make sure not new discrete value  */	    {		FindLeaf(Case, T->Branch[Dv], T, Fraction);	    }	    else	    {		FollowAllBranches(Case, T, Fraction);	    }	    return;	case BrThresh:  /* test of continuous attribute */	    if ( Unknown(Case, T->Tested) )	    {		FollowAllBranches(Case, T, Fraction);	    }	    else	    if ( NotApplic(Case, T->Tested) )	    {		FindLeaf(Case, T->Branch[1], T, Fraction);	    }	    else	    {		/*  Find weights for <= and > branches, interpolating if		    probabilistic thresholds are used  */		BrWt[2] = Interpolate(T, CVal(Case, T->Tested));		BrWt[3] = 1 - BrWt[2];		ForEach(v, 2, 3)		{		    if ( (NewFrac = Fraction * BrWt[v]) >= 0.01 )		    {			FindLeaf(Case, T->Branch[v], T, NewFrac);		    }		}	    }	    return;	case BrSubset:  /* subset test on discrete attribute  */	    Dv = DVal(Case, T->Tested);	/* > MaxAttVal if unknown */	    if ( Dv <= MaxAttVal[T->Tested] )	    {		ForEach(v, 1, T->Forks)		{		    if ( In(Dv, T->Subset[v]) )		    {			FindLeaf(Case, T->Branch[v], T, Fraction);			return;		    }		}		/* Value not found in any subset -- treat as leaf  */		goto LeafUpdate;	    }	    else	    {		FollowAllBranches(Case, T, Fraction);	    }    }}/*************************************************************************//*                                                              	 *//*	Categorize a case description using a ruleset			 *//*                                                              	 *//*************************************************************************/ClassNo RuleClas

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -