⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hooks.c

📁 数据挖掘中的决策树生成的经典的see5软件的源代码。
💻 C
📖 第 1 页 / 共 5 页
字号:
	*PropVal=Nil,	*Unquoted;int	PropValSize=0;#define	PROPS 23#define	ERRORP		0#define ATTP		1#define CLASSP		2#define CUTP		3#define	CONDSP		4#define ELTSP		5#define ENTRIESP	6#define FORKSP		7#define FREQP		8#define IDP		9#define TYPEP		10#define LOWP		11#define MIDP		12#define HIGHP		13#define RESULTP		14#define RULESP		15#define VALP		16#define LIFTP		17#define COVERP		18#define OKP		19#define DEFAULTP	20#define COSTSP		21/*************************************************************************//*									 *//*	Read header information and decide whether model files are	 *//*	in ASCII or binary format					 *//*									 *//*************************************************************************/void ReadFilePrefix(String Extension)/*   --------------  */{#if defined WIN32 || defined _CONSOLE    if ( ! (TRf = GetFile(Extension, "rb")) ) Error(NOFILE, Fn, "");#else    if ( ! (TRf = GetFile(Extension, "r")) ) Error(NOFILE, Fn, "");#endif    StreamIn((char *) &TRIALS, sizeof(int));    if ( memcmp((char *) &TRIALS, "id=", 3) != 0 )    {	BINARY = true;	BinRecoverDiscreteNames();    }    else    {	BINARY = false;	rewind(TRf);	ReadHeader();    }}/*************************************************************************//*								  	 *//*	Read the header information (id, saved names, models)		 *//*								  	 *//*************************************************************************/void ReadHeader()/*   ---------  */{    Attribute	Att;    DiscrValue	v;    char	*p, Dummy;    int		Year, Month, Day;    FILE	*F;    while ( true )    {	switch ( ReadProp(&Dummy) )	{	    case ERRORP:		return;	    case IDP:		/*  Recover year run and set base date for timestamps  */		if ( sscanf(PropVal + strlen(PropVal) - 11,			    "%d-%d-%d\"", &Year, &Month, &Day) == 3 )		{		    SetTSBase(Year);		}		break;	    case COSTSP:		/*  Recover costs file used to generate model  */		if ( (F = GetFile(".costs", "r")) )		{		    GetMCosts(F);		}		break;	    case ATTP:		Unquoted = RemoveQuotes(PropVal);		Att = Which(Unquoted, AttName, 1, MaxAtt);		if ( ! Att || Exclude(Att) )		{		    Error(MODELFILE, E_MFATT, Unquoted);		}		break;	    case ELTSP:		MaxAttVal[Att] = 1;		AttValName[Att][1] = strdup("N/A");		for ( p = PropVal ; *p ; )		{		    p = RemoveQuotes(p);		    v = ++MaxAttVal[Att];		    AttValName[Att][v] = strdup(p);		    for ( p += strlen(p) ; *p != '"' ; p++ )			;		    p++;		    if ( *p == ',' ) p++;		}		AttValName[Att][MaxAttVal[Att]+1] = "<other>";		break;	    case ENTRIESP:		sscanf(PropVal, "\"%d\"", &TRIALS);		Entry = 0;		return;	}    }}/*************************************************************************//*									 *//*	Retrieve decision tree with extension Extension			 *//*									 *//*************************************************************************/Tree GetTree(String Extension)/*   -------  */{    CheckFile(Extension, false);    return ( BINARY ? BinInTree() : InTree() );}Tree InTree()/*   ------  */{    Tree	T;    DiscrValue	v, Subset=0;    char	Delim, *p;    ClassNo	c;    int		X;    double	XD;    T = (Tree) AllocZero(1, TreeRec);    do    {	switch ( ReadProp(&Delim) )	{	    case ERRORP:		return Nil;	    case TYPEP:		sscanf(PropVal, "\"%d\"", &X); T->NodeType = X;		break;	    case CLASSP:		Unquoted = RemoveQuotes(PropVal);		T->Leaf = Which(Unquoted, ClassName, 1, MaxClass);		if ( ! T->Leaf ) Error(MODELFILE, E_MFCLASS, Unquoted);		break;	    case ATTP:		Unquoted = RemoveQuotes(PropVal);		T->Tested = Which(Unquoted, AttName, 1, MaxAtt);		if ( ! T->Tested || Exclude(T->Tested) )		{		    Error(MODELFILE, E_MFATT, Unquoted);		}		break;	    case CUTP:		sscanf(PropVal, "\"%lf\"", &XD);	T->Cut = XD;		T->Lower = T->Mid = T->Upper = T->Cut;		break;	    case LOWP:		sscanf(PropVal, "\"%lf\"", &XD);	T->Lower = XD;		break;	    case MIDP:		sscanf(PropVal, "\"%lf\"", &XD);	T->Mid = XD;		break;	    case HIGHP:		sscanf(PropVal, "\"%lf\"", &XD);	T->Upper = XD;		break;	    case FORKSP:		sscanf(PropVal, "\"%d\"", &T->Forks);		break;	    case FREQP:		T->ClassDist = Alloc(MaxClass+1, ItemCount);		p = PropVal+1;		ForEach(c, 1, MaxClass)		{		    T->ClassDist[c] = strtod(p, &p);		    T->Items += T->ClassDist[c];		    p++;		}		break;	    case ELTSP:		if ( ! Subset++ )		{		    T->Subset = AllocZero(T->Forks+1, Set);		}		T->Subset[Subset] = MakeSubset(T->Tested);		break;	}    }    while ( Delim == ' ' );    if ( T->ClassDist )    {	T->Errors = T->Items - T->ClassDist[T->Leaf];    }    else    {	T->ClassDist = Alloc(1, ItemCount);    }    if ( T->NodeType )    {	T->Branch = AllocZero(T->Forks+1, Tree);	ForEach(v, 1, T->Forks)	{	    T->Branch[v] = InTree();	}    }    return T;}/*************************************************************************//*									 *//*	Retrieve ruleset with extension Extension			 *//*	(Separate functions for ruleset, single rule, single condition)	 *//*									 *//*************************************************************************/CRuleSet GetRules(String Extension)/*	 --------  */{    CheckFile(Extension, false);    return ( BINARY ? BinInRules() : InRules() );}CRuleSet InRules()/*	 -------  */{    CRuleSet	RS;    RuleNo	r;    char	Delim;    RS = Alloc(1, RuleSetRec);    do    {	switch ( ReadProp(&Delim) )	{	    case ERRORP:		return Nil;	    case RULESP:		sscanf(PropVal, "\"%d\"", &RS->SNRules);		CheckActiveSpace(RS->SNRules);		break;	    case DEFAULTP:		Unquoted = RemoveQuotes(PropVal);		RS->SDefault = Which(Unquoted, ClassName, 1, MaxClass);		if ( ! RS->SDefault ) Error(MODELFILE, E_MFCLASS, Unquoted);		break;	}    }    while ( Delim == ' ' );    /*  Read each rule  */    RS->SRule = Alloc(RS->SNRules+1, CRule);    ForEach(r, 1, RS->SNRules)    {	if ( (RS->SRule[r] = InRule()) )	{	    RS->SRule[r]->RNo = r;	    RS->SRule[r]->TNo = Entry;	}    }    ConstructRuleTree(RS);    Entry++;    return RS;}CRule InRule()/*    ------  */{    CRule	R;    int		d;    char	Delim;    float	Lift;    R = Alloc(1, RuleRec);    do    {	switch ( ReadProp(&Delim) )	{	    case ERRORP:		return Nil;	    case CONDSP:		sscanf(PropVal, "\"%d\"", &R->Size);		break;	    case COVERP:		sscanf(PropVal, "\"%f\"", &R->Cover);		break;	    case OKP:		sscanf(PropVal, "\"%f\"", &R->Correct);		break;	    case LIFTP:		sscanf(PropVal, "\"%f\"", &Lift);		R->Prior = (R->Correct + 1) / ((R->Cover + 2) * Lift);		break;	    case CLASSP:		Unquoted = RemoveQuotes(PropVal);		R->Rhs = Which(Unquoted, ClassName, 1, MaxClass);		if ( ! R->Rhs ) Error(MODELFILE, E_MFCLASS, Unquoted);		break;	}    }    while ( Delim == ' ' );    R->Lhs = Alloc(R->Size+1, Condition);    ForEach(d, 1, R->Size)    {	R->Lhs[d] = InCondition();    }    R->Vote = 1000 * (R->Correct + 1.0) / (R->Cover + 2.0) + 0.5;    return R;}Condition InCondition()/*        -----------  */{    Condition	C;    char	Delim;    int		X;    double	XD;    C = Alloc(1, CondRec);    do    {	switch ( ReadProp(&Delim) )	{	    case ERRORP:		return Nil;	    case TYPEP:		sscanf(PropVal, "\"%d\"", &X); C->NodeType = X;		break;	    case ATTP:		Unquoted = RemoveQuotes(PropVal);		C->Tested = Which(Unquoted, AttName, 1, MaxAtt);		if ( ! C->Tested || Exclude(C->Tested) )		{		    Error(MODELFILE, E_MFATT, Unquoted);		}		break;	    case CUTP:		sscanf(PropVal, "\"%lf\"", &XD);	C->Cut = XD;		break;	    case RESULTP:		C->TestValue = ( PropVal[1] == '<' ? 2 : 3 );		break;	    case VALP:		if ( Continuous(C->Tested) )		{		    C->TestValue = 1;		}		else		{		    Unquoted = RemoveQuotes(PropVal);		    C->TestValue = Which(Unquoted,					 AttValName[C->Tested],					 1, MaxAttVal[C->Tested]);		    if ( ! C->TestValue ) Error(MODELFILE, E_MFATTVAL, Unquoted);		}		break;	    case ELTSP:		C->Subset = MakeSubset(C->Tested);		C->TestValue = 1;		break;	}    }    while ( Delim == ' ' );    return C;}Condition	*Test=Nil;int		NTest,		TestSpace,		*TestOccur=Nil,		*RuleCondOK=Nil;Boolean		*TestUsed=Nil;void ConstructRuleTree(CRuleSet RS)/*   -----------------  */{    int		r, c;    RuleNo	*All;    Test = Alloc((TestSpace = 1000), Condition);    NTest = 0;    All = Alloc(RS->SNRules, RuleNo);    ForEach(r, 1, RS->SNRules)    {	All[r-1] = r;	ForEach(c, 1, RS->SRule[r]->Size)	{	    SetTestIndex(RS->SRule[r]->Lhs[c]);	}    }    TestOccur = Alloc(NTest, int);    TestUsed  = AllocZero(NTest, Boolean);    RuleCondOK = AllocZero(RS->SNRules+1, int);    RS->RT = GrowRT(All, RS->SNRules, RS->SRule);    Free(All);    Free(Test);    Free(TestUsed);    Free(TestOccur);    Free(RuleCondOK);}void SetTestIndex(Condition C)/*   ------------  */{    int		t;    Condition	CC;    Attribute	Att;    Att = C->Tested;    ForEach(t, 0, NTest-1)    {	CC = Test[t];	if ( CC->Tested != Att || CC->NodeType != C->NodeType ) continue;	switch ( C->NodeType )	{	    case BrDiscr:		C->TestI = t;		return;	    case BrSubset:		if ( ! memcmp(C->Subset, CC->Subset, (MaxAttVal[Att]>>3)+1) )		{		    C->TestI = t;		    return;		}		break;	    case BrThresh:		if ( C->TestValue == 1 && CC->TestValue == 1 ||		     ( C->TestValue != 1 && CC->TestValue != 1 &&		       C->Cut == CC->Cut ) )		{		    C->TestI = t;		    return;		}		break;	}    }    /*  New test -- make sure have enough space  */    if ( NTest >= TestSpace )    {	Realloc(Test, (TestSpace += 1000), Condition);    }    Test[NTest] = C;    C->TestI = NTest++;}RuleTree GrowRT(RuleNo *RR, int RRN, CRule *Rule)/*       ------  */{    RuleTree	Node;    RuleNo	r, *LR;    int		FP=0, ri, TI, *Expect, LRN;    DiscrValue	v;    if ( ! RRN ) return Nil;    Node = AllocZero(1, RuleTreeRec);    /*  Record and swap to front any rules that are satisfied  */    ForEach(ri, 0, RRN-1)    {	r = RR[ri];	if ( RuleCondOK[r] == Rule[r]->Size )	{	    RR[ri] = RR[FP];	    RR[FP] = r;	    FP++;	}    }    if ( FP )    {	Node->Fire = Alloc(FP+1, RuleNo);	memcpy(Node->Fire, RR, FP * sizeof(RuleNo));	Node->Fire[FP] = 0;	RR  += FP;	RRN -= FP;    }    else    {	Node->Fire = Nil;    }    if ( ! RRN ) return Node;    /*  Choose test for this node  */    TI = SelectTest(RR, RRN, Rule);    TestUsed[TI] = true;    Node->CondTest = Test[TI];    /*  Find the desired outcome for each rule  */    Expect = Alloc(RRN, int);    ForEach(ri, 0, RRN-1)    {	Expect[ri] = DesiredOutcome(Rule[RR[ri]], TI);    }    /*  Now construct individual branches.  Rules that do not reference	the selected test go down branch 0; at classification time,	any case with an unknown outcome for the selected test also	goes to branch 0.  */    Node->Forks =	( Test[TI]->NodeType == BrDiscr ? MaxAttVal[Test[TI]->Tested] :	  Test[TI]->NodeType == BrSubset ? 1 : 3 );    Node->Branch = Alloc(Node->Forks+1, RuleTree);    LR = Alloc(RRN, RuleNo);    ForEach(v, 0, Node->Forks)    {	/*  Extract rules with outcome v and increment conditions satisfied,	    if relevant  */	LRN = 0;	ForEach(ri, 0, RRN-1)	{	    if ( Expect[ri] == v )	    {		LR[LRN++] = RR[ri];		if ( v ) RuleCondOK[RR[ri]]++;	    }	}	/*  LR now contains rules with outcome v  */	Node->Branch[v] = GrowRT(LR, LRN, Rule);	if ( v )	{	    /*  Restore conditions satisfied  */	    ForEach(ri, 0, LRN-1)	    {		RuleCondOK[LR[ri]]--;	    }	}    }    TestUsed[TI] = false;    /*  Free local storage  */    Free(LR);    Free(Expect);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -