⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 c4.5rulesgt.c

📁 决策树c4.5算法的c++实现 希望对大家有用
💻 C
📖 第 1 页 / 共 5 页
字号:
	if ( RuleSpace > 100 )	{	    Rule = (PR *) realloc(Rule, RuleSpace * sizeof(PR));	}	else	{	    Rule = (PR *) malloc(RuleSpace * sizeof(PR));	}    }    /*  Form the new rule  */    Rule[NRules].Size = NConds;    Rule[NRules].Lhs = (Condition *) calloc(NConds+1, sizeof(Condition));    ForEach(d, 1, NConds)    {	Rule[NRules].Lhs[d] = (Condition) malloc(sizeof(struct CondRec));	Rule[NRules].Lhs[d]->CondTest = Cond[d]->CondTest;	Rule[NRules].Lhs[d]->TestValue = Cond[d]->TestValue;    }    Rule[NRules].Rhs = TargetClass;    Rule[NRules].Error = Err;    Verbosity(1) PrintRule(NRules);    return true;}/*************************************************************************//*								  	 *//*  Decide whether the given rule duplicates rule r		  	 *//*								  	 *//*************************************************************************/Boolean SameRule(r, Cond, NConds, TargetClass)/*      --------  */    RuleNo r;    Condition Cond[];    short NConds;    ClassNo TargetClass;{    short d, i;    Test SubTest1, SubTest2;    if ( Rule[r].Size != NConds || Rule[r].Rhs != TargetClass )    {	return false;    }    ForEach(d, 1, NConds)    {	if ( Rule[r].Lhs[d]->CondTest->NodeType != Cond[d]->CondTest->NodeType ||	     Rule[r].Lhs[d]->CondTest->Tested   != Cond[d]->CondTest->Tested )	{	    return false;	}	switch ( Cond[d]->CondTest->NodeType )	{	    case BrDiscr:		if ( Rule[r].Lhs[d]->TestValue != Cond[d]->TestValue )		{		    return false;		}		break;	    case ThreshContin:		if ( Rule[r].Lhs[d]->CondTest->Cut != Cond[d]->CondTest->Cut )		{		    return false;		}		break;	    case BrSubset:		SubTest1 = Rule[r].Lhs[d]->CondTest;		SubTest2 = Cond[d]->CondTest;		ForEach(i, 1, SubTest1->Forks)		{		    if ( SubTest1->Subset[i] != SubTest2->Subset[i] )		    {			return false;		    }		}	}    }    return true;}/*************************************************************************//*								  	 *//*		Print the current indexed ruleset		  	 *//*								  	 *//*************************************************************************/    PrintIndexedRules()/*  -----------------  */{    short ri;    ForEach(ri, 1, NRules )    {	PrintRule(RuleIndex[ri]);    }    printf("\nDefault class: %s\n", ClassName[DefaultClass]);}/*************************************************************************//*								  	 *//*		Print the rule r				  	 *//*								  	 *//*************************************************************************/    PrintRule(r)/*  ---------  */    RuleNo r;{    short d;    printf("\nRule %d:\n", r);    ForEach(d, 1, Rule[r].Size)    {        printf("    ");        PrintCondition(Rule[r].Lhs[d]);    }    printf("\t->  class %s  [%.1f%%]\n",	    ClassName[Rule[r].Rhs], 100 * (1 - Rule[r].Error));}/*************************************************************************//*								  	 *//*	Print a condition c of a production rule		  	 *//*								  	 *//*************************************************************************/    PrintCondition(c)/*  --------------  */    Condition c;{    Test tp;    DiscrValue v, pv, Last, Values=0;    Boolean First=true;    Attribute Att;    tp = c->CondTest;    v = c->TestValue;    Att = tp->Tested;    printf("\t%s", AttName[Att]);    if ( v < 0 )    {	printf(" is unknown\n");	return;    }    switch ( tp->NodeType )    {	case BrDiscr:	    printf(" = %s\n", AttValName[Att][v]);	    break;	case ThreshContin:	    printf(" %s %g\n", ( v == 1 ? "<=" : ">" ), tp->Cut);	    break;	case BrSubset:	    /*  Count values at this branch  */	    for ( pv=1 ; Values <= 1 && pv <= MaxAttVal[Att] ; pv++ )	    {		if ( In(pv, tp->Subset[v]) )		{		    Last = pv;		    Values++;		}	    }	    if ( Values == 1 )	    {		printf(" = %s\n", AttValName[Att][Last]);		break;	    }	    printf(" in ");	    ForEach(pv, 1, MaxAttVal[Att])	    {		if ( In(pv, tp->Subset[v]) )		{		    if ( First )		    {			printf("{");			First = false;		    }		    else		    {			printf(", ");		    }		    printf("%s", AttValName[Att][pv]);		}	    }	    printf("}\n");    }}/*************************************************************************//*									 *//*	Tabluate logs and log factorials (to improve speed)		 *//*	--------------------------------				 *//*									 *//*************************************************************************/float	*LogItemNo;double	*LogFact;/*************************************************************************//*									 *//*  Set up the array LogItemNo to contain the logs of integers and	 *//*  the array LogFact to contain logs of factorials (all to base 2)	 *//*									 *//*************************************************************************/    GenerateLogs()/*  ------------  */{    ItemNo i;    LogItemNo = (float *) malloc((MaxItem+100) * sizeof(float));    LogFact = (double *) malloc((MaxItem+100) * sizeof(double));    LogItemNo[0] = -1E38;    LogItemNo[1] = 0;    LogFact[0] = LogFact[1] = 0;    ForEach(i, 2, MaxItem+99)    {	LogItemNo[i] = log((float) i) / Log2;	LogFact[i] = LogFact[i-1] + LogItemNo[i];    }}/*************************************************************************//*									 *//*	Generate all rulesets from the decision trees		  	 *//*	---------------------------------------------		  	 *//*								  	 *//*************************************************************************//*************************************************************************//*								  	 *//*  For each tree, form a set of rules and process them, then form a	 *//*  composite set of rules from all of these sets.		  	 *//*  If there is only one tree, then no composite set is formed.	  	 *//*								  	 *//*  Rulesets are stored in  PRSet[0]  to  PRSet[TRIALS], where    	 *//*  PRSet[TRIALS] contains the composite ruleset.		  	 *//*								  	 *//*  On completion, the current ruleset is the composite ruleset (if one	 *//*  has been made), otherwise the ruleset from the single tree. 	 *//*								  	 *//*************************************************************************/    GenerateRules()/*  -------------  */{    Tree DecisionTree, GetTree();    short t=0, RuleSetSpace=0, r;    /*  Find bits to encode attributes and branches  */    FindTestCodes();    /*  Now process each decision tree  */    while ( DecisionTree = GetTree(".unpruned") )    {	printf("\n------------------\n");	printf("Processing tree %d\n", t);	/*  Form a set of rules from the next tree  */	FormRules(DecisionTree);	/*  Process the set of rules for this trial  */	ConstructRuleset();	printf("\nFinal rules from tree %d:\n", t);	PrintIndexedRules();	    	/*  Make sure there is enough room for the new ruleset  */	if ( t + 1 >= RuleSetSpace )	{	    RuleSetSpace += 10;	    if ( RuleSetSpace > 10 )	    {		PRSet = (RuleSet *) realloc(PRSet, RuleSetSpace * sizeof(RuleSet));	    }	    else	    {		PRSet = (RuleSet *) malloc(RuleSetSpace * sizeof(RuleSet));	    }	}	PRSet[t].SNRules = NRules;	PRSet[t].SRule = Rule;	PRSet[t].SRuleIndex = RuleIndex;	PRSet[t].SDefaultClass = DefaultClass;	++t;    }    if ( ! t )    {	printf("\nERROR:  can't find any decision trees\n");	exit(1);    }    TRIALS = t;    /*  If there is more than one tree in the trees file,	make a composite ruleset of the rules from all trees  */    if ( TRIALS > 1 )    {	CompositeRuleset();    }}/*************************************************************************//*								  	 *//*	Determine code lengths for attributes and branches		 *//*								  	 *//*************************************************************************/    FindTestCodes()/*  -------------  */{    Attribute Att;    DiscrValue v, V;    ItemNo i, *ValFreq;    int PossibleCuts;    float Sum, SumBranches=0, p;    void SwapUnweighted();    BranchBits  = (float *) malloc((MaxAtt+1) * sizeof(float));    ForEach(Att, 0, MaxAtt)    {	if ( (V = MaxAttVal[Att]) )	{	    ValFreq = (ItemNo *) calloc(V+1, sizeof(ItemNo));	    ForEach(i, 0, MaxItem)	    {		ValFreq[DVal(Item[i],Att)]++;	    }	    Sum = 0;	    ForEach(v, 1, V)	    {		if ( ValFreq[v] )		{		    Sum += (ValFreq[v] / (MaxItem+1.0)) *			   (LogItemNo[MaxItem+1] - LogItemNo[ValFreq[v]]);		}	    }	    free(ValFreq);	    BranchBits[Att] = Sum;	}	else	{	    Quicksort(0, MaxItem, Att, SwapUnweighted);	    PossibleCuts = 1;	    ForEach(i, 1, MaxItem)	    {		if ( CVal(Item[i],Att) > CVal(Item[i-1],Att) )		{		    PossibleCuts++;		}	    }	    BranchBits[Att] = PossibleCuts > 1 ?			      1 + LogItemNo[PossibleCuts] / 2 : 0 ;	}	SumBranches += BranchBits[Att];    }    AttTestBits = 0;    ForEach(Att, 0, MaxAtt)    {	if ( (p = BranchBits[Att] / SumBranches) > 0 )	{	    AttTestBits -= p * log(p) / log(2.0);	}    }}/*************************************************************************//*                                                                	 *//*  Exchange items at a and b.  Note:  unlike the similar routine in	 *//*  buildtree, this does not assume that items have a Weight to be	 *//*  swapped as well!							 *//*                                                                	 *//*************************************************************************/void SwapUnweighted(a, b)/*   --------------  */    ItemNo a, b;{    Description Hold;    Hold = Item[a];    Item[a] = Item[b];    Item[b] = Hold;}/*************************************************************************//*								  	 *//*	Form composite ruleset for all trials			  	 *//*								  	 *//*************************************************************************/    CompositeRuleset()/*  ----------------  */{    RuleNo r;    short t, ri;    Boolean NewRule();        InitialiseRules();    /*  Lump together all the rules from each ruleset  */    ForEach(t, 0, TRIALS-1)    {	ForEach(ri, 1, PRSet[t].SNRules)	{	    r = PRSet[t].SRuleIndex[ri];	    NewRule(PRSet[t].SRule[r].Lhs, PRSet[t].SRule[r].Size,		     PRSet[t].SRule[r].Rhs, PRSet[t].SRule[r].Error);	}    }    /*  ... and select a subset in the usual way  */    ConstructRuleset();    printf("\nComposite ruleset:\n");    PrintIndexedRules();    PRSet[TRIALS].SNRules    = NRules;    PRSet[TRIALS].SRule      = Rule;    PRSet[TRIALS].SRuleIndex = RuleIndex;    PRSet[TRIALS].SDefaultClass = DefaultClass;}/*************************************************************************//*								  	 *//*	Form a set of rules from a decision tree			 *//*	----------------------------------------			 *//*								  	 *//*************************************************************************/ItemNo	*TargetClassFreq,	/* [Boolean] */	*Errors,		/* [Condition] */	*Total;			/* [Condition] */float	*Pessimistic,		/* [Condition] */	*Actual,		/* [Condition] */	*CondSigLevel;		/* [Condition] */Boolean	**CondSatisfiedBy,	/* [Condition][ItemNo] */	*Deleted;		/* [Condition] */DiscrValue *SingleValue;	/* [Attribute] */Condition *Stack;short	MaxDisjuncts,	MaxDepth;/*************************************************************************//*								  	 *//*	Form a ruleset from decision tree t			  	 *//*								  	 *//*************************************************************************/    FormRules(t)/*  ----------  */    Tree t;{    short i;    /*  Find essential parameters and allocate storage  */    MaxDepth = 0;    MaxDisjuncts = 0;    TreeParameters(t, 0);    Actual = (float *) calloc(MaxDepth+2, sizeof(float));    Total = (ItemNo *) calloc(MaxDepth+2, sizeof(ItemNo));    Errors = (ItemNo *) calloc(MaxDepth+2, sizeof(ItemNo));    Pessimistic = (float *) calloc(MaxDepth+2, sizeof(float));    CondSigLevel = (float *) calloc(MaxDepth+2, sizeof(float));    TargetClassFreq = (ItemNo *) calloc(2, sizeof(ItemNo));    Deleted = (Boolean *) calloc(MaxDepth+2, sizeof(Boolean));    CondSatisfiedBy = (char **) calloc(MaxDepth+2, sizeof(char *));    Stack = (Condition *) calloc(MaxDepth+2, sizeof(Condition));    ForEach(i, 0, MaxDepth+1)    {	CondSatisfiedBy[i] = (char *) calloc(MaxItem+1, sizeof(char));	Stack[i] = (Condition) malloc(sizeof(struct CondRec));    }    SingleValue = (DiscrValue *) calloc(MaxAtt+1, sizeof(DiscrValue));    InitialiseRules();    /*  Extract and prune disjuncts  */    Scan(t, 0);    /*  Deallocate storage  */    ForEach(i, 0, MaxDepth+1)    {	free(CondSatisfiedBy[i]);	free(Stack[i]);    }    free(Deleted);    free(CondSatisfiedBy);    free(Stack);    free(Actual);    free(Total);    free(Errors);    free(Pessimistic);    free(CondSigLevel);    free(TargetClassFreq);}/*************************************************************************//*                                                                	 *//*  Find the maximum depth and the number of leaves in tree t	  	 *//*  with initial depth d					  	 *//*                                                                	 *//*************************************************************************/    TreeParameters(t, d)/*  ---------------  */    Tree t;    short d;{

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -