⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rules.cpp

📁 实现决策树分类训练试验。 源自c4.5
💻 CPP
字号:
/*************************************************************************/
/*								  	 */
/*	Miscellaneous routines for rule handling		  	 */
/*	----------------------------------------		  	 */
/*								  	 */
/*************************************************************************/
#include "stdafx.h"
#include "MyBase.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif


/*  Stream that receives all diagnostic and rule listings printed by
    the routines in this file  */
extern FILE *fLog;
/*********************************************/
/* Shared rule-generation state (declarations taken from Ruleinex.i) */

extern PR	*Rule;		/* production rules; entries are used from
				   index 1 (see NewRule) */

extern RuleNo	NRules,		/* number of production rules */
		*RuleIndex;	/* index to production rules: RuleIndex[i]
				   is the rule number printed/saved i-th */

extern short	RuleSpace;	/* space currently allocated for rules */

extern RuleSet	*PRSet;		/* set of rulesets */

extern ClassNo	DefaultClass;	/* default class associated with ruleset */

extern Boolean	SIGTEST,	/* use Fisher's test in rule pruning */
		SIMANNEAL;	/* use simulated annealing */

extern float	SIGTHRESH,	/* sig level used in rule pruning */
		REDUNDANCY,	/* factor governing encoding tradeoff
				   between rules and exceptions */
		AttTestBits,	/* average bits to encode tested attribute */
		*BranchBits;	/* ditto attribute value */

extern float	*LogItemNo;	/* LogItemNo[i] = log2(i) */
extern double	*LogFact;	/* LogFact[i] = log2(i!) */
/*************************************************/

extern  FILE	*TRf;		/* rules file; opened by SaveRules/GetRules
				   and left open until the next call */

Test	*TestVec;		/* vector of distinct tests maintained by
				   FindTest; entries are used from index 1 */
short	NTests = 0;		/* number of tests currently in TestVec */
extern char	Fn[500];	/* file name */


/*************************************************************************/
/*								  	 */
/*  Save the current ruleset in rules file in order of the index  	 */
/*								  	 */
/*************************************************************************/
void SaveRules()
{
    short ri, d, v, Bytes;
    RuleNo r;
    Test Tst;

    if ( TRf ) fclose(TRf);

    strcpy(Fn, FileName);
    strcat(Fn, ".rules");
    if ( ! ( TRf = fopen(Fn, "wt") ) ) Error(0, Fn, " for writing");
    
    StreamOut(&NRules, 1);
    StreamOut(&DefaultClass, 1);

    ForEach(ri, 1, NRules)
    {
		r = RuleIndex[ri];
		StreamOut(&Rule[r].Size, 1);
		ForEach(d, 1, Rule[r].Size)
		{
			Tst = Rule[r].Lhs[d]->CondTest;

			StreamOut(&Tst->NodeType, 1);
			StreamOut(&Tst->Tested,1);
			StreamOut(&Tst->Forks, 1);
			StreamOut(&Tst->Cut,1);
			if ( Tst->NodeType == BrSubset )
			{
				Bytes = (MaxAttVal[Tst->Tested]>>3) + 1;
				ForEach(v, 1, Tst->Forks)
				{
					StreamOut( Tst->Subset[v], Bytes);
				}
			}
			StreamOut(&Rule[r].Lhs[d]->TestValue, 1);
		}
		StreamOut(&Rule[r].Rhs, 1);
		StreamOut(&Rule[r].Error, 1);
    }

    SaveDiscreteNames();
}



/*************************************************************************/
/*                                                                	 */
/*	Get a new ruleset from rules file			  	 */
/*                                                                	 */
/*************************************************************************/
void GetRules()
{
    RuleNo nr, r;
    short n, d, v, Bytes;
    Condition *Cond;
    Test Tst;
    ClassNo c;
    float e;

    if ( TRf ) fclose(TRf);

    strcpy(Fn, FileName);
    strcat(Fn, ".rules");
    if ( ! ( TRf = fopen(Fn, "r") ) ) Error(0, Fn, "");
    
    StreamIn(&nr, 1);
    StreamIn(&DefaultClass,1);

    ForEach(r, 1, nr)
    {
        StreamIn(&n, 1);
		Cond = (Condition *) calloc(n+1, sizeof(Condition));
		ForEach(d, 1, n)
		{
			Tst = (Test) malloc(sizeof(struct TestRec));

			StreamIn(&Tst->NodeType, 1);
			StreamIn(&Tst->Tested, 1);
			StreamIn(&Tst->Forks, 1);
			StreamIn(&Tst->Cut, 1);
			if ( Tst->NodeType == BrSubset )
			{
				Tst->Subset = (Set *) calloc(Tst->Forks + 1, sizeof(Set));

				Bytes = (MaxAttVal[Tst->Tested]>>3) + 1;
				ForEach(v, 1, Tst->Forks)
				{
					Tst->Subset[v] = (Set) malloc(Bytes);
					StreamIn(Tst->Subset[v], Bytes);
				}
			}

			Cond[d] = (Condition) malloc(sizeof(struct CondRec));
			Cond[d]->CondTest = FindTest(Tst);
			StreamIn(&Cond[d]->TestValue, 1);
		}
		StreamIn(&c, 1);
		StreamIn(&e, 1);
		NewRule(Cond, n, c, e);
		delete Cond;
	}

    RecoverDiscreteNames();
}



/*************************************************************************/
/*								  	 */
/*  Find a test in the test vector; if it's not there already, add it	 */
/*								  	 */
/*************************************************************************/
/*
 *  FindTest: return the entry of TestVec that is the same test as
 *  Newtest (as judged by SameTest), adding Newtest to the vector when
 *  no such entry exists.
 *
 *  Newtest  - test record allocated with malloc (as GetRules does).
 *             Ownership passes to this function: the record is either
 *             stored in TestVec or released.
 *  returns  - the test held in TestVec; callers must use this pointer
 *             in place of Newtest.
 *
 *  TestVec is used from index 1 and grows in steps of 1000 entries.
 */
Test FindTest(Test Newtest)
{
    static short TestSpace=0;
    short i;

    ForEach(i, 1, NTests)
    {
	if ( SameTest(Newtest, TestVec[i]) )
	{
	    /*  The record comes from malloc, so it has to be released
		with free; delete on malloc'd memory is undefined.
		NOTE(review): any subset bitmaps attached to Newtest are
		not released here because they may be shared with other
		structures -- confirm against the other callers.  */

	    free(Newtest);
	    return TestVec[i];
	}
    }

    NTests++;
    if ( NTests >= TestSpace )
    {
	TestSpace += 1000;
	if ( TestSpace > 1000 )
	{
	    TestVec = (Test *) realloc(TestVec, TestSpace * sizeof(Test));
	}
	else
	{
	    /*  First allocation  */

	    TestVec = (Test *) malloc(TestSpace * sizeof(Test));
	}
    }

    TestVec[NTests] = Newtest;

    return TestVec[NTests];
}



/*************************************************************************/
/*								  	 */
/*	See if test t1 is the same test as test t2		  	 */
/*								  	 */
/*************************************************************************/


/*
 *  SameTest: decide whether t1 and t2 describe the same test.
 *
 *  Two tests are the same when they have the same node type and test
 *  the same attribute, and additionally
 *    - ThreshContin: the cut values are equal;
 *    - BrSubset:     they have the same number of forks and every
 *                    fork's value subset holds the same bits.
 *  BrDiscr tests (and any other node type) need no further check.
 *
 *  Subsets are compared by content as well as by address: tests read
 *  back by GetRules get freshly allocated bitmaps, so an address-only
 *  comparison would never recognise them as equal.
 */
BOOL SameTest(Test t1, Test t2)
{
    short i, Bytes;

    if ( t1->NodeType != t2->NodeType ||
	 t1->Tested != t2->Tested )
    {
	return false;
    }

    switch ( t1->NodeType )
    {
	case BrDiscr:
	    return true;

	case ThreshContin:
	    return  t1->Cut == t2->Cut;

	case BrSubset:
	    /*  Differing fork counts cannot match, and checking first
		keeps the loop below inside t2->Subset  */

	    if ( t1->Forks != t2->Forks )
	    {
		return false;
	    }

	    /*  Same bitmap size as used when subsets are saved/loaded;
		assumes every subset was allocated with this size  */

	    Bytes = (MaxAttVal[t1->Tested]>>3) + 1;
	    ForEach(i, 1, t1->Forks)
	    {
		if ( t1->Subset[i] != t2->Subset[i] &&
		     memcmp(t1->Subset[i], t2->Subset[i], Bytes) != 0 )
		{
		    return false;
		}
	    }
    }
    return true;
}

/*************************************************************************/
/*								  	 */
/*		Clear for new set of rules			  	 */
/*								  	 */
/*************************************************************************/
/*
 *  InitialiseRules: start an empty ruleset by resetting the rule
 *  vector, its allocated size and the rule count.  The previous rule
 *  vector is not released here.
 */
void InitialiseRules()
{
    Rule = 0;
    RuleSpace = 0;
    NRules = 0;
}



/*************************************************************************/
/*								  	 */
/*  Add a new rule to the current ruleset, by updating Rule[],	  	 */
/*  NRules and, if necessary, RuleSpace				  	 */
/*								  	 */
/*************************************************************************/
/*
 *  NewRule: add the rule "Cond[1..NConds] -> TargetClass" with error
 *  estimate Err to the current ruleset.
 *
 *  Returns false without adding anything when SameRule reports that an
 *  existing rule duplicates it; in that case the existing rule keeps
 *  the larger of the two error estimates.  Otherwise the rule is stored
 *  at Rule[NRules] (after incrementing NRules) with freshly allocated
 *  copies of the conditions, and true is returned.
 */
Boolean NewRule(Condition Cond[], short NConds, ClassNo TargetClass,float Err)
{
    short k, old;
    PR *Fresh;

    /*  Reject the rule if an equivalent one is already present  */

    ForEach(old, 1, NRules)
    {
	if ( SameRule(old, Cond, NConds, TargetClass) )
	{
	    Verbosity(1) fprintf(fLog,"\tduplicates rule %d\n", old);

	    /*  Retain whichever error estimate is more pessimistic  */

	    if ( Rule[old].Error < Err )
	    {
		Rule[old].Error = Err;
	    }

	    return false;
	}
    }

    /*  Grow the rule vector in steps of 100 when it is exhausted  */

    NRules++;
    if ( NRules >= RuleSpace )
    {
	RuleSpace += 100;
	Rule = ( RuleSpace > 100 )
	       ? (PR *) realloc(Rule, RuleSpace * sizeof(PR))
	       : (PR *) malloc(RuleSpace * sizeof(PR));
    }

    /*  Fill in the new slot; the conditions are copied so the caller
	keeps ownership of Cond[]  */

    Fresh = &Rule[NRules];

    Fresh->Size = NConds;
    Fresh->Lhs = (Condition *) calloc(NConds+1, sizeof(Condition));
    ForEach(k, 1, NConds)
    {
	Fresh->Lhs[k] = (Condition) malloc(sizeof(struct CondRec));

	Fresh->Lhs[k]->CondTest = Cond[k]->CondTest;
	Fresh->Lhs[k]->TestValue = Cond[k]->TestValue;
    }
    Fresh->Rhs = TargetClass;
    Fresh->Error = Err;

    Verbosity(1) PrintRule(NRules);

    return true;
}



/*************************************************************************/
/*								  	 */
/*  Decide whether the given rule duplicates rule r		  	 */
/*								  	 */
/*************************************************************************/


/*
 *  SameRule: decide whether the rule "Cond[1..NConds] -> TargetClass"
 *  duplicates the existing rule r.
 *
 *  The rules are duplicates when they have the same number of
 *  conditions and the same class, and each pair of corresponding
 *  conditions (compared position by position)
 *    - applies the same kind of test to the same attribute,
 *    - selects the same outcome (TestValue) of that test, and
 *    - for ThreshContin uses the same cut, or for BrSubset has the same
 *      number of forks with equal value subsets.
 *
 *  The outcome is compared for every kind of test.  Without that,
 *  "A <= cut" and "A > cut" (or two different branches of one subset
 *  test) leading to the same class would be taken for one rule and the
 *  second would be dropped.
 */
Boolean SameRule(RuleNo r, Condition Cond[], short NConds, ClassNo TargetClass)
{
    short d, i, Bytes;
    Test SubTest1, SubTest2;

    if ( Rule[r].Size != NConds || Rule[r].Rhs != TargetClass )
    {
	return false;
    }

    ForEach(d, 1, NConds)
    {
	SubTest1 = Rule[r].Lhs[d]->CondTest;
	SubTest2 = Cond[d]->CondTest;

	if ( SubTest1->NodeType != SubTest2->NodeType ||
	     SubTest1->Tested   != SubTest2->Tested )
	{
	    return false;
	}

	/*  Same test but different outcome is a different condition  */

	if ( Rule[r].Lhs[d]->TestValue != Cond[d]->TestValue )
	{
	    return false;
	}

	switch ( SubTest2->NodeType )
	{
	    case BrDiscr:
		break;

	    case ThreshContin:
		if ( SubTest1->Cut != SubTest2->Cut )
		{
		    return false;
		}
		break;

	    case BrSubset:
		/*  Checking the fork count first keeps the loop below
		    inside SubTest2->Subset  */

		if ( SubTest1->Forks != SubTest2->Forks )
		{
		    return false;
		}

		/*  Compare subsets by content, not just by address, so
		    equal subsets held in separate allocations match.
		    Assumes every subset was allocated with this size.  */

		Bytes = (MaxAttVal[SubTest1->Tested]>>3) + 1;
		ForEach(i, 1, SubTest1->Forks)
		{
		    if ( SubTest1->Subset[i] != SubTest2->Subset[i] &&
			 memcmp(SubTest1->Subset[i], SubTest2->Subset[i],
				Bytes) != 0 )
		    {
			return false;
		    }
		}
		break;
	}
    }

    return true;
}



/*************************************************************************/
/*								  	 */
/*		Print the current indexed ruleset		  	 */
/*								  	 */
/*************************************************************************/
/*
 *  PrintIndexedRules: print every rule of the current ruleset to fLog
 *  in RuleIndex order, followed by the name of the default class.
 */
void PrintIndexedRules()
{
    short pos;

    ForEach(pos, 1, NRules )
    {
	RuleNo which = RuleIndex[pos];

	PrintRule(which);
    }

    fprintf(fLog,"\nDefault class: %s\n", ClassName[DefaultClass]);
}



/*************************************************************************/
/*								  	 */
/*		Print the rule r				  	 */
/*								  	 */
/*************************************************************************/
/*
 *  PrintRule: print rule number r to fLog -- a heading, one indented
 *  line per condition, then the predicted class with the rule's
 *  accuracy estimate, shown as 100 * (1 - Error) percent.
 */
void PrintRule(RuleNo r)
{
    short k;
    PR *Entry = &Rule[r];

    fprintf(fLog,"\nRule %d:\n", r);

    ForEach(k, 1, Entry->Size)
    {
	fprintf(fLog,"    ");
	PrintCondition(Entry->Lhs[k]);
    }

    fprintf(fLog,"\t->  class %s  [%.1f%%]\n",
	    ClassName[Entry->Rhs], 100 * (1 - Entry->Error));
}



/*************************************************************************/
/*								  	 */
/*	Print a condition c of a production rule		  	 */
/*								  	 */
/*************************************************************************/
/*
 *  PrintCondition: print condition c of a production rule to fLog on
 *  one line, starting with a tab and the name of the tested attribute.
 *
 *    negative TestValue  ->  "<att> is unknown"
 *    BrDiscr             ->  "<att> = <value name>"
 *    ThreshContin        ->  "<att> <= <cut>" when TestValue is 1,
 *                            otherwise "<att> > <cut>"
 *    BrSubset            ->  "<att> = <value>" when the branch's subset
 *                            holds exactly one value, otherwise
 *                            "<att> in {v1, v2, ...}"
 *
 *  The braces of the subset form are always printed as a pair, so an
 *  empty subset appears as "in {}".
 */
void PrintCondition(Condition c)
{
    Test tp;
    DiscrValue v, pv, Last=0, Values=0;
    Boolean First=true;
    Attribute Att;

    tp = c->CondTest;
    v = c->TestValue;
    Att = tp->Tested;

    fprintf(fLog,"\t%s", AttName[Att]);

    if ( v < 0 )
    {
	fprintf(fLog," is unknown\n");
	return;
    }

    switch ( tp->NodeType )
    {
	case BrDiscr:
	    fprintf(fLog," = %s\n", AttValName[Att][v]);
	    break;

	case ThreshContin:
	    fprintf(fLog," %s %g\n", ( v == 1 ? "<=" : ">" ), tp->Cut);
	    break;

	case BrSubset:
	    /*  Count values at this branch; the scan stops as soon as a
		second value is found, since only "exactly one" matters  */

	    for ( pv=1 ; Values <= 1 && pv <= MaxAttVal[Att] ; pv++ )
	    {
		if ( In(pv, tp->Subset[v]) )
		{
		    Last = pv;
		    Values++;
		}
	    }

	    if ( Values == 1 )
	    {
		fprintf(fLog," = %s\n", AttValName[Att][Last]);
		break;
	    }

	    /*  Open the brace before looking at any value so that it is
		present even when the subset turns out to be empty  */

	    fprintf(fLog," in {");
	    ForEach(pv, 1, MaxAttVal[Att])
	    {
		if ( In(pv, tp->Subset[v]) )
		{
		    if ( First )
		    {
			First = false;
		    }
		    else
		    {
			fprintf(fLog,", ");
		    }
		    fprintf(fLog,"%s", AttValName[Att][pv]);
		}
	    }
	    fprintf(fLog,"}\n");
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -