⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 c4.5rulesgt.c

📁 决策树c4.5算法的c++实现 希望对大家有用
💻 C
📖 第 1 页 / 共 5 页
字号:
/*************************************************************************//*									 *//*		Definitions used in C4.5				 *//*              ------------------------				 *//*									 *//*************************************************************************/#include <stdio.h>#include <math.h>#define	 Eof			EOF             /*char read on end of file*/#define	 Nil			0               /*null pointer*/#define	 false			0 #define	 true			1 #define	 None			-1#define	 Epsilon                1E-3//long	 random();#define	 Random			((rand()&2147483647) / 2147483648.0)#define	 Max(a,b)               ((a)>(b) ? a : b) #define	 Min(a,b)               ((a)<(b) ? a : b) #define	 Round(x)		((int) (x+0.5))#define	 Log2			0.69314718055994530942#define	 Log(x)			((x) <= 0 ? 0.0 : log((float)x) / Log2)#define	 Bit(b)			(1 << (b))#define	 In(b,s)		((s[(b) >> 3]) & Bit((b) & 07))#define	 ClearBits(n,s)		memset(s,0,n)#define	 CopyBits(n,f,t)	memcpy(t,f,n)#define	 SetBit(b,s)		(s[(b) >> 3] |= Bit((b) & 07))#define	 ForEach(v,f,l)		for(v=f ; v<=l ; ++v) #define	 Verbosity(d)		if(VERBOSITY >= d)#define	 Check(v,l,h)\	     if ( v<l||v>h ) {printf("\t** illegal value **\n"); exit(1);}/*************************************************************************//*									 *//*		Type definitions for C4.5				 *//*              -------------------------				 *//*									 *//*************************************************************************/typedef  char	Boolean, *String, *Set;typedef  int	ItemNo;		/* data item number */typedef  float	ItemCount;	/* count of (partial) items */typedef  short	ClassNo,	/* class number, 0..MaxClass */		DiscrValue;	/* discrete attribute value (0 = ?) */typedef  short	Attribute;	/* attribute number, 0..MaxAtt */typedef  union  _attribute_value	 {	    DiscrValue	_discr_val;	    float	_cont_val;	 }	 	AttValue, *Description;#define  CVal(Case,Attribute)   Case[Attribute]._cont_val#define  DVal(Case,Attribute)   Case[Attribute]._discr_val#define  Class(Case)		Case[MaxAtt+1]._discr_val#define  Unknown  -999		/* unknown value for continuous attribute */#define  BrDiscr	1	/* node types:	branch */#define  ThreshContin	2	/*		threshold cut */#define  BrSubset	3	/*		subset test */typedef  struct _tree_record *Tree;typedef  struct _tree_record	 {	    short	NodeType;	/* 0=leaf 1=branch 2=cut 3=subset */	    ClassNo	Leaf;		/* most frequent class at this node */	    ItemCount	Items,		/* no of items at this node */			*ClassDist,	/* class distribution of items */	    		Errors;		/* no of errors at this node */	    Attribute	Tested; 	/* attribute referenced in test */	    short	Forks;		/* number of branches at this node */	    float	Cut,		/* threshold for continuous attribute */		  	Lower,		/* lower limit of soft threshold */		  	Upper;		/* upper limit ditto */	    Set         *Subset;	/* subsets of discrete values  */	    Tree	*Branch;	/* Branch[x] = (sub)tree for outcome x */	 }		TreeRec;#define  IGNORE		1	/* special attribute status: do not use */#define  DISCRETE	2	/* ditto: collect values as data read */typedef short	RuleNo;			/* rule number */typedef struct TestRec *Test;struct TestRec       {	   short	NodeType;	/* test type (see tree nodes) */	   Attribute	Tested;		/* attribute tested */	   short	Forks;		/* possible branches */	   float	Cut;		/* threshold (if relevant) */	   Set		*Subset;	/* subset (if relevant) */       };typedef struct CondRec *Condition;struct CondRec       {	   Test		CondTest;	/* test part of condition */	   short	TestValue;	/* specified outcome of test */       };typedef struct ProdRuleRec PR;struct ProdRuleRec       {	   short	Size;		/* number of conditions */	   Condition	*Lhs;		/* conditions themselves */	   ClassNo	Rhs;		/* class given by rule */	   float	Error,		/* estimated error rate */			Bits;		/* bits to encode rule */	   ItemNo	Used,		/* times rule used */			Incorrect;	/* times rule incorrect */       };typedef struct RuleSetRec RuleSet;struct RuleSetRec       {	   PR		*SRule;		/* rules */	   RuleNo	SNRules,	/* number of rules */			*SRuleIndex;	/* ranking of rules */	   ClassNo	SDefaultClass;	/* default class for this ruleset */    };/*************************************************************************//*									 *//*  Main routine for constructing sets of production rules from trees	 *//*  -----------------------------------------------------------------	 *//*									 *//*************************************************************************/	/*  External data.  Note: uncommented variables have the same meaning	    as for decision trees  */short		MaxAtt, MaxClass, MaxDiscrVal;ItemNo		MaxItem;Description	*Item;DiscrValue	*MaxAttVal;char		*SpecialStatus;String		*ClassName,		*AttName,		**AttValName,		FileName = "DF";short		VERBOSITY = 0,		TRIALS;Boolean		UNSEENS	  = false,		SIGTEST	  = false,	/* use significance test in rule pruning */		SIMANNEAL = false;	/* use simulated annealing */float		SIGTHRESH   = 0.05,		CF	    = 0.25,		REDUNDANCY  = 1.0;	/* factor that guesstimates the					   amount of redundancy and					   irrelevance in the attributes */PR		*Rule;			/* current rules */RuleNo		NRules = 0,		/* number of current rules */		*RuleIndex;		/* rule index */short		RuleSpace = 0;		/* space allocated for rules */ClassNo		DefaultClass;		/* current default class */RuleSet		*PRSet;			/* sets of rulesets */float		AttTestBits,		/* bits to encode tested att */		*BranchBits;		/* ditto attribute value */int optind = 1;
char *optarg;


    getopt(Argc, Argv, Str)
/*  ------  */
    int Argc;
    char **Argv, *Str;
{
    int Optchar;
    char *Option;

    if ( optind >= Argc ) return EOF;

    Option = Argv[optind++];

    if ( *Option++ != '-' ) return '?';

    Optchar = *Option++;

    while ( *Str && *Str != Optchar ) Str++;
    if ( ! *Str ) return '?';

    if ( *++Str == ':' )
    {
	if ( *Option ) optarg = Option;
	else
	if ( optind < Argc ) optarg = Argv[optind++];
	else
	Optchar = '?';
    }

    return Optchar;
}    main(Argc, Argv)/*  ----  */    int Argc;    char *Argv[];{    int o;    extern char *optarg;    Boolean FirstTime=true;    PrintHeader("rule generator");
	printf("%s\n",Argv[2]);
	scanf("%s",Argv[2]);
	printf("%s\n",Argv[1]);
	printf("%s\n",Argv[2]);	
	printf("%s\n",Argv[3]);    /*  Process options  */    while ( (o = getopt(Argc, Argv, "f:uv:c:r:F:a")) != EOF )    {	if ( FirstTime )	{	    printf("\n    Options:\n");	    FirstTime = false;	}	switch (o)	{	    case 'f':	FileName = optarg;			printf("\tFile stem <%s>\n", FileName);			break;	    case 'u':	UNSEENS = true;			printf("\tRulesets evaluated on unseen cases\n");			break;	    case 'v':	VERBOSITY = atoi(optarg);			printf("\tVerbosity level %d\n", VERBOSITY);			break;	    case 'c':	CF = atof(optarg);			printf("\tPruning confidence level %g%%\n", CF);			Check(CF, 0, 100);			CF /= 100;			break;	    case 'r':	REDUNDANCY = atof(optarg);			printf("\tRedundancy factor %g\n", REDUNDANCY);			Check(REDUNDANCY, 0, 10000);			break;	    case 'F':	SIGTHRESH = atof(optarg);			printf("\tSignificance test in rule pruning, ");			printf("threshold %g%%\n", SIGTHRESH);			Check(SIGTHRESH, 0, 100);		 	SIGTHRESH /= 100;			SIGTEST = true;			break;	    case 'a':	SIMANNEAL = true;			printf("\tSimulated annealing for selecting rules\n");			break;	    case '?':	printf("unrecognised option\n");			exit(1);	}    }    /*  Initialise  */    GetNames();    GetData(".data");    printf("\nRead %d cases (%d attributes) from %s\n",	   MaxItem+1, MaxAtt+1, FileName);    GenerateLogs();    /*  Construct rules  */    GenerateRules();    /*  Evaluations  */    printf("\n\nEvaluation on training data (%d items):\n", MaxItem+1);    EvaluateRulesets(true);    /*  Save current ruleset  */    SaveRules();    if ( UNSEENS )    {	GetData(".test");	printf("\nEvaluation on test data (%d items):\n", MaxItem+1);	EvaluateRulesets(false);    }    exit(0);}/*************************************************************************//*								  	 *//*	Miscellaneous routines for rule handling		  	 *//*	----------------------------------------		  	 *//*								  	 *//*************************************************************************/extern  FILE	*TRf;		/* rules file */Test	*TestVec;short	NTests = 0;FILE	*fopen();extern char	Fn[500];	/* file name *//*************************************************************************//*								  	 *//*  Save the current ruleset in rules file in order of the index  	 *//*								  	 *//*************************************************************************/    SaveRules()/*  ----------  */{    short ri, d, v, Bytes;    RuleNo r;    Test Tst;    if ( TRf ) fclose(TRf);    strcpy(Fn, FileName);    strcat(Fn, ".rules");    if ( ! ( TRf = fopen(Fn, "w") ) ) Error(0, Fn, " for writing");        StreamOut((char *) &NRules, sizeof(RuleNo));    StreamOut((char *) &DefaultClass, sizeof(ClassNo));    ForEach(ri, 1, NRules)    {	r = RuleIndex[ri];        StreamOut((char *) &Rule[r].Size, sizeof(short));	ForEach(d, 1, Rule[r].Size)	{	    Tst = Rule[r].Lhs[d]->CondTest;	    StreamOut((char *) &Tst->NodeType, sizeof(short));	    StreamOut((char *) &Tst->Tested, sizeof(Attribute));	    StreamOut((char *) &Tst->Forks, sizeof(short));	    StreamOut((char *) &Tst->Cut, sizeof(float));	    if ( Tst->NodeType == BrSubset )	    {		Bytes = (MaxAttVal[Tst->Tested]>>3) + 1;		ForEach(v, 1, Tst->Forks)		{		    StreamOut((char *) Tst->Subset[v], Bytes);		}	    }	    StreamOut((char *) &Rule[r].Lhs[d]->TestValue, sizeof(short));	}	StreamOut((char *) &Rule[r].Rhs, sizeof(ClassNo));	StreamOut((char *) &Rule[r].Error, sizeof(float));    }    SaveDiscreteNames();}/*************************************************************************//*                                                                	 *//*	Get a new ruleset from rules file			  	 *//*                                                                	 *//*************************************************************************/    GetRules()/*  ---------  */{    RuleNo nr, r;    short n, d, v, Bytes;    Condition *Cond;    Test Tst, FindTest();    ClassNo c;    float e;    Boolean NewRule();    if ( TRf ) fclose(TRf);    strcpy(Fn, FileName);    strcat(Fn, ".rules");    if ( ! ( TRf = fopen(Fn, "r") ) ) Error(0, Fn, "");        StreamIn((char *) &nr, sizeof(RuleNo));    StreamIn((char *) &DefaultClass, sizeof(ClassNo));    ForEach(r, 1, nr)    {        StreamIn((char *) &n, sizeof(short));	Cond = (Condition *) calloc(n+1, sizeof(Condition));	ForEach(d, 1, n)	{	    Tst = (Test) malloc(sizeof(struct TestRec));	    StreamIn((char *) &Tst->NodeType, sizeof(short));	    StreamIn((char *) &Tst->Tested, sizeof(Attribute));	    StreamIn((char *) &Tst->Forks, sizeof(short));	    StreamIn((char *) &Tst->Cut, sizeof(float));	    if ( Tst->NodeType == BrSubset )	    {		Tst->Subset = (Set *) calloc(Tst->Forks + 1, sizeof(Set));		Bytes = (MaxAttVal[Tst->Tested]>>3) + 1;		ForEach(v, 1, Tst->Forks)		{		    Tst->Subset[v] = (Set) malloc(Bytes);		    StreamIn((char *) Tst->Subset[v], Bytes);		}	    }	    Cond[d] = (Condition) malloc(sizeof(struct CondRec));	    Cond[d]->CondTest = FindTest(Tst);	    StreamIn((char *) &Cond[d]->TestValue, sizeof(short));	}	StreamIn((char *) &c, sizeof(ClassNo));	StreamIn((char *) &e, sizeof(float));	NewRule(Cond, n, c, e);	free(Cond);    }    RecoverDiscreteNames();}/*************************************************************************//*								  	 *//*  Find a test in the test vector; if it's not there already, add it	 *//*								  	 *//*************************************************************************/Test FindTest(Newtest)/*   ---------  */    Test Newtest;{    static short TestSpace=0;    short i;    Boolean SameTest();    ForEach(i, 1, NTests)    {	if ( SameTest(Newtest, TestVec[i]) )	{	    free(Newtest);	    return TestVec[i];	}    }    NTests++;    if ( NTests >= TestSpace )    {	TestSpace += 1000;	if ( TestSpace > 1000 )	{	    TestVec = (Test *) realloc(TestVec, TestSpace * sizeof(Test));	}	else	{	    TestVec = (Test *) malloc(TestSpace * sizeof(Test));	}    }    TestVec[NTests] = Newtest;    return TestVec[NTests];}/*************************************************************************//*								  	 *//*	See if test t1 is the same test as test t2		  	 *//*								  	 *//*************************************************************************/Boolean SameTest(t1, t2)/*      ---------  */    Test t1, t2;{    short i;    if ( t1->NodeType != t2->NodeType ||	t1->Tested != t2->Tested )    {	return false;    }    switch ( t1->NodeType )    {	case BrDiscr:       return true;	case ThreshContin:  return  t1->Cut == t2->Cut;	case BrSubset:      ForEach(i, 1, t1->Forks)			    {		 		if ( t1->Subset[i] != t2->Subset[i] )				{				    return false;				}			    }    }    return true;}/*************************************************************************//*								  	 *//*		Clear for new set of rules			  	 *//*								  	 *//*************************************************************************/    InitialiseRules()/*  ----------------  */{    NRules = 0;    Rule = 0;    RuleSpace = 0;}/*************************************************************************//*								  	 *//*  Add a new rule to the current ruleset, by updating Rule[],	  	 *//*  NRules and, if necessary, RuleSpace				  	 *//*								  	 *//*************************************************************************/Boolean NewRule(Cond, NConds, TargetClass, Err)/*       -------  */    Condition Cond[];    short NConds;    ClassNo TargetClass;    float Err;{    short d, r;    Boolean SameRule();    /*  See if rule already exists  */    ForEach(r, 1, NRules)    {	if ( SameRule(r, Cond, NConds, TargetClass) )	{	    Verbosity(1) printf("\tduplicates rule %d\n", r);	    /*  Keep the most pessimistic error estimate  */	    if ( Err > Rule[r].Error )	    {		Rule[r].Error = Err;	    }	    return false;	}    }    /*  Make sure there is enough room for the new rule  */    NRules++;    if ( NRules >= RuleSpace )    {	RuleSpace += 100;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -