⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 defns.h

📁 这是vc开发的数据挖掘算法中的决策树算法之see5算法源代码.
💻 H
字号:
/*************************************************************************/
/*                                                                       */
/*      Source code for use with See5/C5.0 Release 2.04                  */
/*      -----------------------------------------------                  */
/*                    Copyright RuleQuest Research 2007                  */
/*                                                                       */
/*      This code is provided "as is" without warranty of any kind,      */
/*      either express or implied.  All use is at your own risk.         */
/*                                                                       */
/*************************************************************************/

/*
 * Shared definitions: constants, helper macros, data types, function
 * prototypes and message strings.
 *
 * The include guard makes the header safe to include more than once
 * (the struct bodies below may not be defined twice in one translation
 * unit).
 */
#ifndef SEE5_DEFNS_H
#define SEE5_DEFNS_H

#include <stdio.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <time.h>

#ifdef WIN32
#include <windows.h>
#endif


/*************************************************************************/
/*                                                                       */
/*              Constants, macros etc.                                   */
/*                                                                       */
/*************************************************************************/

#define  SEE5

#define  Nil       0            /* null pointer */
#define  false     0
#define  true      1
#define  None      (-1)         /* parenthesized so it is safe in any expression */
#define  Epsilon   1E-4

/* Bit flags combined in SpecialStatus[] and tested with StatBit() */
#define  EXCLUDE   1            /* special attribute status: do not use */
#define  SKIP      2            /* do not use in classifiers */
#define  DISCRETE  4            /* ditto: collect values as data read */
#define  ORDERED   8            /* ditto: ordered discrete values */
#define  DATEVAL   16           /* ditto: YYYY/MM/DD or YYYY-MM-DD */
#define  STIMEVAL  32           /* ditto: HH:MM:SS */
#define  TSTMPVAL  64           /* date time */

                                /* unknown and N/A values are represented by
                                   unlikely floating-point numbers
                                   (octal 01600000000 and 01) */
#define  UNKNOWN   01600000000  /* 1.5777218104420236e-30 */
#define  NA        01           /* 1.4012984643248171e-45 */

/* Branch (test) types stored in NodeType fields */
#define  BrDiscr   1
#define  BrThresh  2
#define  BrSubset  3

/*
 * Typed allocation wrappers around Pmalloc / Pcalloc / Prealloc.
 * Realloc assigns the new pointer back to V.  Each expansion is fully
 * parenthesized so it behaves as a single expression.
 */
#define  Alloc(N,T)             ((T *) Pmalloc((N)*sizeof(T)))
#define  AllocZero(N,T)         ((T *) Pcalloc((N), sizeof(T)))
#define  Realloc(V,N,T)         ((V) = (T *) Prealloc((V), (N)*sizeof(T)))

/*
 * Bit-set helpers: a set is an array of bytes; bit b lives in byte b>>3
 * at position b&7.  The set argument is parenthesized so pointer
 * expressions such as `s + 1' index correctly.
 */
#define  Bit(b)                 (1 << (b))
#define  In(b,s)                (((s)[(b) >> 3]) & Bit((b) & 07))
#define  SetBit(b,s)            ((s)[(b) >> 3] |= Bit((b) & 07))

/* Inclusive loop header: v runs from f up to and including l */
#define  ForEach(v,f,l)         for ((v) = (f) ; (v) <= (l) ; ++(v))

/* Attribute status tests; these read SpecialStatus[] and MaxAttVal[],
   which are declared elsewhere */
#define  StatBit(a,b)           (SpecialStatus[a]&(b))
#define  Exclude(a)             StatBit(a,EXCLUDE)
#define  Skip(a)                StatBit(a,EXCLUDE|SKIP)
#define  Discrete(a)            (MaxAttVal[a] || StatBit(a,DISCRETE))
#define  Continuous(a)          (! MaxAttVal[a] && ! StatBit(a,DISCRETE))
#define  Ordered(a)             StatBit(a,ORDERED)
#define  DateVal(a)             StatBit(a,DATEVAL)
#define  TimeVal(a)             StatBit(a,STIMEVAL)
#define  TStampVal(a)           StatBit(a,TSTMPVAL)

/* True for blank, newline, carriage return or tab (s is evaluated more
   than once, so it must not have side effects) */
#define  Space(s)               ((s)==' ' || (s)=='\n' || (s)=='\r' || (s)=='\t')

/* Consume input up to end of line or EOF; relies on a local `c' and a
   stream `f' being in scope at the point of use */
#define  SkipComment            while ( ( c = InChar(f) ) != '\n' && c != EOF )

/*
 * free() accepts a null pointer and does nothing, so no explicit test is
 * required.  Expanding to a single call (rather than a bare `if') keeps
 * the macro safe inside if/else statements.
 */
#define  FreeUnlessNil(p)       free(p)
#define  Free(x)                free(x)

/* Assertions are compiled out unless <assert.h> has already supplied a
   definition; the guard avoids an incompatible macro redefinition */
#ifndef  assert
#define  assert(x)
#endif

#ifdef WIN32
#define  rint(x)                ((x)<0 ? (double)((int)((x)-0.5)) :\
                                         (double)((int)((x)+0.5)))
#define  finite(x)              _finite(x)
#define  strdup(x)              _strdup(x)
#endif

#define  P1(x)                  (rint((x)*10) / 10)     /* round to one decimal place */

#define  Of                     stdout
#define  Goodbye(x)             exit(x)
#define  CharWidth(s)           ((int) strlen(s))

/* Error numbers (presumably the ErrNo values accepted by Error(); the
   names parallel the E_... message strings at the end of this header) */
#define  NOFILE          0
#define  BADCLASSTHRESH  1
#define  LEQCLASSTHRESH  2
#define  BADATTNAME      3
#define  EOFINATT        4
#define  SINGLEATTVAL    5
#define  BADATTVAL       6
#define  BADNUMBER       7
#define  BADCLASS        8
#define  BADCOSTCLASS    9
#define  BADCOST        10
#define  NOMEM          11
#define  TOOMANYVALS    12
#define  BADDISCRETE    13
#define  NOTARGET       14
#define  BADCTARGET     15
#define  BADDTARGET     16
#define  LONGNAME       17
#define  HITEOF         18
#define  MISSNAME       19
#define  BADDATE        20
#define  BADTIME        21
#define  BADTSTMP       22
#define  DUPATTNAME     23
#define  UNKNOWNATT     24
#define  BADDEF1        25
#define  BADDEF2        26
#define  BADDEF3        27
#define  BADDEF4        28
#define  SAMEATT        29
#define  MODELFILE      30
#define  CWTATTERR      31


/*************************************************************************/
/*                                                                       */
/*              Type definitions                                         */
/*                                                                       */
/*************************************************************************/

typedef  unsigned char  Boolean, BranchType, *Set, Byte;
typedef  char           *String;

typedef  int    ItemNo;                 /* data item number */
typedef  float  ItemCount;              /* count of (partial) items */

typedef  int    ClassNo,                /* class number, 1..MaxClass */
                DiscrValue,             /* discrete attribute value (0 = ?) */
                Attribute;              /* attribute number, 1..MaxAtt */

typedef  float  ContValue;              /* continuous attribute value */
#define  PREC    7                      /* precision */


/* Operand of one element of an attribute definition */
typedef  union   _def_val
         {
            String      _s_val;         /* att val for comparison */
            ContValue   _n_val;         /* number for arith */
         }
         DefVal;

/* One element (opcode plus operand) of an attribute definition */
typedef  struct  _def_elt
         {
            short       _op_code;       /* type of element */
            DefVal      _operand;       /* string or numeric value */
         }
         DefElt, *Definition;

typedef  struct  _elt_rec
         {
            int         Fi,             /* index of first char of element */
                        Li;             /* last ditto */
            char        Type;           /* 'B', 'S', or 'N' */
         }
         EltRec;

/* Field accessors for a DefElt; the argument is parenthesized so that
   expressions such as `*p' can be passed */
#define  DefOp(DE)      (DE)._op_code
#define  DefSVal(DE)    (DE)._operand._s_val
#define  DefNVal(DE)    (DE)._operand._n_val

#define  OP_ATT                  0      /* opcodes */
#define  OP_NUM                  1
#define  OP_STR                  2
#define  OP_MISS                 3
#define  OP_AND                 10
#define  OP_OR                  11
#define  OP_EQ                  20
#define  OP_NE                  21
#define  OP_GT                  22
#define  OP_GE                  23
#define  OP_LT                  24
#define  OP_LE                  25
#define  OP_SEQ                 26
#define  OP_SNE                 27
#define  OP_PLUS                30
#define  OP_MINUS               31
#define  OP_UMINUS              32
#define  OP_MULT                33
#define  OP_DIV                 34
#define  OP_MOD                 35
#define  OP_POW                 36
#define  OP_SIN                 40
#define  OP_COS                 41
#define  OP_TAN                 42
#define  OP_LOG                 43
#define  OP_EXP                 44
#define  OP_INT                 45
#define  OP_END                 99


/* One attribute value; a case (Description) is an array of these */
typedef  union  _attribute_value
         {
            DiscrValue  _discr_val;
            ContValue   _cont_val;
         }
         AttValue, *Description;

/*
 * Case accessors.  Element 0 of a case holds the class and the element
 * immediately before it (index -1) holds the case weight.  All results
 * are lvalues.  Arguments are parenthesized so that pointer expressions
 * (e.g. `Case + 1') may be passed.
 */
#define  CVal(Case,Attribute)   (Case)[(Attribute)]._cont_val
#define  DVal(Case,Attribute)   (Case)[(Attribute)]._discr_val
#define  XDVal(Case,Att)        ((Case)[(Att)]._discr_val & 077777777)
#define  SVal(Case,Attribute)   (Case)[(Attribute)]._discr_val
#define  Class(Case)            (*(Case))._discr_val
#define  Weight(Case)           (*((Case)-1))._cont_val

#define  Unknown(Case,Att)      (DVal(Case,Att)==UNKNOWN)
#define  UnknownVal(AV)         ((AV)._discr_val==UNKNOWN)
#define  NotApplic(Case,Att)    (DVal(Case,Att)==NA)
#define  NotApplicVal(AV)       ((AV)._discr_val==NA)


/* Decision tree node */
typedef  struct _treerec        *Tree;
typedef  struct _treerec
         {
            BranchType  NodeType;
            ClassNo     Leaf;           /* best class at this node */
            ItemCount   Items,          /* no of items at this node */
                        *ClassDist,     /* class distribution of items */
                        Errors;         /* no of errors at this node */
            Attribute   Tested;         /* attribute referenced in test */
            int         Forks;          /* number of branches at this node */
            ContValue   Cut,            /* threshold for continuous attribute */
                        Lower,          /* lower limit of soft threshold */
                        Upper,          /* upper limit ditto */
                        Mid;            /* 50% point */
            Set         *Subset;        /* subsets of discrete values  */
            Tree        *Branch;        /* Branch[x] = subtree for outcome x */
         }
         TreeRec;


typedef  int    RuleNo;                 /* rule number */

/* One condition (test with a required outcome) of a rule */
typedef  struct _condrec
         {
            BranchType  NodeType;       /* test type (see tree nodes) */
            Attribute   Tested;         /* attribute tested */
            int         Forks;          /* possible branches */
            ContValue   Cut;            /* threshold (if relevant) */
            Set         Subset;         /* subset (if relevant) */
            int         TestValue,      /* specified outcome of test */
                        TestI;          /* rule tree index of this test */
         }
         CondRec, *Condition;


typedef  struct _rulerec
         {
            RuleNo      RNo;            /* rule number */
            int         TNo,            /* trial number */
                        Size;           /* number of conditions */
            Condition   *Lhs;           /* conditions themselves */
            ClassNo     Rhs;            /* class given by rule */
            ItemCount   Cover,          /* number of cases covered by rule */
                        Correct;        /* number on which correct */
            float       Prior;          /* prior probability of RHS */
            int         Vote;           /* unit = 0.001 */
         }
         RuleRec, *CRule;


typedef  struct _ruletreerec *RuleTree;
typedef  struct _ruletreerec
         {
            RuleNo      *Fire;          /* rules matched at this node */
            Condition   CondTest;       /* new test */
            int         Forks;          /* number of branches */
            RuleTree    *Branch;        /* subtrees */
         }
         RuleTreeRec;


typedef struct _rulesetrec
         {
            RuleNo      SNRules;        /* number of rules */
            CRule       *SRule;         /* rules */
            ClassNo     SDefault;       /* default class for this ruleset */
            RuleTree    RT;             /* rule tree for this ruleset */
         }
         RuleSetRec, *CRuleSet;


/*************************************************************************/
/*                                                                       */
/*              Function prototypes                                      */
/*                                                                       */
/*************************************************************************/

Boolean     ReadName(FILE *f, String s, int n, char ColonOpt);
void        GetNames(FILE *Nf);
void        ExplicitAtt(FILE *Nf);
int         Which(String Val, String *List, int First, int Last);
int         InChar(FILE *f);
Description GetDescription(FILE *Df, Boolean Train);
int         StoreIVal(String S);
void        CheckValue(Description DVec, Attribute Att);

void        ImplicitAtt(FILE *Nf);
void        ReadDefinition(FILE *f);
void        Append(char c);
Boolean     Expression();
Boolean     Conjunct();
Boolean     SExpression();
Boolean     AExpression();
Boolean     Term();
Boolean     Factor();
Boolean     Primary();
Boolean     Atom();
Boolean     Find(String S);
int         FindOne(String *Alt);
Attribute   FindAttName();
void        DefSyntaxError(String Msg);
void        DefSemanticsError(int Fi, String Msg, int OpCode);
void        Dump(char OpCode, ContValue F, String S, int Fi);
void        DumpOp(char OpCode, int Fi);
Boolean     UpdateTStack(char OpCode, ContValue F, String S, int Fi);
AttValue    EvaluateDef(Definition D, Description Case);

void        ReadFilePrefix(String Extension);
void        ReadHeader();
Tree        GetTree(String Extension);
Tree        InTree();
CRuleSet    GetRules(String Extension);
CRuleSet    InRules();
CRule       InRule();
Condition   InCondition();
void        ConstructRuleTree(CRuleSet RS);
void        SetTestIndex(Condition C);
RuleTree    GrowRT(RuleNo *RR, int RRN, CRule *Rule);
int         DesiredOutcome(CRule R, int TI);
int         SelectTest(RuleNo *RR, int RRN, CRule *Rule);
int         ReadProp(char *Delim);
String      RemoveQuotes(String S);
Set         MakeSubset(Attribute Att);
void        BinRecoverDiscreteNames();
Tree        BinInTree();
CRuleSet    BinInRules();
void        StreamIn(String S, int n);
Tree        Leaf(double *Freq, ClassNo NodeClass, ItemCount Items,
                 ItemCount Errors);
void        GetMCosts(FILE *f);

ClassNo     TreeClassify(Description CaseDesc, Tree DecisionTree);
void        FollowAllBranches(Description CaseDesc, Tree T, float Fraction);
void        FindLeaf(Description CaseDesc, Tree T, Tree PT, float Wt);
ClassNo     RuleClassify(Description CaseDesc, CRuleSet RS);
int         FindOutcome(Description Case, Condition OneCond);
Boolean     Satisfies(Description CaseDesc, Condition OneCond);
Boolean     Matches(CRule R, Description Case);
void        CheckActiveSpace(int N);
void        MarkActive(RuleTree RT, Description Case);
ClassNo     BoostClassify(Description CaseDesc, int MaxTrial);
ClassNo     SelectClass(ClassNo Default, Boolean UseCosts);
ClassNo     Classify(Description CaseDesc);
float       Interpolate(Tree T, ContValue Val);

FILE *      GetFile(String Extension, String RW);
void        CheckFile(String Extension, Boolean Write);

char        ProcessOption(int Argc, char *Argv[], char *Options);
void        *Pmalloc(size_t Bytes);
void        *Prealloc(void *Present, size_t Bytes);
/* NOTE(review): Size is `unsigned' while Number is `size_t'; left as is
   because the matching definition is not visible from this header */
void        *Pcalloc(size_t Number, unsigned Size);
void        Error(int ErrNo, String S1, String S2);
int         Denominator(ContValue Val);
int         GetInt(String S, int N);
int         DateToDay(String DS);
int         TimeToSecs(String TS);
void        SetTSBase(int y);
int         TStampToMins(String TS);

void        FreeGlobals();
void        FreeCosts();
void        FreeNames();
void        FreeTree(Tree T);
void        FreeRule(CRule R);
void        FreeRuleTree(RuleTree RT);
void        FreeRules(CRuleSet RS);
void        FreeLastCase(Description DVec);
void        FreeVector(void **V, int First, int Last);


/*************************************************************************/
/*                                                                       */
/*              Text strings                                             */
/*                                                                       */
/*************************************************************************/

/*
 * Several of these macros expand to a format string followed by its
 * arguments (a comma-separated list), so they are only meaningful inside
 * the argument list of a printf-style call.
 */
#define  TX_Line(l,f)           "\n*** line %d of `%s': ", l, f
#define  E_NOFILE(f,e)          "cannot open file %s%s\n", f, e
#define  E_BADATTNAME           "`:' or `:=' expected after attribute name"\
                                        " `%s'\n"
#define  E_EOFINATT             "unexpected eof while reading attribute `%s'\n"
#define  E_SINGLEATTVAL(a,v)    "attribute `%s' has only one value `%s'\n",\
                                        a, v
#define  E_DUPATTNAME           "multiple attributes with name `%s'\n"
#define  E_CWTATTERR            "case weight attribute must be continuous\n"
#define  E_BADATTVAL(v,a)       "bad value of `%s' for attribute `%s'\n", v, a
#define  E_BADNUMBER(a)         "value of `%s' changed to `?'\n", a
/* A stray `l' that used to follow the closing quote has been removed */
#define  E_BADCLASS             "bad class value `%s'\n"
#define  E_BADCLASSTHRESH       "bad class threshold `%s'\n"
#define  E_LEQCLASSTHRESH       "class threshold `%s' <= previous threshold\n"
#define  E_BADCOSTCLASS         "bad class `%s'\n"
#define  E_BADCOST              "bad cost value `%s'\n"
#define  E_NOMEM                "unable to allocate sufficient memory\n"
#define  E_TOOMANYVALS(a,n)     "too many values for attribute `%s'"\
                                        " (max %d)\n", a, n
#define  E_BADDISCRETE          "bad number of discrete values for attribute"\
                                        " `%s'\n"
#define  E_NOTARGET             "target attribute `%s' not found\n"
#define  E_BADCTARGET           "target attribute `%s' must be"\
                                        " type `continuous'\n"
#define  E_BADDTARGET           "target attribute `%s' must be specified by"\
                                        " a list of discrete values\n"
#define  E_LONGNAME             "overlength name: check data file formats\n"
#define  E_HITEOF               "unexpected end of file\n"
#define  E_MISSNAME             "missing name or value before `%s'\n"
#define  E_BADTSTMP(d,a)        "bad timestamp `%s' for attribute `%s'\n", d, a
#define  E_BADDATE(d,a)         "bad date `%s' for attribute `%s'\n", d, a
#define  E_BADTIME(d,a)         "bad time `%s' for attribute `%s'\n", d, a
#define  E_UNKNOWNATT           "unknown attribute name `%s'\n"
#define  E_BADDEF1(a,s,x)       "in definition of attribute `%s':\n"\
                                        "\tat `%.12s': expect %s\n", a, s, x
#define  E_BADDEF2(a,s,x)       "in definition of attribute `%s':\n"\
                                        "\t`%s': %s\n", a, s, x
#define  E_BADDEF3              "cannot define target attribute `%s'\n"
#define  E_BADDEF4              "[warning] target attribute appears in"\
                                        " definition of attribute `%s'\n"
#define  E_SAMEATT(a,b)         "[warning] attribute `%s' is identical to"\
                                        " attribute `%s'\n", a, b
#define  EX_MODELFILE(f)        "file %s incompatible with .names file\n", f
#define  E_MFATT                "undefined or excluded attribute"
#define  E_MFATTVAL             "undefined attribute value"
#define  E_MFCLASS              "undefined class"
#define  E_MFEOF                "unexpected eof"
#define  T_ErrorLimit           "Error limit exceeded\n"

#endif  /* SEE5_DEFNS_H */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -