⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mybase.cpp

📁 实现决策树分类训练试验。 源自c4.5
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/*************************************************************************/
/*									 */
/*	Get names of classes, attributes and attribute values		 */
/*	-----------------------------------------------------		 */
/*									 */
/*************************************************************************/
#include "stdafx.h"
#include "MyBase.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

extern FILE *fLog, * fRST;

/*************************************************************************/
/*									 */
/*  Compute the additional errors if the error rate increases to the	 */
/*  upper limit of the confidence level.  The coefficient is the	 */
/*  square of the number of standard deviations corresponding to the	 */
/*  selected confidence level.  (Taken from Documenta Geigy Scientific	 */
/*  Tables (Sixth Edition), p185 (with modifications).)			 */
/*									 */
/*************************************************************************/


/*  Paired lookup tables, nine entries each, indexed in parallel.	 */
/*  NOTE(review): judging by the names and the header comment above,	 */
/*  Val presumably lists confidence levels and Dev the matching number	 */
/*  of standard deviations -- confirm against the routine that reads	 */
/*  them (not visible in this part of the file).			 */
float Val[] = { 0, 0.001, 0.005, 0.01, 0.05, 0.10, 0.20, 0.40, 1.00 };
float Dev[] = { 4.0, 3.09, 2.58, 2.33, 1.65, 1.28, 0.84, 0.25, 0.00 };


/*  Character that terminated the most recent ReadName() call.		 */
/*  ReadName() stores EOF here when there was nothing left to read.	 */
char	Delimiter;
/*************************************************************************/
/*									 */
/*  Read a name from file f into string s, setting Delimiter.		 */
/*									 */
/*  - Embedded periods are permitted, but periods followed by space	 */
/*    characters act as delimiters.					 */
/*  - Embedded spaces are permitted, but multiple spaces are replaced	 */
/*    by a single space.						 */
/*  - Any character can be escaped by '\'.				 */
/*  - The remainder of a line following '|' is ignored.			 */
/*									 */
/*************************************************************************/
/*  Parameters:								 */
/*	f  -  stream to read from					 */
/*	s  -  caller-supplied buffer that receives the NUL-terminated	 */
/*	      name; its capacity is not passed in, so the caller must	 */
/*	      make it large enough for the longest expected name	 */
/*  Returns false (and sets Delimiter to EOF) when end of file is	 */
/*  reached before any name character; otherwise returns true with	 */
/*  Delimiter set to the character that ended the name.			 */
BOOL ReadName(FILE *f, String s)
{
    /*  NOTE(review): Space and SkipComment are macros defined outside	 */
    /*  this file; SkipComment is used as a bare statement, so it	 */
    /*  presumably refers to the locals `c` and `f` by name -- keep	 */
    /*  those identifiers.						 */
    char *Sp = s;
    int c;

    /*  Skip leading white space and comments  */

    while ( ( c = getc(f) ) == '|' || Space(c) )
    {
		if ( c == '|' ) SkipComment;
    }

    /*  Return false if no names to read  */

    if ( c == EOF )
    {
		Delimiter = EOF;
		return false;
    }

    /*  Read in characters up to the next delimiter  */

    while ( c != ':' && c != ',' && c != '\n' && c != '|' && c != EOF )
    {
		if ( c == '.' )
		{
			/*  A period followed by white space, a comment or the end
			    of the file ends the name; the period is not stored.
			    (EOF must be tested here, otherwise it would be
			    written into the name as a character.)  */

			c = getc(f);
			if ( c == '|' || Space(c) || c == EOF ) break;
			*Sp++ = '.';
		}

		if ( c == '\\' )
		{
			/*  Escaped character: take the next one literally, but
			    never store EOF as if it were a character  */

			c = getc(f);
			if ( c == EOF ) break;
		}

		*Sp++ = (char) c;

		if ( c == ' ' )
		{
			/*  Collapse a run of spaces into the one just stored  */

			while ( ( c = getc(f) ) == ' ' )
			;
		}
		else
		{
			c = getc(f);
		}
    }

    if ( c == '|' ) SkipComment;
    Delimiter = c;

    /*  Strip trailing spaces, never stepping back past the start of
	the buffer (the name is empty when the first character read is
	already a delimiter)  */

    while ( Sp > s && Space(*(Sp-1)) ) Sp--;

    *Sp = '\0';
    return true;
}

/*************************************************************************/
/*									 */
/*  Read the names of classes, attributes and legal attribute values.	 */
/*  On completion, these names are stored in:				 */
/*	ClassName	-  class names					 */
/*	AttName		-  attribute names				 */
/*	AttValName	-  attribute value names			 */
/*  with:								 */
/*	MaxAttVal	-  number of values for each attribute		 */
/*									 */
/*  Other global variables set are:					 */
/*	MaxAtt		-  maximum attribute number			 */
/*	MaxClass	-  maximum class number				 */
/*	MaxDiscrVal	-  maximum discrete values for any attribute	 */
/*									 */
/*  Note:  until the number of attributes is known, the name		 */
/*	   information is assembled in local arrays			 */
/*									 */
/*************************************************************************/
/*  The names file is FileName with ".names" appended.  Failure to	 */
/*  open it is reported through Error(0, ...), which terminates the	 */
/*  program.								 */
void GetNames()
{
    FILE *Nf;
    char *Fn, Buffer[1000];
    DiscrValue v;
    int AttCeiling=100, ClassCeiling=100, ValCeiling;

    /*  Open names file.  The path is built in a buffer sized from
	FileName so that long paths cannot overrun a fixed array.  */

    Fn = (char *) malloc(strlen(FileName) + sizeof(".names"));
    if ( ! Fn )
    {
		fprintf(fLog,"** out of memory building names file path\n");
		exit(1);
    }
    strcpy(Fn, FileName);
    strcat(Fn, ".names");
    if ( ! ( Nf = fopen(Fn, "r") ) ) Error(0, Fn, "");
    free(Fn);

    /*  Get class names from names file.  Buffer starts out empty so
	that an empty file yields an empty class name instead of a copy
	of uninitialised memory (ReadName leaves Buffer untouched when it
	hits end of file).  */

    Buffer[0] = '\0';
    ClassName = (String *) calloc(ClassCeiling, sizeof(String));
    MaxClass = -1;
    do
    {
		ReadName(Nf, Buffer);

		if ( ++MaxClass >= ClassCeiling)
		{
			ClassCeiling += 100;
			ClassName = (String *) realloc(ClassName, ClassCeiling*sizeof(String));
		}
		ClassName[MaxClass] = CopyString(Buffer);
    }while ( Delimiter == ',' );

    /*  Get attribute and attribute value names from names file  */

    AttName = (String *) calloc(AttCeiling, sizeof(String));
    MaxAttVal = (DiscrValue *) calloc(AttCeiling, sizeof(DiscrValue));
    AttValName = (String **) calloc(AttCeiling, sizeof(String *));
    SpecialStatus = (char *) malloc(AttCeiling);

    MaxAtt = -1;
    while ( ReadName(Nf, Buffer) )
    {
		/*  Each attribute entry is "name: value, value, ..."  */

		if ( Delimiter != ':' ) Error(1, Buffer, "");

		if ( ++MaxAtt >= AttCeiling )
		{
			/*  Grow all per-attribute arrays together  */

			AttCeiling += 100;
			AttName = (String *) realloc(AttName, AttCeiling*sizeof(String));
			MaxAttVal = (DiscrValue *) realloc(MaxAttVal, AttCeiling*sizeof(DiscrValue));
			AttValName = (String **) realloc(AttValName, AttCeiling*sizeof(String *));
			SpecialStatus = (char *) realloc(SpecialStatus, AttCeiling);
		}

		AttName[MaxAtt] = CopyString(Buffer);
		SpecialStatus[MaxAtt] = Nil;
		MaxAttVal[MaxAtt] = 0;
		ValCeiling = 100;
		AttValName[MaxAtt] = (String *) calloc(ValCeiling, sizeof(String));

		/*  Value names are stored from index 1 upwards  */

		do
		{
			if ( ! ( ReadName(Nf, Buffer) ) ) Error(2, AttName[MaxAtt], "");

			if ( ++MaxAttVal[MaxAtt] >= ValCeiling )
			{
				ValCeiling += 100;
				AttValName[MaxAtt] =
					(String *) realloc(AttValName[MaxAtt], ValCeiling*sizeof(String));
			}

			AttValName[MaxAtt][MaxAttVal[MaxAtt]] = CopyString(Buffer);
		}
		while ( Delimiter == ',' );

		if ( MaxAttVal[MaxAtt] == 1 )
		{
			/*  A single "value" must be one of the keywords
			    continuous, discrete N or ignore  */

			if ( ! strcmp(Buffer, "continuous") )
			{}
			else
			if ( ! strncmp(Buffer, "discrete", 8) )
			{
				/*  strncmp stops at Buffer's terminator, so a short
				    string is never compared past its end  */

				SpecialStatus[MaxAtt] = DISCRETE;

				/*  Read max values, reserve space and check MaxDiscrVal  */

				v = atoi(&Buffer[8]);
				if ( v < 2 )
				{
					fprintf(fLog,"** %s: illegal number of discrete values\n",
					   AttName[MaxAtt]);
					exit(1);
				}

				/*  Slot 0 of the value list holds the declared maximum
				    number of values, stored as a pointer-sized integer  */

				AttValName[MaxAtt] =
					(String *) realloc(AttValName[MaxAtt], (v+2)*sizeof(String));
				AttValName[MaxAtt][0] = (char *) v;
				if ( v > MaxDiscrVal ) MaxDiscrVal = v;
			}
			else if ( ! strcmp(Buffer, "ignore") )
			{
				SpecialStatus[MaxAtt] = MYIGNORE;
			}
			else
			{
				/*  Cannot have only one discrete value for an attribute  */

				Error(3, AttName[MaxAtt], "");
			}

			/*  Keyword attributes start with no enumerated values  */

			MaxAttVal[MaxAtt] = 0;
		}
		else
		if ( MaxAttVal[MaxAtt] > MaxDiscrVal ) MaxDiscrVal = MaxAttVal[MaxAtt];
    }

    fclose(Nf);
}

/*************************************************************************/
/*									 */
/*	Locate value Val in List[First] to List[Last]			 */
/*									 */
/*************************************************************************/

/*  Linear search for Val among List[First] .. List[Last] using	 */
/*  strcmp.  Returns the index of the first match, or First-1 when	 */
/*  no entry matches.							 */
int Which(String Val, String List[], short First, short Last)
{
    short idx;

    for ( idx = First ; idx <= Last ; idx++ )
    {
		if ( strcmp(Val, List[idx]) == 0 )
		{
			return idx;
		}
    }

    return First-1;
}

/*************************************************************************/
/*									 */
/*	Allocate space then copy string into it				 */
/*									 */
/*************************************************************************/

/*  Returns a newly allocated copy of the NUL-terminated string x.	 */
/*  The result comes from calloc; this function never frees it.	 */
String CopyString(String x)
{
    size_t bytes = strlen(x) + 1;	/* room for the terminating NUL */
    char *copy = (char *) calloc(bytes, sizeof(char));

    strcpy(copy, x);
    return copy;
}



/*************************************************************************/
/*									 */
/*			Error messages					 */
/*									 */
/*************************************************************************/
/*  Show error message number n in a message box.			 */
/*									 */
/*	n   -  message selector (0..5; other values give a generic	 */
/*	       "unknown error" text)					 */
/*	s1  -  first string argument (file or attribute name)		 */
/*	s2  -  second string argument (offending value); only some	 */
/*	       messages use it						 */
/*									 */
/*  Message 0 (cannot open file) terminates the program at once.	 */
/*  Other messages return to the caller until more than ten have	 */
/*  been shown, after which the program also terminates.		 */
void Error(short n,String s1, String s2)
{
    static char Messages;		/* messages shown so far */

    /*  Every %s below carries a precision that caps how many characters
	are copied, so that the fixed text plus the arguments always fit
	in Buffer, and "ERROR:  " plus Buffer always fits in MsgInfo.
	Arguments shorter than the cap are printed unchanged.  */

    char MsgInfo[600],Buffer[512];

    strcpy(MsgInfo,"ERROR:  ");
    switch(n)
    {
	case 0: sprintf(Buffer,"cannot open file %.240s%.240s", s1, s2);
		break;

	case 1:	sprintf(Buffer,"colon expected after attribute name %.400s", s1);
		break;

	case 2:	sprintf(Buffer,"unexpected eof while reading attribute %.400s", s1);
		break;

	case 3: sprintf(Buffer,"attribute %.400s has only one value", s1);
		break;

	case 4: sprintf(Buffer,"case %d's value of '%.200s' for attribute %.200s is illegal",
		    MaxItem+1, s2, s1);
		break;

	case 5: sprintf(Buffer,"case %d's class of '%.400s' is illegal", MaxItem+1, s2);
		break;

	default:
		/*  Never leave Buffer unset before it is appended below  */
		sprintf(Buffer,"unknown error code %d", (int) n);
		break;
    }

	strcat(MsgInfo,Buffer);
	AfxMessageBox(MsgInfo);
	if(n==0)
		exit(1);

    if ( ++Messages > 10 )
    {
		AfxMessageBox("Error limit exceeded\n");
		exit(1);
    }
}


/*************************************************************************/
/*	 From Getdata.c								 */
/*	Get case descriptions from data file				 */
/*	--------------------------------------				 */
/*									 */
/*************************************************************************/
/*************************************************************************/
/*									 */
/*  Read raw case descriptions from file with given extension.		 */
/*									 */
/*  On completion, cases are stored in array Item in the form		 */
/*  of Descriptions (i.e. arrays of attribute values), and		 */
/*  MaxItem is set to the number of data items.				 */
/*									 */
/*************************************************************************/
/*  Extension is appended to FileName to form the path of the data	 */
/*  file.  Failure to open it is reported through Error(0, ...),	 */
/*  which terminates the program.					 */
void GetData(String Extension)
{
    FILE *Df;
    char *Fn;
    ItemNo i=0, ItemSpace=0;

    /*  Open data file.  The path is built in a buffer sized from its
	two parts so that long paths cannot overrun a fixed array.  */

    Fn = (char *) malloc(strlen(FileName) + strlen(Extension) + 1);
    if ( ! Fn )
    {
		fprintf(fLog,"** out of memory building data file path\n");
		exit(1);
    }
    strcpy(Fn, FileName);
    strcat(Fn, Extension);
    if ( ! ( Df = fopen(Fn, "r") ) )
		Error(0, Fn, "");
    free(Fn);

    do
    {
		/*  Error() prints MaxItem+1 as the case number, so keep
		    MaxItem equal to the index of the case being read  */

		MaxItem = i;

		/*  Make sure there is room for another item; the array
		    grows by Inc entries at a time  */

		if ( i >= ItemSpace )
		{
			if ( ItemSpace )
			{
				ItemSpace += Inc;
				Item = (Description *)
					realloc(Item, ItemSpace*sizeof(Description));
			}
			else
			{
				Item = (Description *)
					malloc((ItemSpace=Inc)*sizeof(Description));
			}
		}

		Item[i] = GetDescription(Df);

		/*  GetDescription returns Nil once the file is exhausted  */

    } while ( Item[i] != Nil && ++i );

    fclose(Df);

    /*  i now counts the cases read; MaxItem is the last valid index  */

    MaxItem = i - 1;
}



/*************************************************************************/
/*									 */
/*  Read a raw case description from file Df.				 */
/*									 */
/*  For each attribute, read the attribute value from the file.		 */
/*  If it is a discrete valued attribute, find the associated no.	 */
/*  of this attribute value (if the value is unknown this is 0).	 */
/*									 */
/*  Returns the Description of the case (i.e. the array of		 */
/*  attribute values).							 */
/*									 */
/*************************************************************************/
/*  Returns a freshly allocated Description holding one value per	 */
/*  attribute plus the class, or Nil when ReadName finds nothing	 */
/*  more to read from Df.						 */
Description GetDescription(FILE *Df)
{
    Attribute Att;
    char token[500], *stop;
    int DiscrIdx;
    float ContVal;
    Description CaseVec;

    /*  No further name in the file: no further case  */

    if ( ! ReadName(Df, token) )
    {
		return Nil;
    }

    /*  MaxAtt+1 attribute slots plus one slot for the class  */

    CaseVec = (Description) calloc(MaxAtt+2, sizeof(AttValue));

    ForEach(Att, 0, MaxAtt)
    {
		if ( SpecialStatus[Att] == MYIGNORE )
		{
			/*  Ignored attribute: the value read is discarded  */

			DVal(CaseVec, Att) = 0;
		}
		else if ( MaxAttVal[Att] || SpecialStatus[Att] == DISCRETE )
		{
			/*  Discrete value; "?" (unknown) is stored as 0  */

			DiscrIdx = 0;
			if ( strcmp(token, "?") != 0 )
			{
				/*  Which() yields 0 (First-1) when the name is absent  */

				DiscrIdx = Which(token, AttValName[Att], 1, MaxAttVal[Att]);
				if ( DiscrIdx == 0 )
				{
					if ( SpecialStatus[Att] != DISCRETE )
					{
						Error(4, AttName[Att], token);
					}
					else
					{
						/*  Open-ended discrete attribute: append the new
						    value, provided the limit kept in slot 0 of
						    the value list is not exceeded  */

						DiscrIdx = ++MaxAttVal[Att];
						if ( DiscrIdx > (int) AttValName[Att][0] )
						{
							fprintf(fLog,"\nToo many values for %s (max %d)\n",
								AttName[Att], (int) AttValName[Att][0]);
							exit(1);
						}
						AttValName[Att][DiscrIdx] = CopyString(token);
					}
				}
			}
			DVal(CaseVec, Att) = DiscrIdx;
		}
		else
		{
			/*  Continuous value; "?" is stored as Unknown  */

			if ( strcmp(token, "?") == 0 )
			{
				ContVal = Unknown;
			}
			else
			{
				/*  The whole token must parse as a number  */

				ContVal = strtod(token, &stop);
				if ( stop == token || *stop != '\0' )
					Error(4, AttName[Att], token);
			}
			CVal(CaseVec, Att) = ContVal;
		}

		/*  Fetch the next field; after the last attribute this is
		    the class name  */

		ReadName(Df, token);
    }

    /*  An unrecognised class is reported and replaced by class 0  */

    DiscrIdx = Which(token, ClassName, 0, MaxClass);
    if ( DiscrIdx < 0 )
    {
		Error(5, "", token);
		DiscrIdx = 0;
    }
    Class(CaseVec) = DiscrIdx;

    return CaseVec;
}


/*************************************************************************/
/*   From BestTree.c							 */
/*									 */
/*	Routines to manage tree growth, pruning and evaluation		 */
/*	------------------------------------------------------		 */
/*									 */
/*************************************************************************/

/*  TargetClassFreq: allocated in BestTree() with MaxClass+1 entries.	 */
/*  Raw: allocated in OneTree() as a one-element array; Raw[0] holds	 */
/*	the tree returned by FormTree().				 */
/*  Pruned: defined in another file; OneTree() allocates one element	 */
/*	and stores in Pruned[0] the CopyTree() copy it passes to	 */
/*	Prune().							 */
ItemNo		*TargetClassFreq;
Tree		*Raw;
extern Tree	*Pruned;

/*************************************************************************/
/*									 */
/*	Grow and prune a single tree from all data			 */
/*									 */
/*************************************************************************/

/*  Builds one tree over items 0..MaxItem into Raw[0], prints it to	 */
/*  fLog and saves it with the ".unpruned" extension, then prunes a	 */
/*  copy held in Pruned[0].  The pruned tree is printed only when	 */
/*  Prune() returns nonzero.						 */
void OneTree()
{
    InitialiseTreeData();
    InitialiseWeights();

    /*  One zero-initialised slot for each of the two trees  */

    Raw = (Tree *) calloc(1, sizeof *Raw);
    Pruned = (Tree *) calloc(1, sizeof *Pruned);

    AllKnown = true;

    /*  Grow, print and save the unpruned tree  */

    Raw[0] = FormTree(0, MaxItem);
    fprintf(fLog,"\n");
    PrintTree(Raw[0]);
    SaveTree(Raw[0], ".unpruned");

    /*  Prune a copy so that Raw[0] is left as built  */

    Pruned[0] = CopyTree(Raw[0]);
    if ( ! Prune(Pruned[0]) )
    {
		return;
    }

    fprintf(fLog,"\nSimplified ");
    PrintTree(Pruned[0]);
}

/*************************************************************************/
/*									 */
/*	Grow and prune TRIALS trees and select the best of them		 */
/*									 */
/*************************************************************************/

short BestTree()
{
    short t, Best=0;

    InitialiseTreeData();

    TargetClassFreq = (ItemNo *) calloc(MaxClass+1, sizeof(ItemNo));

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -