/* mybase.cpp */
/*************************************************************************/
/* */
/* Get names of classes, attributes and attribute values */
/* ----------------------------------------------------- */
/* */
/*************************************************************************/
#include "stdafx.h"
#include "MyBase.h"
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
extern FILE *fLog, * fRST;
/*************************************************************************/
/* */
/* Compute the additional errors if the error rate increases to the */
/* upper limit of the confidence level. The coefficient is the */
/* square of the number of standard deviations corresponding to the */
/* selected confidence level. (Taken from Documenta Geigy Scientific */
/* Tables (Sixth Edition), p185 (with modifications).) */
/* */
/*************************************************************************/
/* Val and Dev are parallel nine-entry tables (same index = same row).      */
/* NOTE(review): going by the header comment above, Val presumably holds    */
/* confidence levels and Dev the matching numbers of standard deviations;   */
/* the routine that consumes them is not in this part of the file - confirm. */
float Val[] = { 0, 0.001, 0.005, 0.01, 0.05, 0.10, 0.20, 0.40, 1.00 };
float Dev[] = { 4.0, 3.09, 2.58, 2.33, 1.65, 1.28, 0.84, 0.25, 0.00 };

/* Character that terminated the most recent name; written by ReadName()   */
/* and inspected by its callers (EOF is stored here at end of file).       */
char Delimiter;
/*************************************************************************/
/* */
/* Read a name from file f into string s, setting Delimiter. */
/* */
/* - Embedded periods are permitted, but periods followed by space */
/* characters act as delimiters. */
/* - Embedded spaces are permitted, but multiple spaces are replaced */
/* by a single space. */
/* - Any character can be escaped by '\'. */
/* - The remainder of a line following '|' is ignored. */
/* */
/*************************************************************************/
/*  Read one name from f into s and record the terminating character in   */
/*  the global Delimiter.                                                  */
/*                                                                         */
/*  f  - input stream, read one character at a time with getc()            */
/*  s  - caller-supplied buffer; its size is NOT known here, so the caller */
/*       must provide enough room for the longest name plus the NUL        */
/*                                                                         */
/*  Returns false (with Delimiter = EOF and s untouched) when end of file  */
/*  is reached before any name character; returns true otherwise, with s   */
/*  NUL-terminated and trailing spaces removed.  s may be the empty string */
/*  when the first significant character is itself a delimiter.            */
/*                                                                         */
/*  The locals are deliberately called c and f: SkipComment is a macro     */
/*  defined outside this file and may refer to those names - confirm       */
/*  before renaming.                                                       */
BOOL ReadName(FILE *f, String s)
{
    char *Sp = s;
    int c;

    /* Skip to first non-space character, discarding '|' comments */
    while ( ( c = getc(f) ) == '|' || Space(c) )
    {
        if ( c == '|' ) SkipComment;
    }

    /* Return false if no names to read */
    if ( c == EOF )
    {
        Delimiter = EOF;
        return false;
    }

    /* Read in characters up to the next delimiter */
    while ( c != ':' && c != ',' && c != '\n' && c != '|' && c != EOF )
    {
        if ( c == '.' )
        {
            /* A period only ends the name when followed by space or '|';  */
            /* otherwise it is an ordinary embedded character.             */
            if ( ( c = getc(f) ) == '|' || Space(c) ) break;
            *Sp++ = '.';
        }

        /* A backslash makes the following character literal */
        if ( c == '\\' )
        {
            c = getc(f);
        }

        *Sp++ = c;

        if ( c == ' ' )
        {
            /* Collapse a run of spaces into the single one just stored */
            while ( ( c = getc(f) ) == ' ' )
                ;
        }
        else
        {
            c = getc(f);
        }
    }

    if ( c == '|' ) SkipComment;
    Delimiter = c;

    /* Strip trailing spaces.  The Sp > s guard matters when nothing was   */
    /* stored (e.g. the input starts with ':' or ','): without it the test */
    /* would read the byte in front of the caller's buffer.                */
    while ( Sp > s && Space(*(Sp-1)) ) Sp--;
    *Sp++ = '\0';

    return true;
}
/*************************************************************************/
/* */
/* Read the names of classes, attributes and legal attribute values. */
/* On completion, these names are stored in: */
/* ClassName - class names */
/* AttName - attribute names */
/* AttValName - attribute value names */
/* with: */
/* MaxAttVal - number of values for each attribute */
/* */
/* Other global variables set are: */
/* MaxAtt - maximum attribute number */
/* MaxClass - maximum class number */
/* MaxDiscrVal - maximum discrete values for any attribute */
/* */
/* Note: until the number of attributes is known, the name */
/* information is assembled in local arrays */
/* */
/*************************************************************************/
void GetNames()
{
FILE *Nf;//, *fopen();
char Fn[100], Buffer[1000];
DiscrValue v;
int AttCeiling=100, ClassCeiling=100, ValCeiling;
/* Open names file */
strcpy(Fn, FileName);
strcat(Fn, ".names");
if ( ! ( Nf = fopen(Fn, "r") ) ) Error(0, Fn, "");
/* Get class names from names file */
ClassName = (String *) calloc(ClassCeiling, sizeof(String));
MaxClass = -1;
do
{
ReadName(Nf, Buffer);
if ( ++MaxClass >= ClassCeiling)
{
ClassCeiling += 100;
ClassName = (String *) realloc(ClassName, ClassCeiling*sizeof(String));
}
ClassName[MaxClass] = CopyString(Buffer);
}while ( Delimiter == ',' );
/* Get attribute and attribute value names from names file */
AttName = (String *) calloc(AttCeiling, sizeof(String));
MaxAttVal = (DiscrValue *) calloc(AttCeiling, sizeof(DiscrValue));
AttValName = (String **) calloc(AttCeiling, sizeof(String *));
SpecialStatus = (char *) malloc(AttCeiling);
MaxAtt = -1;
while ( ReadName(Nf, Buffer) )
{
if ( Delimiter != ':' ) Error(1, Buffer, "");
if ( ++MaxAtt >= AttCeiling )
{
AttCeiling += 100;
AttName = (String *) realloc(AttName, AttCeiling*sizeof(String));
MaxAttVal = (DiscrValue *) realloc(MaxAttVal, AttCeiling*sizeof(DiscrValue));
AttValName = (String **) realloc(AttValName, AttCeiling*sizeof(String *));
SpecialStatus = (char *) realloc(SpecialStatus, AttCeiling);
}
AttName[MaxAtt] = CopyString(Buffer);
SpecialStatus[MaxAtt] = Nil;
MaxAttVal[MaxAtt] = 0;
ValCeiling = 100;
AttValName[MaxAtt] = (String *) calloc(ValCeiling, sizeof(String));
do
{
if ( ! ( ReadName(Nf, Buffer) ) ) Error(2, AttName[MaxAtt], "");
if ( ++MaxAttVal[MaxAtt] >= ValCeiling )
{
ValCeiling += 100;
AttValName[MaxAtt] =
(String *) realloc(AttValName[MaxAtt], ValCeiling*sizeof(String));
}
AttValName[MaxAtt][MaxAttVal[MaxAtt]] = CopyString(Buffer);
}
while ( Delimiter == ',' );
if ( MaxAttVal[MaxAtt] == 1 )
{
/* Check for special treatment */
if ( ! strcmp(Buffer, "continuous") )
{}
else
if ( ! memcmp(Buffer, "discrete", 8) )
{
SpecialStatus[MaxAtt] = DISCRETE;
/* Read max values, reserve space and check MaxDiscrVal */
v = atoi(&Buffer[8]);
if ( v < 2 )
{
fprintf(fLog,"** %s: illegal number of discrete values\n",
AttName[MaxAtt]);
exit(1);
}
AttValName[MaxAtt] =
(String *) realloc(AttValName[MaxAtt], (v+2)*sizeof(String));
AttValName[MaxAtt][0] = (char *) v;
if ( v > MaxDiscrVal ) MaxDiscrVal = v;
}
else if ( ! strcmp(Buffer, "ignore") )
{
SpecialStatus[MaxAtt] = MYIGNORE;
}
else
{
/* Cannot have only one discrete value for an attribute */
Error(3, AttName[MaxAtt], "");
}
MaxAttVal[MaxAtt] = 0;
}
else
if ( MaxAttVal[MaxAtt] > MaxDiscrVal ) MaxDiscrVal = MaxAttVal[MaxAtt];
}
fclose(Nf);
}
/*************************************************************************/
/* */
/* Locate value Val in List[First] to List[Last] */
/* */
/*************************************************************************/
/*  Linear search for Val among List[First] .. List[Last] (inclusive),     */
/*  compared with strcmp().  Returns the index of the first match, or      */
/*  First-1 when no entry matches (including when First > Last).           */
int Which(String Val, String List[], short First, short Last)
{
    short idx;

    for ( idx = First ; idx <= Last ; idx++ )
    {
        if ( strcmp(Val, List[idx]) == 0 )
        {
            return idx;
        }
    }

    return First - 1;
}
/*************************************************************************/
/* */
/* Allocate space then copy string into it */
/* */
/*************************************************************************/
/*  Return a freshly allocated copy of the NUL-terminated string x.        */
/*  The copy comes from calloc(); the allocation result is not checked.    */
String CopyString(String x)
{
    size_t Bytes = strlen(x) + 1;    /* characters plus terminating NUL */
    char *Copy = (char *) calloc(Bytes, sizeof(char));

    memcpy(Copy, x, Bytes);

    return Copy;
}
/*************************************************************************/
/* */
/* Error messages */
/* */
/*************************************************************************/
void Error(short n,String s1, String s2)
{
static char Messages;
char MsgInfo[200],Buffer[100];
strcpy(MsgInfo,"ERROR: ");
switch(n)
{
case 0: sprintf(Buffer,"cannot open file %s%s", s1, s2);
break;
case 1: sprintf(Buffer,"colon expected after attribute name %s", s1);
break;
case 2: sprintf(Buffer,"unexpected eof while reading attribute %s", s1);
break;
case 3: sprintf(Buffer,"attribute %s has only one value", s1);
break;
case 4: sprintf(Buffer,"case %d's value of '%s' for attribute %s is illegal",
MaxItem+1, s2, s1);
break;
case 5: sprintf(Buffer,"case %d's class of '%s' is illegal", MaxItem+1, s2);
}
strcat(MsgInfo,Buffer);
AfxMessageBox(MsgInfo);
if(n==0)
exit(1);
if ( ++Messages > 10 )
{
AfxMessageBox("Error limit exceeded\n");
exit(1);
}
}
/*************************************************************************/
/* From Getdata.c */
/* Get case descriptions from data file */
/* -------------------------------------- */
/* */
/*************************************************************************/
/*************************************************************************/
/* */
/* Read raw case descriptions from file with given extension. */
/* */
/* On completion, cases are stored in array Item in the form */
/* of Descriptions (i.e. arrays of attribute values), and */
/* MaxItem is set to the number of data items. */
/* */
/*************************************************************************/
/*  Read every case from the file "<FileName><Extension>" into the global  */
/*  array Item and set MaxItem to the index of the last case read (-1 if   */
/*  the file holds no cases).                                              */
/*                                                                         */
/*  Extension - suffix appended to FileName to form the data file name     */
/*                                                                         */
/*  Item grows in steps of Inc entries.  Failure to open the file is       */
/*  reported through Error(0, ...), which terminates the program.          */
void GetData(String Extension)
{
    FILE *Df;
    char Fn[100];
    ItemNo i=0, ItemSpace=0;

    /* Open data file; refuse names that would not fit in Fn together      */
    /* with the terminating NUL.                                           */
    if ( strlen(FileName) + strlen(Extension) >= sizeof(Fn) )
    {
        AfxMessageBox("ERROR: data file name is too long");
        exit(1);
    }
    strcpy(Fn, FileName);
    strcat(Fn, Extension);
    if ( ! ( Df = fopen(Fn, "r") ) )
        Error(0, Fn, "");

    do
    {
        /* Kept current while reading: Error() prints MaxItem+1 as the     */
        /* case number in its messages.                                    */
        MaxItem = i;

        /* Make sure there is room for another item */
        if ( i >= ItemSpace )
        {
            if ( ItemSpace )
            {
                ItemSpace += Inc;
                Item = (Description *)
                    realloc(Item, ItemSpace*sizeof(Description));
            }
            else
            {
                Item = (Description *)
                    malloc((ItemSpace=Inc)*sizeof(Description));
            }
        }

        Item[i] = GetDescription(Df);

    } while ( Item[i] != Nil && ++i );   /* Nil marks end of file */

    fclose(Df);

    MaxItem = i - 1;
}
/*************************************************************************/
/* */
/* Read a raw case description from file Df. */
/* */
/* For each attribute, read the attribute value from the file. */
/* If it is a discrete valued attribute, find the associated no. */
/* of this attribute value (if the value is unknown this is 0). */
/* */
/* Returns the Description of the case (i.e. the array of */
/* attribute values). */
/* */
/*************************************************************************/
/*  Read one case from Df and return it as a newly allocated Description   */
/*  of MaxAtt+2 entries, or Nil when ReadName() finds no further name.     */
/*                                                                         */
/*  For each attribute 0..MaxAtt one token is consumed:                    */
/*    - ignored attributes store discrete value 0;                         */
/*    - discrete attributes store the index of the token in AttValName     */
/*      ("?" stores 0); attributes declared "discrete N" learn unseen      */
/*      values on the fly, up to the limit kept in AttValName[Att][0];     */
/*    - continuous attributes store the token parsed by strtod() ("?"      */
/*      stores Unknown).                                                   */
/*  The token after the last attribute is looked up in ClassName and       */
/*  stored with the Class() macro; an unrecognised class is reported and   */
/*  replaced by class 0.                                                   */
Description GetDescription(FILE *Df)
{
    Attribute Att;
    char name[500], *endname;
    int Dv;
    float Cv;
    Description Dvec;

    /* Nothing left to read: signal end of data */
    if ( ! ReadName(Df, name) )
    {
        return Nil;
    }

    Dvec = (Description) calloc(MaxAtt+2, sizeof(AttValue));

    ForEach(Att, 0, MaxAtt)
    {
        /* "?" denotes a missing value for either kind of attribute */
        int IsUnknown = ( strcmp(name, "?") == 0 );

        if ( SpecialStatus[Att] == MYIGNORE )
        {
            /* Skip this value */
            DVal(Dvec, Att) = 0;
        }
        else if ( MaxAttVal[Att] || SpecialStatus[Att] == DISCRETE )
        {
            /* Discrete value */
            Dv = 0;

            if ( ! IsUnknown )
            {
                Dv = Which(name, AttValName[Att], 1, MaxAttVal[Att]);

                if ( Dv == 0 )
                {
                    if ( SpecialStatus[Att] != DISCRETE )
                    {
                        /* Fixed value list and the token is not in it */
                        Error(4, AttName[Att], name);
                    }
                    else
                    {
                        /* Add value to list, respecting the declared limit */
                        Dv = ++MaxAttVal[Att];
                        if ( Dv > (int) AttValName[Att][0] )
                        {
                            fprintf(fLog,"\nToo many values for %s (max %d)\n",
                                    AttName[Att], (int) AttValName[Att][0]);
                            exit(1);
                        }
                        AttValName[Att][Dv] = CopyString(name);
                    }
                }
            }

            DVal(Dvec, Att) = Dv;
        }
        else
        {
            /* Continuous value */
            if ( IsUnknown )
            {
                Cv = Unknown;
            }
            else
            {
                Cv = strtod(name, &endname);
                /* Reject tokens that are not entirely numeric */
                if ( endname == name || *endname != '\0' )
                    Error(4, AttName[Att], name);
            }

            CVal(Dvec, Att) = Cv;
        }

        /* Fetch the next token: another attribute value or the class */
        ReadName(Df, name);
    }

    Dv = Which(name, ClassName, 0, MaxClass);
    if ( Dv < 0 )
    {
        Error(5, "", name);
        Dv = 0;
    }
    Class(Dvec) = Dv;

    return Dvec;
}
/*************************************************************************/
/* From BestTree.c */
/* */
/* Routines to manage tree growth, pruning and evaluation */
/* ------------------------------------------------------ */
/* */
/*************************************************************************/
/* Array of ItemNo; BestTree() allocates it with MaxClass+1 entries       */
/* (presumably one per class - confirm against the code that fills it).   */
ItemNo *TargetClassFreq;
/* Unpruned tree storage; OneTree() allocates a single slot and stores    */
/* the tree returned by FormTree() in Raw[0].                             */
Tree *Raw;
/* Pruned tree storage, only declared here (its definition is not in this */
/* part of the file); OneTree() allocates a single slot and stores a copy */
/* of Raw[0] in Pruned[0] before pruning it.                              */
extern Tree *Pruned;
/*************************************************************************/
/* */
/* Grow and prune a single tree from all data */
/* */
/*************************************************************************/
/*  Build one tree over items 0..MaxItem, log and save it, then prune a    */
/*  copy of it.  The unpruned tree ends up in Raw[0] and the copy that is  */
/*  passed to Prune() in Pruned[0]; the latter is printed again only when  */
/*  Prune() returns non-zero.                                              */
void OneTree()
{
    /* Initialisation routines (defined elsewhere) run before any tree is  */
    /* formed.                                                             */
    InitialiseTreeData();
    InitialiseWeights();

    /* One slot each for the unpruned and the pruned tree */
    Raw = (Tree *) calloc(1, sizeof(Tree));
    Pruned = (Tree *) calloc(1, sizeof(Tree));

    AllKnown = true;

    *Raw = FormTree(0, MaxItem);
    fprintf(fLog,"\n");
    PrintTree(*Raw);
    SaveTree(*Raw, ".unpruned");

    *Pruned = CopyTree(*Raw);
    if ( ! Prune(*Pruned) )
    {
        return;
    }

    fprintf(fLog,"\nSimplified ");
    PrintTree(*Pruned);
}
/*************************************************************************/
/* */
/* Grow and prune TRIALS trees and select the best of them */
/* */
/*************************************************************************/
short BestTree()
{
short t, Best=0;
InitialiseTreeData();
TargetClassFreq = (ItemNo *) calloc(MaxClass+1, sizeof(ItemNo));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -