📄 hooks.c
字号:
/*************************************************************************//* *//* Source code for use with See5/C5.0 Release 2.02 *//* ----------------------------------------------- *//* Copyright RuleQuest Research 2005 *//* *//* This code is provided "as is" without warranty of any kind, *//* either express or implied. All use is at your own risk. *//* *//*************************************************************************/#define MAXLINEBUFFER 10000char LineBuffer[MAXLINEBUFFER], *LBp=LineBuffer;/*************************************************************************//* *//* Read a name from file f into string s, setting Delimiter. *//* *//* - Embedded periods are permitted, but periods followed by space *//* characters act as delimiters. *//* - Embedded spaces are permitted, but multiple spaces are *//* replaced by a single space. *//* - Any character can be escaped by '\'. *//* - The remainder of a line following '|' is ignored. *//* *//*************************************************************************/Boolean ReadName(FILE *f, String s, int n, char ColonOpt)/* -------- */{ register char *Sp=s; register int c; char Msg[2]; /* Skip to first non-space character */ while ( (c = InChar(f)) == '|' || Space(c) ) { if ( c == '|' ) SkipComment; } /* Return false if no names to read */ if ( c == EOF ) { Delimiter = EOF; return false; } /* Read in characters up to the next delimiter */ while ( c != ColonOpt && c != ',' && c != '\n' && c != '|' && c != EOF ) { if ( --n <= 0 ) { if ( Of ) Error(LONGNAME, "", ""); } if ( c == '.' ) { if ( (c = InChar(f)) == '|' || Space(c) || c == EOF ) break; *Sp++ = '.'; continue; } if ( c == '\\' ) { c = InChar(f); } if ( Space(c) ) { *Sp++ = ' '; while ( ( c = InChar(f) ) == ' ' || c == '\t' ) ; } else { *Sp++ = c; c = InChar(f); } } if ( c == '|' ) SkipComment; Delimiter = c; /* Special case for ':=' */ if ( Delimiter == ':' ) { if ( *LBp == '=' ) { Delimiter = '='; LBp++; } } /* Strip trailing spaces */ while ( Sp > s && Space(*(Sp-1)) ) Sp--; if ( Sp == s ) { Msg[0] = ( Space(c) ? '.' : c ); Msg[1] = '\00'; Error(MISSNAME, Fn, Msg); } *Sp++ = '\0'; return true;}/*************************************************************************//* *//* Read names of classes, attributes and legal attribute values. *//* On completion, names are stored in: *//* ClassName - class names *//* AttName - attribute names *//* AttValName - attribute value names *//* with: *//* MaxAttVal - number of values for each attribute *//* *//* Other global variables set are: *//* MaxAtt - maximum attribute number *//* MaxClass - maximum class number *//* *//* Note: until the number of attributes is known, the name *//* information is assembled in local arrays *//* *//*************************************************************************/void GetNames(FILE *Nf)/* -------- */{ char Buffer[1000]="", *EndBuff; int AttCeiling=100, ClassCeiling=100; Attribute Att; ClassNo c; ErrMsgs = AttExIn = 0; LineNo = 0; LBp = LineBuffer; *LBp = 0; /* Get class names from names file. This entry can be: - a list of discrete values separated by commas - the name of the discrete attribute to use as the class - the name of a continuous attribute followed by a colon and a comma-separated list of thresholds used to segment it */ ClassName = AllocZero(ClassCeiling, String); MaxClass = ClassAtt = LabelAtt = 0; do { ReadName(Nf, Buffer, 1000, ':'); if ( ++MaxClass >= ClassCeiling) { ClassCeiling += 100; Realloc(ClassName, ClassCeiling, String); } ClassName[MaxClass] = strdup(Buffer); } while ( Delimiter == ',' ); if ( Delimiter == ':' ) { /* Thresholds for continuous class attribute */ ClassThresh = Alloc(ClassCeiling, ContValue); MaxClass = 0; do { ReadName(Nf, Buffer, 1000, ':'); if ( ++MaxClass >= ClassCeiling) { ClassCeiling += 100; Realloc(ClassThresh, ClassCeiling, ContValue); } ClassThresh[MaxClass] = strtod(Buffer, &EndBuff); if ( EndBuff == Buffer || *EndBuff != '\0' ) { Error(BADCLASSTHRESH, Buffer, Nil); } else if ( MaxClass > 1 && ClassThresh[MaxClass] <= ClassThresh[MaxClass-1] ) { Error(LEQCLASSTHRESH, Buffer, Nil); } } while ( Delimiter == ',' ); } /* Get attribute and attribute value names from names file */ AttName = AllocZero(AttCeiling, String); MaxAttVal = AllocZero(AttCeiling, DiscrValue); AttValName = AllocZero(AttCeiling, String *); SpecialStatus = AllocZero(AttCeiling, char); AttDef = AllocZero(AttCeiling, Definition); MaxAtt = 0; while ( ReadName(Nf, Buffer, 1000, ':') ) { if ( Delimiter != ':' && Delimiter != '=' ) { Error(BADATTNAME, Buffer, ""); } /* Check for attributes included/excluded */ if ( ( *Buffer == 'a' || *Buffer == 'A' ) && ! memcmp(Buffer+1, "ttributes ", 10) && ! memcmp(Buffer+strlen(Buffer)-6, "cluded", 6) ) { AttExIn = ( ! memcmp(Buffer+strlen(Buffer)-8, "in", 2) ? 1 : -1 ); if ( AttExIn == 1 ) { ForEach(Att, 1, MaxAtt) { SpecialStatus[Att] |= SKIP; } } while ( ReadName(Nf, Buffer, 1000, ':') ) { Att = Which(Buffer, AttName, 1, MaxAtt); if ( ! Att ) { Error(UNKNOWNATT, Buffer, Nil); } else if ( AttExIn == 1 ) { SpecialStatus[Att] -= SKIP; } else { SpecialStatus[Att] |= SKIP; } } break; } if ( Which(Buffer, AttName, 1, MaxAtt) > 0 ) { Error(DUPATTNAME, Buffer, Nil); } if ( ++MaxAtt >= AttCeiling ) { AttCeiling += 100; Realloc(AttName, AttCeiling, String); Realloc(MaxAttVal, AttCeiling, DiscrValue); Realloc(AttValName, AttCeiling, String *); Realloc(SpecialStatus, AttCeiling, char); Realloc(AttDef, AttCeiling, Definition); } AttName[MaxAtt] = strdup(Buffer); SpecialStatus[MaxAtt] = Nil; AttDef[MaxAtt] = Nil; MaxAttVal[MaxAtt] = 0; if ( Delimiter == '=' ) { if ( MaxClass == 1 && ! strcmp(ClassName[1], AttName[MaxAtt]) ) { Error(BADDEF3, Nil, Nil); } ImplicitAtt(Nf); } else { ExplicitAtt(Nf); } } /* Check whether class is one of the attributes */ if ( MaxClass == 1 || ClassThresh ) { /* Class attribute must be present and must be either a discrete attribute or a thresholded continuous attribute */ ClassAtt = Which(ClassName[1], AttName, 1, MaxAtt); if ( ClassAtt <= 0 || Exclude(ClassAtt) ) { Error(NOTARGET, ClassName[1], ""); } else if ( ClassThresh && ( ! Continuous(ClassAtt) || StatBit(ClassAtt, DATEVAL|STIMEVAL|TSTMPVAL) ) ) { Error(BADCTARGET, ClassName[1], ""); } else if ( ! ClassThresh && ( Continuous(ClassAtt) || StatBit(ClassAtt, DISCRETE) ) ) { Error(BADDTARGET, ClassName[1], ""); } Free(ClassName[1]); if ( ! ClassThresh ) { Free(ClassName); MaxClass = MaxAttVal[ClassAtt]; ClassName = AttValName[ClassAtt]; } else { /* Set up class names as segments of continuous target att */ MaxClass++; Realloc(ClassName, MaxClass+1, String); sprintf(Buffer, "%s <= %g", AttName[ClassAtt], ClassThresh[1]); ClassName[1] = strdup(Buffer); ForEach(c, 2, MaxClass-1) { sprintf(Buffer, "%g < %s <= %g", ClassThresh[c-1], AttName[ClassAtt], ClassThresh[c]); ClassName[c] = strdup(Buffer); } sprintf(Buffer, "%s > %g", AttName[ClassAtt], ClassThresh[MaxClass-1]); ClassName[MaxClass] = strdup(Buffer); } } ClassName[0] = "?"; fclose(Nf); if ( ErrMsgs > 0 ) Goodbye(1);}/*************************************************************************//* *//* Continuous or discrete attribute *//* *//*************************************************************************/void ExplicitAtt(FILE *Nf)/* ----------- */{ char Buffer[1000]="", *p; DiscrValue v; int ValCeiling=100, BaseYear; time_t clock; /* Read attribute type or first discrete value */ if ( ! ( ReadName(Nf, Buffer, 1000, ':') ) ) { Error(EOFINATT, AttName[MaxAtt], ""); } MaxAttVal[MaxAtt] = 0; if ( Delimiter != ',' ) { /* Typed attribute */ if ( ! strcmp(Buffer, "continuous") ) { } else if ( ! strcmp(Buffer, "timestamp") ) { SpecialStatus[MaxAtt] = TSTMPVAL; /* Set the base date if not done already */ if ( ! TSBase ) { clock = time(0); BaseYear = gmtime(&clock)->tm_year + 1900; SetTSBase(BaseYear); } } else if ( ! strcmp(Buffer, "date") ) { SpecialStatus[MaxAtt] = DATEVAL; } else if ( ! strcmp(Buffer, "time") ) { SpecialStatus[MaxAtt] = STIMEVAL; } else if ( ! memcmp(Buffer, "discrete", 8) ) { SpecialStatus[MaxAtt] = DISCRETE; /* Read max values and reserve space */ v = atoi(&Buffer[8]); if ( v < 2 ) { Error(BADDISCRETE, AttName[MaxAtt], ""); } AttValName[MaxAtt] = Alloc(v+3, String); AttValName[MaxAtt][0] = (char *) (long) v+1; AttValName[MaxAtt][(MaxAttVal[MaxAtt]=1)] = strdup("N/A"); } else if ( ! strcmp(Buffer, "ignore") ) { SpecialStatus[MaxAtt] = EXCLUDE; } else if ( ! strcmp(Buffer, "label") ) { LabelAtt = MaxAtt; SpecialStatus[MaxAtt] = EXCLUDE; } else { /* Cannot have only one discrete value for an attribute */ Error(SINGLEATTVAL, AttName[MaxAtt], Buffer); } } else { /* Discrete attribute with explicit values */ AttValName[MaxAtt] = AllocZero(ValCeiling, String); /* Add "N/A" unless this attribute is the class */ if ( MaxClass > 1 || strcmp(ClassName[1], AttName[MaxAtt]) ) { AttValName[MaxAtt][(MaxAttVal[MaxAtt]=1)] = strdup("N/A"); } else { MaxAttVal[MaxAtt] = 0; } p = Buffer; /* Special check for ordered attribute */ if ( ! memcmp(Buffer, "[ordered]", 9) ) { SpecialStatus[MaxAtt] = ORDERED; for ( p = Buffer+9 ; Space(*p) ; p++ ) ; } /* Record first real explicit value */ AttValName[MaxAtt][++MaxAttVal[MaxAtt]] = strdup(p); /* Record remaining values */ do { if ( ! ( ReadName(Nf, Buffer, 1000, ':') ) ) { Error(EOFINATT, AttName[MaxAtt], ""); } if ( ++MaxAttVal[MaxAtt] >= ValCeiling ) { ValCeiling += 100; Realloc(AttValName[MaxAtt], ValCeiling, String); } AttValName[MaxAtt][MaxAttVal[MaxAtt]] = strdup(Buffer); } while ( Delimiter == ',' ); /* Cancel ordered status if <3 real values */ if ( Ordered(MaxAtt) && MaxAttVal[MaxAtt] <= 3 ) { SpecialStatus[MaxAtt] = 0; } }}/*************************************************************************//* *//* Locate value Val in List[First] to List[Last] *//* *//*************************************************************************/int Which(String Val, String *List, int First, int Last)/* ----- */{ int n=First; while ( n <= Last && strcmp(Val, List[n]) ) n++; return ( n <= Last ? n : First-1 );}/*************************************************************************//* *//* Read next char keeping track of line numbers *//* *//*************************************************************************/int InChar(FILE *f)/* ------ */{ if ( ! *LBp ) { LBp = LineBuffer; if ( ! fgets(LineBuffer, MAXLINEBUFFER, f) ) { LineBuffer[0] = '\00'; return EOF; } LineNo++; } return (int) *LBp++;}/*************************************************************************//* *//* Read a raw case description from file Df. *//* *//* For each attribute, read the attribute value from the file. *//* If it is a discrete valued attribute, find the associated no. *//* of this attribute value (if the value is unknown this is 0). *//* *//* Returns the Description of the case (i.e. the array of *//* attribute values). *//* *//*************************************************************************/#define XError(a,b,c) Error(a,b,c)Description GetDescription(FILE *Df, Boolean Train)/* -------------- */{ Attribute Att; char Name[1000], *EndName; int Dv; Description Dummy, DVec; ContValue Cv; Boolean FirstValue=true;#if defined WIN32 && ! defined _CONSOLE extern int XREF;#endif if ( ReadName(Df, Name, 1000, '\00') ) { Dummy = AllocZero(MaxAtt+2, AttValue); DVec = &Dummy[1]; ForEach(Att, 1, MaxAtt) { if ( AttDef[Att] ) { DVec[Att] = EvaluateDef(AttDef[Att], DVec); if ( Continuous(Att) ) { CheckValue(DVec, Att); } if ( SomeMiss ) { SomeMiss[Att] |= Unknown(DVec, Att); SomeNA[Att] |= NotApplic(DVec, Att); } continue; } /* Get the attribute value if don't already have it */ if ( ! FirstValue && ! ReadName(Df, Name, 1000, '\00') ) { XError(HITEOF, AttName[Att], ""); FreeLastCase(DVec); return Nil; } FirstValue = false; if ( Exclude(Att) ) {#if defined WIN32 && ! defined _CONSOLE if ( XREF || Att == LabelAtt )#else if ( Att == LabelAtt )#endif { /* Record the value as a string */ SVal(DVec,Att) = StoreIVal(Name); } } else if ( ! strcmp(Name, "?") ) { /* Set marker to indicate missing value */ DVal(DVec, Att) = UNKNOWN; if ( SomeMiss ) SomeMiss[Att] = true; } else if ( Att != ClassAtt && ! strcmp(Name, "N/A") ) { /* Set marker to indicate not applicable */ DVal(DVec, Att) = NA; if ( SomeNA ) SomeNA[Att] = true; } else if ( Discrete(Att) ) { /* Discrete attribute */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -