📄 hooks.c
字号:
/*************************************************************************//* *//* Source code for use with Cubist Release 2.04 *//* -------------------------------------------- *//* Copyright RuleQuest Research 2007 *//* *//* This code is provided "as is" without warranty of any kind, *//* either express or implied. All use is at your own risk. *//* *//*************************************************************************//*=======================================================================*//* *//* Get names of classes, attributes and attribute values *//* *//*=======================================================================*/#define MAXLINEBUFFER 10000char LineBuffer[MAXLINEBUFFER], *LBp=LineBuffer;/*************************************************************************//* *//* Read a name from file f into string s, setting Delimiter. *//* *//* - Embedded periods are permitted, but periods followed by space *//* characters act as delimiters. *//* - Embedded spaces are permitted, but multiple spaces are *//* replaced by a single space. *//* - Any character can be escaped by '\'. *//* - The remainder of a line following '|' is ignored. *//* *//*************************************************************************/Boolean ReadName(FILE *f, String s, int n, char ColonOpt)/* -------- */{ register char *Sp=s; register int c; char Msg[2]; /* Skip to first non-space character */ while ( (c = InChar(f)) == '|' || Space(c) ) { if ( c == '|' ) SkipComment; } /* Return false if no names to read */ if ( c == EOF ) { Delimiter = EOF; return false; } /* Read in characters up to the next delimiter */ while ( c != ColonOpt && c != ',' && c != '\n' && c != '|' && c != EOF ) { if ( --n <= 0 ) { if ( Of ) Error(LONGNAME, "", ""); } if ( c == '.' ) { if ( (c = InChar(f)) == '|' || Space(c) || c == EOF ) break; *Sp++ = '.'; continue; } if ( c == '\\' ) { c = InChar(f); } if ( Space(c) ) { *Sp++ = ' '; while ( ( c = InChar(f) ) == ' ' || c == '\t' ) ; } else { *Sp++ = c; c = InChar(f); } } if ( c == '|' ) SkipComment; Delimiter = c; /* Special case for ':=' */ if ( Delimiter == ':' ) { if ( *LBp == '=' ) { Delimiter = '='; LBp++; } } /* Strip trailing spaces */ while ( Sp > s && Space(*(Sp-1)) ) Sp--; if ( Sp == s ) { Msg[0] = ( Space(c) ? '.' : c ); Msg[1] = '\00'; Error(MISSNAME, Fn, Msg); } *Sp++ = '\0'; return true;}/*************************************************************************//* *//* Read names of classes, attributes and legal attribute values. *//* On completion, names are stored in: *//* AttName - attribute names *//* AttValName - attribute value names *//* with: *//* MaxAttVal - number of values for each attribute *//* *//* Other global variables set are: *//* MaxAtt - maximum attribute number *//* MaxDiscrVal - maximum discrete values for an attribute *//* *//* Note: until the number of attributes is known, the name *//* information is assembled in local arrays *//* *//*************************************************************************/void GetNames(FILE *Nf)/* -------- */{ char Buffer[1000]="", *Target; int AttCeiling=100; Attribute Att; ErrMsgs = AttExIn = LineNo = 0; LBp = LineBuffer; *LBp = 0; /* Get name of dependent att */ ReadName(Nf, Buffer, 1000, ':'); Target = strdup(Buffer); /* Get attribute and attribute value names from names file */ AttName = AllocZero(AttCeiling, String); MaxAttVal = AllocZero(AttCeiling, DiscrValue); AttValName = AllocZero(AttCeiling, String *); SpecialStatus = AllocZero(AttCeiling, char); AttDef = AllocZero(AttCeiling, Definition); MaxAtt = LabelAtt = CWtAtt = 0; while ( ReadName(Nf, Buffer, 1000, ':') ) { if ( Delimiter != ':' && Delimiter != '=' ) { Error(BADATTNAME, Buffer, ""); } /* Check for include/exclude instruction */ if ( ( *Buffer == 'a' || *Buffer == 'A' ) && ! memcmp(Buffer+1, "ttributes ", 10) && ! memcmp(Buffer+strlen(Buffer)-6, "cluded", 6) ) { AttExIn = ( ! memcmp(Buffer+strlen(Buffer)-8, "in", 2) ? 1 : -1 ); if ( AttExIn == 1 ) { ForEach(Att, 1, MaxAtt) { SpecialStatus[Att] |= SKIP; } } while ( ReadName(Nf, Buffer, 1000, ':') ) { Att = Which(Buffer, AttName, 1, MaxAtt); if ( ! Att ) { Error(UNKNOWNATT, Buffer, Nil); } else if ( AttExIn == 1 ) { SpecialStatus[Att] -= SKIP; } else { SpecialStatus[Att] |= SKIP; } } break; } if ( Which(Buffer, AttName, 1, MaxAtt) > 0 ) { Error(DUPATTNAME, Buffer, Nil); } if ( ++MaxAtt >= AttCeiling ) { AttCeiling += 100; Realloc(AttName, AttCeiling, String); Realloc(MaxAttVal, AttCeiling, DiscrValue); Realloc(AttValName, AttCeiling, String *); Realloc(SpecialStatus, AttCeiling, char); Realloc(AttDef, AttCeiling, Definition); } AttName[MaxAtt] = strdup(Buffer); SpecialStatus[MaxAtt] = 0; AttDef[MaxAtt] = Nil; MaxAttVal[MaxAtt] = 0; if ( Delimiter == '=' ) { ImplicitAtt(Nf); } else { ExplicitAtt(Nf); } /* Check for case weight attribute, which must be type continuous */ if ( ! strcmp(AttName[MaxAtt], "case weight") ) { CWtAtt = MaxAtt; if ( ! Continuous(CWtAtt) ) { Error(CWTATTERR, "", ""); } } } ClassAtt = Which(Target, AttName, 1, MaxAtt); /* Make sure not excluding class attribute */ if ( Skip(ClassAtt) ) SpecialStatus[ClassAtt] -= SKIP; /* Class attribute must be defined and must be continuous */ if ( ClassAtt <= 0 ) { Error(NOTARGET, Target, ""); } else if ( MaxAttVal[ClassAtt] > 0 || StatBit(ClassAtt, DISCRETE|DATEVAL|STIMEVAL|EXCLUDE) ) { Error(BADTARGET, Target, ""); } /* Ignore case weight attribute if it is excluded; otherwise, it cannot be used in models */ if ( CWtAtt ) { if ( Skip(CWtAtt) ) { CWtAtt = 0; } else { SpecialStatus[CWtAtt] |= SKIP; } } fclose(Nf); Free(Target); if ( ErrMsgs > 0 ) Goodbye(1);}/*************************************************************************//* *//* Continuous or discrete attribute *//* *//*************************************************************************/void ExplicitAtt(FILE *Nf)/* ----------- */{ char Buffer[1000]="", *p; DiscrValue v; int ValCeiling=100, BaseYear; time_t clock; /* Read attribute type or first discrete value */ if ( ! ( ReadName(Nf, Buffer, 1000, ':') ) ) { Error(EOFINATT, AttName[MaxAtt], ""); } MaxAttVal[MaxAtt] = 0; if ( Delimiter != ',' ) { /* Typed attribute */ if ( ! strcmp(Buffer, "continuous") ) { } else if ( ! strcmp(Buffer, "timestamp") ) { SpecialStatus[MaxAtt] = TSTMPVAL; /* Set the base date if not done already */ if ( ! TSBase ) { clock = time(0); BaseYear = gmtime(&clock)->tm_year + 1900; SetTSBase(BaseYear); } } else if ( ! strcmp(Buffer, "date") ) { SpecialStatus[MaxAtt] = DATEVAL; } else if ( ! strcmp(Buffer, "time") ) { SpecialStatus[MaxAtt] = STIMEVAL; } else if ( ! memcmp(Buffer, "discrete", 8) ) { SpecialStatus[MaxAtt] = DISCRETE; /* Read max values and reserve space */ v = atoi(&Buffer[8]); if ( v < 2 ) { Error(BADDISCRETE, AttName[MaxAtt], ""); } AttValName[MaxAtt] = Alloc(v+3, String); AttValName[MaxAtt][0] = (char *) (long) v+1; AttValName[MaxAtt][(MaxAttVal[MaxAtt]=1)] = strdup("N/A"); } else if ( ! strcmp(Buffer, "ignore") ) { SpecialStatus[MaxAtt] = EXCLUDE; } else if ( ! strcmp(Buffer, "label") ) { LabelAtt = MaxAtt; SpecialStatus[MaxAtt] = EXCLUDE; } else { /* Cannot have only one discrete value for an attribute */ Error(SINGLEATTVAL, AttName[MaxAtt], Buffer); } } else { /* Discrete attribute with explicit values */ AttValName[MaxAtt] = AllocZero(ValCeiling, String); /* Add "N/A" */ AttValName[MaxAtt][(MaxAttVal[MaxAtt]=1)] = strdup("N/A"); p = Buffer; /* Special check for ordered attribute */ if ( ! memcmp(Buffer, "[ordered]", 9) ) { SpecialStatus[MaxAtt] = ORDERED; for ( p = Buffer+9 ; Space(*p) ; p++ ) ; } /* Record first real explicit value */ AttValName[MaxAtt][++MaxAttVal[MaxAtt]] = strdup(p); /* Record remaining values */ do { if ( ! ( ReadName(Nf, Buffer, 1000, ':') ) ) { Error(EOFINATT, AttName[MaxAtt], ""); } if ( ++MaxAttVal[MaxAtt] >= ValCeiling ) { ValCeiling += 100; Realloc(AttValName[MaxAtt], ValCeiling, String); } AttValName[MaxAtt][MaxAttVal[MaxAtt]] = strdup(Buffer); } while ( Delimiter == ',' ); /* Cancel ordered status if <3 real values */ if ( Ordered(MaxAtt) && MaxAttVal[MaxAtt] <= 3 ) { SpecialStatus[MaxAtt] = 0; } if ( MaxAttVal[MaxAtt] > MaxDiscrVal ) MaxDiscrVal = MaxAttVal[MaxAtt]; }}/*=======================================================================*//* *//* Routines to handle implicitly-defined attributes *//* *//*=======================================================================*/char *Buff; /* buffer for input characters */int BuffSize, BN; /* size and index of next character */EltRec *TStack; /* expression stack model */int TStackSize, TSN; /* size of stack and index of next entry */int DefSize, DN; /* size of definition and next element */Boolean PreviousError; /* to avoid parasytic errors */AttValue _UNK, /* quasi-constant for unknown value */ _NA; /* ditto for not applicable */#define FailSyn(Msg) {DefSyntaxError(Msg); return false;}#define FailSem(Msg) {DefSemanticsError(Fi, Msg, OpCode); return false;}typedef union _xstack_elt { DiscrValue _discr_val; ContValue _cont_val; String _string_val; } XStackElt;#define cval _cont_val#define sval _string_val#define dval _discr_val#define XDVal(c,a) DVal(c,a)/*************************************************************************//* *//* A definition is handled in two stages: *//* - The definition is read (up to a line ending with a period) *//* replacing multiple whitespace characters with one space *//* - The definition is then read (using a recursive descent *//* parser), building up a reverse polish expression *//* Syntax and semantics errors are flagged *//* *//*************************************************************************/void ImplicitAtt(FILE *Nf)/* ----------- */{#ifdef CUBIST _UNK.cval = UNKNOWN;#else _UNK.dval = UNKNOWN;#endif _NA.dval = NA; /* Get definition as a string in Buff */ ReadDefinition(Nf); PreviousError = false; BN = 0; /* Allocate initial stack and attribute definition */ TStack = Alloc(TStackSize=50, EltRec); TSN = 0; AttDef[MaxAtt] = Alloc(DefSize = 100, DefElt); DN = 0; /* Parse Buff as an expression terminated by a period */ Expression(); if ( ! Find(".") ) DefSyntaxError("'.' ending definition"); /* Final check -- defined attribute must not be of type String */ if ( ! PreviousError ) { if ( DN == 1 && DefOp(AttDef[MaxAtt][0]) == OP_ATT && strcmp(AttName[MaxAtt], "case weight") ) { Error(SAMEATT, AttName[ (long) DefSVal(AttDef[MaxAtt][0]) ], Nil); } if ( TStack[0].Type == 'B' ) { /* Defined attributes should never have a value N/A */ MaxAttVal[MaxAtt] = 3; AttValName[MaxAtt] = AllocZero(4, String); AttValName[MaxAtt][1] = strdup("??"); AttValName[MaxAtt][2] = strdup("t"); AttValName[MaxAtt][3] = strdup("f"); } else { MaxAttVal[MaxAtt] = 0; } } if ( PreviousError ) { DN = 0; SpecialStatus[MaxAtt] = EXCLUDE; } /* Write a terminating marker */ DefOp(AttDef[MaxAtt][DN]) = OP_END; Free(Buff); Free(TStack);}/*************************************************************************//* *//* Read the text of a definition. Skip comments, collapse *//* multiple whitespace characters. *//* *//*************************************************************************/void ReadDefinition(FILE *f)/* -------------- */{ Boolean LastWasPeriod=false; char c; Buff = Alloc(BuffSize=50, char); BN = 0; while ( true ) { c = InChar(f); if ( c == '|' ) SkipComment; if ( c == EOF || c == '\n' && LastWasPeriod ) { /* The definition is complete. Add a period if it's not there already and terminate the string */ if ( ! LastWasPeriod ) Append('.'); Append(0); return; } if ( Space(c) ) { Append(' '); } else if ( c == '\\' ) { /* Escaped character -- bypass any special meaning */ Append(InChar(f)); } else { LastWasPeriod = ( c == '.' ); Append(c); } }}/*************************************************************************//* *//* Append a character to Buff, resizing it if necessary *//* *//*************************************************************************/void Append(char c)/* ------ */{ if ( c == ' ' && (! BN || Buff[BN-1] == ' ' ) ) return; if ( BN >= BuffSize ) { Realloc(Buff, BuffSize += 50, char); } Buff[BN++] = c;}/*************************************************************************//* *//* Recursive descent parser with syntax error checking. *//* The reverse polish is built up by calls to Dump() and DumpOp(), *//* which also check for semantic validity. *//* *//* For possible error messages, each routine also keeps track of *//* the beginning of the construct that it recognises (in Fi). *//* *//*************************************************************************/Boolean Expression()/* ---------- */{ int Fi=BN; if ( Buff[BN] == ' ' ) BN++;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -