📄 dtree1.c
字号:
ATT *att; /* test attribute */ int i, k, t; /* att./value id, flag, buffer */ double f; /* buffer for a floating point number */ int err = 0; /* parse error status */ assert(dt && scan); /* check the function arguments */ GET_CHR('{'); /* consume '{' */ if (sc_token(scan) == '}') { /* if the (sub)tree is empty, */ GET_TOK(); return 0; } /* consume '}' and return */ /* --- read a leaf node --- */ if (sc_token(scan) != '(') { /* if at a leaf (no further test) */ if (dt_node(dt, -1, UV_FLT) != 0) ERROR(E_NOMEM); /* create a leaf node */ if (dt->type == AT_SYM) { /* if the target attrib. is symbolic, */ t = _clsin(dt, scan, dt->curr); if (t) return t; } /* read the class frequencies */ else { /* if the target att. is numeric */ if (sc_token(scan) != T_NUM) ERROR(E_NUMEXP); f = atof(sc_value(scan)); /* get and check the target value */ if ((f < UV_FLT) || (f > FLT_MAX)) ERROR(E_ILLNUM); GET_TOK(); /* consume the target value */ dt->curr->trg.f = (float)f; /* and store it */ GET_CHR('~'); /* consume '~' */ if (sc_token(scan) != T_NUM) ERROR(E_NUMEXP); f = atof(sc_value(scan)); /* get and check the prediction error */ if ((f < 0) || (f > DBL_MAX)) ERROR(E_ILLNUM); GET_TOK(); /* consume the prediction error */ dt->curr->err = f*f; /* and store its square (mse) */ GET_CHR('['); /* consume '[' */ if (sc_token(scan) != T_NUM) ERROR(E_NUMEXP); f = atof(sc_value(scan)); /* get and check the node frequency */ if ((f < 0) || (f > FLT_MAX)) ERROR(E_ILLNUM); GET_TOK(); /* consume the node frequency */ dt->curr->frq = (float)f; /* and store it */ dt->curr->err *= f; /* compute the sse from the mse */ GET_CHR(']'); /* consume ']' */ } GET_CHR('}'); return 0; /* consume '}' and */ } /* terminate the function */ /* --- read a test node --- */ GET_TOK(); /* consume '(' */ t = sc_token(scan); /* check for a name */ if ((t != T_ID) && (t != T_NUM)) ERROR(E_ATTEXP); i = as_attid(dt->attset, sc_value(scan)); if (i < 0) ERROR(E_UNKATT); GET_TOK(); /* get and consume the attribute */ if (dt_node(dt, i, 0) != 0) ERROR(E_NOMEM); att = as_att(dt->attset, i); /* create a test node and */ type = att_type(att); /* get the test attribute */ if (type != AT_SYM) { /* if the attribute is numeric, */ GET_CHR('|'); /* consume '|' */ if (sc_token(scan) != T_NUM) ERROR(E_NUMEXP); f = atof(sc_value(scan)); /* get and check the cut value */ if ((f < UV_FLT) || (f > FLT_MAX)) ERROR(E_ILLNUM); dt->curr->cut = (float)f; /* store the cut value */ GET_TOK(); /* and consume it */ } GET_CHR(')'); /* consume ')' */ k = 0; /* initialize the read counter */ while (sc_token(scan) != '}'){/* attribute value read loop */ if (++k > 1) { /* if this is not the first value, */ GET_CHR(','); } /* consume ',' (separator) */ if (type != AT_SYM) { /* --- if the attribute is numeric */ t = sc_token(scan); /* get the next token */ if (t == '<') i = 0; /* if lower than cut: index 0 */ else if (t == '>') i = 1; /* if greater than cut: index 1 */ else { pa_error(scan, E_UNKVAL, -1, NULL); RECOVER(); err = 1; continue; } if (dt->curr->vec[i].child) { pa_error(scan, E_DUPVAL, -1, NULL); RECOVER(); err = 1; continue; } GET_TOK(); /* consume '<' or '>' */ GET_CHR(':'); } /* consume ':' */ else { /* --- if the attribute is symbolic */ i = _getval(dt, scan, att); if (i < 0) { RECOVER(); err = 1; continue; } } /* read a (list of) value(s) */ if (dt_down(dt, i, 0) < 0) /* go down in the tree */ ERROR(E_NOMEM); t = _dtin(dt, scan); /* get subtree recursively */ dt_up(dt, 0); /* go up again in the tree */ if (t) { /* if an error occurred */ if (t < E_FWRITE) { RECOVER(); } else if (t < 0) return t; err = 1; /* try to recover, but */ } /* abort on fatal errors */ } GET_TOK(); /* consume '}' */ return err; /* return parse error status */} /* _dtin() *//*--------------------------------------------------------------------*/static int _parse (ATTSET *attset, SCAN *scan, DTREE **pdt){ /* --- read decision tree */ int trgid; /* class attribute identifier */ int t; /* buffer for token/type/result */ assert(attset && scan && pdt);/* check the function arguments */ if ((sc_token(scan) != T_ID) || (strcmp(sc_value(scan), "dtree") != 0)) ERR_STR("dtree"); /* check for 'dtree' */ GET_TOK(); /* consume 'dtree' */ GET_CHR('('); /* consume '(' */ t = sc_token(scan); /* check for a name */ if ((t != T_ID) && (t != T_NUM)) ERROR(E_ATTEXP); trgid = as_attid(attset, sc_value(scan)); if (trgid < 0) ERROR(E_UNKATT); *pdt = dt_create(attset, trgid); /* get the target attribute */ if (!*pdt) ERROR(E_NOMEM); /* and create a dec./reg. tree */ GET_TOK(); /* consume the target attribute name */ GET_CHR(')'); /* consume ')' */ GET_CHR('='); /* consume '=' */ if (_dtin(*pdt, scan) != 0) /* recursively read dec./reg. tree */ return -1; /* and check for an error */ GET_CHR(';'); /* consume ';' */ return 0; /* return 'ok' */} /* _parse() *//*--------------------------------------------------------------------*/DTREE* dt_parse (ATTSET *attset, SCAN *scan){ /* --- parse a decision tree */ DTREE *dt = NULL; /* created decision tree */ assert(attset && scan); /* check the function arguments */ pa_init(scan); /* initialize parsing */ if (_parse(attset, scan, &dt) != 0) { if (dt) dt_delete(dt, 0); /* parse a decision tree */ return NULL; /* if an error occurred, */ } /* delete the tree and abort */ dt_total(dt); /* compute the node frequencies and */ return dt; /* return the created decision tree */} /* dt_parse() */#endif /* #ifdef DT_PARSE *//*---------------------------------------------------------------------- Rule Extraction Functions----------------------------------------------------------------------*/#ifdef DT_RULESstatic int _rules (DTRULE *rx, DTNODE *node, RULE *path){ /* --- recursively collect rules */ RULE *rule; /* created rule */ int i, k, r; /* loop variables, buffers */ DTDATA *data, *d2; /* to traverse data vector */ INST inst; /* temporary buffer for instance */ assert(rx && node); /* check the function arguments */ /* --- leaf node --- */ if (node->flags & DT_LEAF) { /* if the node is a leaf */ rule = r_create(rx->attset, (path) ? r_condcnt(path) : 0); if (!rule) return -1; /* create a new rule and */ if (path) r_copy(rule,path);/* copy the path into it */ inst = node->trg; /* get the target value */ r_headset(rule, node->attid, R_EQ, &inst); r_setsupp(rule, node->frq); /* set head and rule information */ if (rx->type == AT_SYM) /* if target attribute is symbolic */ r_setconf(rule, (node->frq > 0) ? 1 -node->err/node->frq : 1); else /* if target attribute is numeric */ r_setconf(rule, (node->frq > 0) ? sqrt(node->err/node->frq) : 0); if (rs_ruleadd(rx->ruleset, rule) < 0) { r_delete(rule); return -1; } return 0; /* add the rule to the rule set */ } /* and return 'ok' */ /* --- test node --- */ rule = r_create(rx->attset, (path) ? (r_condcnt(path)+1) : 1); if (!rule) return -1; /* create a new rule */ if (att_type(as_att(rx->attset, node->attid)) != AT_SYM) { data = node->vec; /* if the attribute is numeric */ inst.f = node->cut; /* get the cut value */ if (data->child) { /* if there is a left subtree */ if (path) r_copy(rule, path); /* copy path into rule */ r = r_condadd(rule, node->attid, R_LE, &inst); if ((r == -1) /* add condition to the rule */ || ((r >= 0) && (_rules(rx, data->child, rule) != 0))) { r_delete(rule); return -1; } } /* collect rules from left subtree */ if ((++data)->child) { /* if there is a right subtree */ if (path) r_copy(rule, path); /* copy path into rule or */ else r_condrem(rule, -1); /* remove all conditions */ r = r_condadd(rule, node->attid, R_GE, &inst); if ((r == -1) /* add condition to the rule */ || ((r >= 0) && (_rules(rx, data->child, rule) != 0))) { r_delete(rule); return -1; } } } /* collect rules from right subtree */ else { /* if attribute is symbolic */ inst.p = rx->subset; /* set the subset vector */ for (i = 0; i < node->size; i++) { data = node->vec +i; /* traverse attribute values */ if (!data->child || islink(data, node)) continue; /* skip empty children and links */ for (k = r = 0; k < node->size; k++) { if (k == i) continue; /* traverse children except current */ d2 = node->vec +k; /* follow link chain */ while (islink(d2, node)) d2 = d2->link; if (d2 != data) /* skip children that are */ continue; /* not linked to current child */ rx->subset[++r] = k; /* note the value identifier */ } /* in the value subset vector */ rx->subset[++r] = i; /* store the current value */ rx->subset[0] = r; /* and the number of values */ if (path) r_copy(rule, path); /* copy path into rule */ else r_condrem(rule, -1); /* remove all conditions */ r = r_condadd(rule, node->attid, R_IN, &inst); if ((r == -1) /* add condition to the rule */ || ((r >= 0) && (_rules(rx, data->child, rule) != 0))) { r_delete(rule); return -1; } } /* collect the rules */ } /* from the subtree */ r_delete(rule); /* delete the rule buffer */ return 0; /* and return 'ok' */} /* _rules() *//*--------------------------------------------------------------------*/RULESET* dt_rules (DTREE *dt){ /* --- create a rule set */ int i, n; /* loop variable, buffer */ int max = 0; /* maximal number of values */ RULESET *ruleset; /* created rule set */ DTRULE *rx; /* rule extraction info */ assert(dt); /* check the function argument */ ruleset = rs_create(dt->attset, r_delete); if (!ruleset || !dt->root) /* create an empty rule set */ return ruleset; /* and check for an empty tree */ for (i = as_attcnt(dt->attset); --i >= 0; ) { n = att_valcnt(as_att(dt->attset, i)); if (n > max) max = n; /* determine the maximal number */ } /* of values per attribute */ rx = (DTRULE*)malloc(sizeof(DTRULE) +max *sizeof(int)); if (!rx) { rs_delete(ruleset, 0); return NULL; } rx->ruleset = ruleset; /* create a rule extraction info. */ rx->attset = dt->attset; /* and note the relevant objects */ rx->type = dt->type; n = _rules(rx,dt->root,NULL); /* collect rules from the tree */ free(rx); /* delete the rule extraction info. */ if (n != 0) { rs_delete(ruleset, 0); return NULL; } return ruleset; /* return the created rule set */} /* dt_rules() */#endif /* #ifdef DT_RULES */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -