📄 dtree1.c
字号:
} /* set the new path vector */ dt->path[dt->plen++] = dt->curr; /* append node to path */ dt->newp = &(dt->curr->vec[index].child); dt->curr = *(dt->newp); /* get the index-th child */ return (dt->curr) ? 0 : 1; /* return 'ok' or 'node missing' */} /* dt_down() *//*--------------------------------------------------------------------*/int dt_node (DTREE *dt, int attid, double cut){ /* --- create a new tree node */ ATT *att; /* test attribute */ DTNODE *node; /* new tree node */ DTDATA *data, tmp; /* to traverse data vector */ int size; /* size of node data vector */ assert(dt && (attid < as_attcnt(dt->attset))); if (dt->curr) return -2; /* check the current node and */ if (attid < 0) attid = dt->trgid; /* get the attribute id */ att = as_att(dt->attset, attid); size = (att_type(att) == AT_SYM) ? att_valcnt(att) : 2; node = (DTNODE*)malloc(sizeof(DTNODE) +(size-1) *sizeof(DTDATA)); if (!node) return -1; /* create a new node and */ node->size = size; /* initialize its fields */ node->flags = (attid == dt->trgid) ? DT_LEAF : 0; node->attid = attid; /* set attribute id, cut value, */ node->cut = cut; /* node freq., and number of errors */ node->frq = 0.0F; node->err = 0.0; if (dt->type == AT_SYM) node->trg.i = 0; else node->trg.f = 0.0F; if (attid != dt->trgid) tmp.child = NULL; else { dt->total = -1; tmp.frq = 0.0F; } data = node->vec +size; /* clear the data vector */ while (--size >= 0) *--data = tmp; *(dt->newp) = dt->curr = node;/* set pointers to the new node */ dt->size++; /* increase the node counter */ if (dt->plen >= dt->height) /* if path is longer than height, */ dt->height = dt->plen+1; /* update the tree height */ dt->attcnt = -1; /* invalidate the number of atts. */ return 0; /* return 'ok' */} /* dt_node() *//*--------------------------------------------------------------------*/int dt_link (DTREE *dt, int src, int dst){ /* --- link a child to another child */ DTDATA *data; /* data vector element */ assert(dt); /* check the function arguments */ if (!dt->curr) return -2; /* and the current node */ assert((src >= 0) && (src < dt->curr->size) && (dst >= 0) && (dst < dt->curr->size)); data = dt->curr->vec +src; /* get the data vector element */ if (data->child) return -1; /* if a child/link exists, abort */ data->link = dt->curr->vec +dst; /* link the child nodes */ dt->curr->flags |= DT_LINK; /* and set the link flag */ return 0; /* return 'ok' */} /* dt_link() *//*--------------------------------------------------------------------*/int dt_dest (DTREE *dt, int index){ /* --- get destination of a link */ DTDATA *data; /* data vector element */ assert(dt); /* check the function argument */ if (!dt->curr) return -2; /* and the current node */ assert((index >= 0) && (index < dt->curr->size)); data = dt->curr->vec +index; /* get the data vector element */ while (islink(data, dt->curr))/* while the field is a link, */ data = data->link; /* follow it to the next field */ return (int)(data -dt->curr->vec);} /* dt_dest() */ /* return the destination index *//*--------------------------------------------------------------------*/float dt_total (DTREE *dt){ /* --- get total of frequencies */ int i, k; /* loop variable, buffer section size */ double *buf; /* aggr. buffer (for all levels) */ assert(dt); /* check the function argument */ if (dt->total >= 0) /* if the frequency total is valid, */ return dt->total; /* return it directly */ if (!dt->root) /* if there is no tree, */ return dt->total = 0.0F; /* return 0 (no tuples) */ k = (dt->type == AT_SYM) ? dt->clscnt : -3; buf = (double*)malloc(dt->height *abs(k) *sizeof(double)); if (!buf) return -1; /* allocate an aggregation buffer */ for (buf += (i = abs(k)); --i >= 0; ) *--buf = 0; /* clear the first buffer section, */ _aggr(dt->root, k, buf); /* aggregate the class frequencies, */ free(buf); /* and delete the aggregation buffer */ return dt->total = dt->root->frq;} /* dt_total() */ /* return the total frequency *//*--------------------------------------------------------------------*/int dt_attchk (DTREE *dt){ /* --- check occurring attributes */ int i, n = 0; /* loop variable, counter */ assert(dt); /* check the function argument */ for (i = as_attcnt(dt->attset); --i >= 0; ) /* unmark all */ att_setmark(as_att(dt->attset, i), -1); /* attributes */ att_setmark(as_att(dt->attset, dt->trgid), 0); /* mark target */ if (dt->root) _mark(dt->attset, dt->root); /* and test atts. */ for (i = as_attcnt(dt->attset); --i >= 0; ) if (att_getmark(as_att(dt->attset, i)) >= 0) n++; /* count occurring attributes */ return dt->attcnt = n; /* and return their number */} /* dt_attchk() *//*--------------------------------------------------------------------*/int dt_attcnt (DTREE *dt){ /* --- get the number of occ. atts. */ if (dt->attcnt > 0) /* if the number of atts. is valid, */ return dt->attcnt; /* return it directly */ return dt->attcnt = dt_attchk(dt);} /* dt_attcnt() */ /* otherwise determine it *//*---------------------------------------------------------------------- Execution Functions----------------------------------------------------------------------*/static void _exec (DTREE *dt, DTNODE *node, double weight){ /* --- recursively classify a tuple */ int i; /* loop variable, index */ int type; /* type of test attribute */ DTDATA *data; /* to traverse data vector */ double *frq; /* to traverse the class frequencies */ float f; /* value of continuous attribute */ assert(dt && node && (weight >= 0)); /* check the function args. */ /* --- leaf node --- */ if (node->flags & DT_LEAF) { /* if the node is a leaf */ frq = dt->buf; /* get the buffer vector of the tree */ if (dt->type != AT_SYM) { /* if the target att. is numeric, */ frq[0] += weight *node->frq; /* aggregate the data */ frq[1] += weight *node->err; /* for the prediction */ frq[2] += weight *node->trg.f; } else { /* if the target att. is symbolic, */ assert(node->size == dt->clscnt); frq += node->size; /* traverse the data vector */ for (data = node->vec +(i = node->size); --i >= 0; ) *--frq += (--data)->frq *weight; } /* add the class frequencies */ return; /* to the result distribution, */ } /* then terminate the function */ /* --- test node --- */ type = att_type(as_att(dt->attset, node->attid)); if (type != AT_FLT) { /* if symbolic or integer attribute */ if (dt->tuple) i = tpl_colval(dt->tuple, node->attid)->i; else i = att_inst(as_att(dt->attset, node->attid))->i; if (type == AT_SYM) { /* if symbolic attribute */ if (i >= node->size) i = -1; } else { /* if integer attribute */ i = ((i <= UV_INT) || (i == node->cut)) ? -1 : ((i <= node->cut) ? 0 : 1); } } /* determine the vector index */ else { /* if real/float attribute */ if (dt->tuple) f = tpl_colval(dt->tuple, node->attid)->f; else f = att_inst(as_att(dt->attset, node->attid))->f; i = ((f <= UV_FLT) || (f == node->cut)) ? -1 : ((f <= node->cut) ? 0 : 1); } /* determine the vector index */ /* --- attribute value is known --- */ if (i >= 0) { /* if an index has been determined, */ data = node->vec +i; /* get corresponding vector element */ while (islink(data, node)) /* follow subset links */ data = data->link; /* to the true child node */ if (data->child) /* if the child node exists, */ _exec(dt, data->child, weight); /* execute the subtree */ else if (dt->type == AT_SYM)/* if the target att. is symbolic */ dt->buf[node->trg.i] += weight *EPSILON; else { /* if the target att. is numeric */ dt->buf[0] += weight *EPSILON; dt->buf[2] += weight *EPSILON *node->trg.f; } /* predict with the node's data */ return; /* (use the node as a fallback), */ } /* then terminate the function */ /* -- attribute value is unknown --- */ weight /= node->frq; /* remove the node weight and */ data = node->vec; /* get the node's data vector */ for (i = node->size; --i >= 0; data++) { if (islink(data, node)) /* traverse all child nodes, */ continue; /* but skip links to other children */ if (data->child) /* if the child node exists, execute */ _exec(dt, data->child, weight *data->child->frq); /* subtree */ else if (dt->type == AT_SYM)/* if the target att. is symbolic */ dt->buf[node->trg.i] += weight *EPSILON; else { /* if the target att. is numeric */ dt->buf[0] += weight *EPSILON; dt->buf[2] += weight *EPSILON *node->trg.f; } /* predict with the node's data */ } /* (use the node as a fallback) */} /* _exec() *//*--------------------------------------------------------------------*/int dt_exec (DTREE *dt, TUPLE *tuple, INST *res, float *supp, float *conf){ /* --- classify tuple with dec. tree */ int i, k; /* loop variable, buffer size */ double *frq, *max; /* to traverse the class frequencies */ double sum; /* sum of the class frequencies */ assert(dt); /* check the function argument */ dt->tuple = tuple; /* store the tuple to classify */ if (dt->total < 0) { /* if total is not valid, compute it */ if (dt_total(dt) < 0) return -1; } k = (dt->type == AT_SYM) ? dt->clscnt : 3; for (frq = dt->buf +(i = k); --i >= 0; ) *--frq = 0; /* clear the buffer vector */ if (dt->root) /* if a decision tree exists, */ _exec(dt, dt->root, 1); /* execute the decision tree */ if (dt->type != AT_SYM) { /* if the target att. is numeric, */ sum = dt->buf[0]; /* compute the pedicted value and */ res->f = (float)dt->buf[2]; /* store prediction support and error */ if (supp) *supp = (float)sum; if (conf) *conf = (sum > 0) ? (float)sqrt(dt->buf[1] /sum) : 0.0F; } else { /* if the target att. is symbolic */ frq = max = dt->buf; /* traverse the */ sum = *frq; /* class frequency table */ for (i = dt->clscnt; --i > 0; ) { if (*++frq > *max) max = frq; sum += *frq; /* find the most frequent class */ } /* and sum the class frequencies */ res->i = (int)(max-dt->buf);/* set the classification result */ if (supp) *supp = (float)sum; if (conf) *conf = (float)(*max /sum); } /* store class. support and error */ return 0; /* return 'ok' */} /* dt_exec() *//*---------------------------------------------------------------------- Description Functions----------------------------------------------------------------------*/static void _indent (DTOUT *out, int newline){ /* --- indent output line */ int i; /* loop variable */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -