📄 dtree2.orig
字号:
static void _mul_wgt (TUPLE **tpls, int n, double f){ /* --- multiply tuple weights */ INST *info; /* temporary buffer */ assert(tpls && (n >= 0)); /* check the function arguments */ for (tpls += n; --n >= 0; ) { /* traverse the tuples */ info = tpl_info(*--tpls); /* get the additional information */ info->f = (float)(info->f *f); } /* multiply with weighting factor */} /* _mul_wgt() *//*--------------------------------------------------------------------*/#ifdef DT_PRUNEstatic double _sum_wgt (TUPLE **tpls, int n){ /* --- sum tuple weights */ double wgt = 0; /* traverse the tuples */ assert(tpls && (n >= 0)); /* check the function arguments */ for (tpls += n; --n >= 0; ) wgt += tpl_info(*--tpls)->f; return wgt; /* sum weights and return this sum */} /* _sum_wgt() */#endif/*---------------------------------------------------------------------- Comparison Functions----------------------------------------------------------------------*/#ifdef DT_GROWstatic int _cmp_int (const void *p1, const void *p2, void *data){ /* --- compare a column of two tuples */ register int i1 = tpl_colval((TUPLE*)p1, (int)data)->i; register int i2 = tpl_colval((TUPLE*)p2, (int)data)->i; if (i1 < i2) return -1; /* get column values and */ return (i1 > i2) ? 1 : 0; /* return sign of their difference */} /* _cmp_int() *//*--------------------------------------------------------------------*/static int _cmp_flt (const void *p1, const void *p2, void *data){ /* --- compare a column of two tuples */ register float f1 = tpl_colval((TUPLE*)p1, (int)data)->f; register float f2 = tpl_colval((TUPLE*)p2, (int)data)->f; if (f1 < f2) return -1; /* get column values and */ return (f1 > f2) ? 1 : 0; /* return sign of their difference */} /* _cmp_flt() */#endif/*---------------------------------------------------------------------- Auxiliary Functions for Growing and Pruning----------------------------------------------------------------------*/static TUPLE** _tuples (TABLE *table, int trgid, int *n){ /* --- collect tuples w. known target */ int i; /* loop variable */ int type; /* type of target attribute */ TUPLE **vec, **p, *tpl; /* to traverse the tuple vector */ INST *inst; /* to access the target value */ assert(table && n /* check the function arguments */ && (trgid >= 0) && (trgid < tab_colcnt(table))); i = tab_tplcnt(table); /* get the number of tuples */ p = vec = (TUPLE**)malloc(i *sizeof(TUPLE*)); if (!p) return NULL; /* allocate a tuple vector */ type = att_type(tab_col(table, trgid)); while (--i >= 0) { /* traverse the tuples in the table */ tpl = tab_tpl(table, i); /* and check the target value */ inst = tpl_colval(tpl, trgid); if (((type == AT_SYM) && (inst->i <= UV_SYM)) || ((type == AT_INT) && (inst->i <= UV_INT)) || ((type == AT_FLT) && (inst->f <= UV_FLT))) continue; /* skip tuples with unknown target, */ *p++ = tpl; /* but collect all other tuples */ tpl_info(tpl)->f = tpl_getwgt(tpl); } /* copy the tuple weight */ *n = (int)(p -vec); /* compute the number of tuples */ return vec; /* and return the tuple vector */} /* _tuples() *//*--------------------------------------------------------------------*/static DTNODE* _leaf_sym (DTREE *dt, FRQTAB *frqtab, int x){ /* --- create a leaf (sym. target) */ int i, k; /* loop variable, class id */ DTNODE *node; /* created leaf node */ DTDATA *data; /* to traverse the data vector */ double sum, frq; /* (sum of) the class frequencies */ double max = 0; /* maximum of the class frequencies */ double wgt; /* weight of cases with unknown value */ assert(dt && frqtab /* check the function arguments */ && (ft_known(frqtab) > 0) && (ft_frq_x(frqtab, x) > 0)); node = (DTNODE*)malloc(sizeof(DTNODE) +sizeof(DTDATA)*(dt->clscnt-1)); if (!node) return NULL; /* create a leaf node */ node->flags = DT_LEAF; /* set the leaf flag */ node->attid = dt->trgid; /* store the class attribute id */ node->size = dt->clscnt; /* and the number of classes */ /* It is convenient to abuse the cut value for storing the */ /* frequency of tuples with a known value. This is possible, */ /* because the cut value is not used in leaf nodes. Hence: */ sum = ft_frq_x(frqtab, x); /* get and store the number of */ node->cut = (float)sum; /* tuples with a known value */ frq = ft_frq_x(frqtab, -1); /* check for unknown values */ if (frq <= 0) { /* if there are no unknown values */ data = node->vec; /* traverse the classes */ for (i = k = 0; i < node->size; i++) { frq = ft_frq_xy(frqtab, x, i); if (frq > max) { max = frq; k = i; } (data++)->frq = (float)frq; /* note the class frequencies and */ } } /* determine the most frequent class */ else { /* if there are unknown values */ wgt = ft_known(frqtab); /* compute the weighting factor */ wgt = (wgt > 0) ? sum/wgt : (1.0/ft_xcnt(frqtab)); sum += wgt *frq; /* and add the weight of unknowns, */ data = node->vec; /* then traverse the classes */ for (i = k = 0; i < node->size; i++) { frq = ft_frq_xy(frqtab, x, i) +wgt *ft_frq_xy(frqtab, -1, i); if (frq > max) { max = frq; k = i; } (data++)->frq = (float)frq; } /* compute the class frequencies and */ } /* determine the most frequent class */ node->frq = (float)sum; /* store the total frequency, */ node->err = sum -max; /* the number of leaf errors, */ node->trg.i = k; /* and the most frequent class */ return node; /* return the created leaf node */} /* _leaf_sym() *//*--------------------------------------------------------------------*/static DTNODE* _leaf_num (DTREE *dt, VARTAB *vartab, int x){ /* --- create a leaf (num. target) */ DTNODE *node; /* created leaf node */ double frq, unk; /* (sum of) the column frequencies */ double wgt; /* weight of cases with unknown value */ double m; /* temporary buffer */ assert(dt && vartab /* check the function arguments */ && (vt_known (vartab) > 0) && (vt_colfrq(vartab, x) > 0)); node = (DTNODE*)malloc(sizeof(DTNODE) -sizeof(DTDATA)); if (!node) return NULL; /* create a leaf node */ node->flags = DT_LEAF; /* set the leaf flag */ node->attid = dt->trgid; /* store the target attribute id */ node->size = 0; /* clear the vector size */ /* Again the cut value is abused for storing the frequency */ /* of tuples with a known value: */ frq = vt_colfrq(vartab, x); /* get and store the number of */ node->cut = (float)frq; /* tuples with a known value */ unk = vt_colfrq(vartab, -1); /* check for unknown values */ if (unk <= 0) { /* if there are no unknown values, */ node->frq = (float)frq; /* copy the aggregates directly */ node->err = vt_colsse(vartab, x); node->trg.f = (float)vt_colmean(vartab, x); } else { /* if there are unknown values, */ wgt = vt_known(vartab); /* compute the weighting factor */ unk *= wgt = (wgt > 0) ? frq/wgt : (1.0/vt_colcnt(vartab)); node->err = vt_colssv(vartab, x) +wgt *vt_colssv(vartab, -1); node->frq = (float)(wgt = frq +unk); node->trg.f = (float)(m = (frq *vt_colmean(vartab, x) +unk *vt_colmean(vartab,-1)) /wgt); node->err -= wgt *m*m; /* combine the column aggregates by */ } /* recomputing mean and sse */ if (node->err < 0) node->err = 0; return node; /* return the created leaf node */} /* _leaf_num() *//*--------------------------------------------------------------------*/void _delete (DTNODE *node){ /* --- recursively delete a subtree */ int i; /* loop variable */ DTDATA *data; /* to traverse the data vector */ assert(node); /* check the function argument */ if (!(node->flags & DT_LEAF)){/* if the node is no leaf */ for (data = node->vec +(i = node->size); --i >= 0; ) if ((--data)->child && !islink(data, node)) _delete(data->child); /* recursively delete all subtrees, */ } /* avoiding empty subtrees and links */ free(node); /* delete the node itself */} /* _delete() *//*--------------------------------------------------------------------*/#ifdef DT_PRUNEstatic DTNODE* _adapt (ATTSET *attset, DTNODE *node){ /* --- adapt test node to new values */ int i, n; /* loop variable, number of values */ DTNODE *new; /* adapted test node */ DTDATA *data, *tmp; /* to traverse data vector, buffer */ assert(attset && node); /* check the function arguments */ n = att_valcnt(as_att(attset, node->attid)); if (node->size >= n) /* get number of values and check the */ return node; /* node's data vector size against it */ tmp = node->vec; /* note the old data vector */ new = (DTNODE*)realloc(node, sizeof(DTNODE) +(n-1) *sizeof(DTDATA)); if (!new) return NULL; /* create a new test node */ if ((new != node) /* if the node has been changed */ && (new->flags & DT_LINK)) { /* and it contains links */ for (data = new->vec +(i = new->size); --i >= 0; ) { --data; /* traverse the data vector */ if ((data->link >= tmp) /* if a pointer is a link */ && (data->link < tmp +new->size)) data->link = new->vec +(data->link -tmp); } /* adapt the link, i.e., compute the */ } /* address of the new vector field */ data = new->vec +n; /* enlarge node and clear pointers */ do { (--data)->child = NULL; } while (++new->size < n); return new; /* return the enlarged node */} /* _adapt() *//*--------------------------------------------------------------------*/static DTNODE* _coll_rec (DTNODE *node, double *frq){ /* --- recursive part of _collapse */ int i; /* loop variable */ DTNODE *res, *tmp; /* resulting leaf node */ DTDATA *data; /* to traverse the data vector */ assert(node && frq); /* check the function arguments */ if (node->flags & DT_LEAF) { /* if the node is a leaf */ frq += node->size; /* traverse the frequency buffer */ for (data = node->vec +(i = node->size); --i >= 0; ) *--frq += (--data)->frq; /* aggregate the class frequencies */ return node; /* and return the leaf node */ } /* as a possible result node */ res = NULL; /* traverse the child nodes */ for (data = node->vec +(i = node->size); --i >= 0; ) { if (!(--data)->child || islink(data, node)) continue; /* skip empty nodes and links */ tmp = _coll_rec(data->child, frq); if (!res) res = tmp; /* recursively aggregate */ else free(tmp); /* the class frequencies */ } /* and keep one leaf node */ free(node); /* delete the current node */ return res; /* return the leaf node kept */} /* _coll_rec() *//*--------------------------------------------------------------------*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -