⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dtree2.orig

📁 dTree是一个运行在WinCE上的文件管理软件。类似文件管理器,功能强大
💻 ORIG
📖 第 1 页 / 共 5 页
字号:
static void _mul_wgt (TUPLE **tpls, int n, double f){                               /* --- multiply tuple weights */  INST *info;                   /* temporary buffer */  assert(tpls && (n >= 0));     /* check the function arguments */  for (tpls += n; --n >= 0; ) { /* traverse the tuples */    info = tpl_info(*--tpls);   /* get the additional information */    info->f = (float)(info->f *f);  }                             /* multiply with weighting factor */}  /* _mul_wgt() *//*--------------------------------------------------------------------*/#ifdef DT_PRUNEstatic double _sum_wgt (TUPLE **tpls, int n){                               /* --- sum tuple weights */  double wgt = 0;               /* traverse the tuples */  assert(tpls && (n >= 0));     /* check the function arguments */  for (tpls += n; --n >= 0; ) wgt += tpl_info(*--tpls)->f;  return wgt;                   /* sum weights and return this sum */}  /* _sum_wgt() */#endif/*----------------------------------------------------------------------  Comparison Functions----------------------------------------------------------------------*/#ifdef DT_GROWstatic int _cmp_int (const void *p1, const void *p2, void *data){                               /* --- compare a column of two tuples */  register int i1 = tpl_colval((TUPLE*)p1, (int)data)->i;  register int i2 = tpl_colval((TUPLE*)p2, (int)data)->i;  if     (i1 < i2) return -1;   /* get column values and */  return (i1 > i2) ? 1 : 0;     /* return sign of their difference */}  /* _cmp_int() *//*--------------------------------------------------------------------*/static int _cmp_flt (const void *p1, const void *p2, void *data){                               /* --- compare a column of two tuples */  register float f1 = tpl_colval((TUPLE*)p1, (int)data)->f;  register float f2 = tpl_colval((TUPLE*)p2, (int)data)->f;  if     (f1 < f2) return -1;   /* get column values and */  return (f1 > f2) ? 1 : 0;     /* return sign of their difference */}  /* _cmp_flt() */#endif/*----------------------------------------------------------------------  Auxiliary Functions for Growing and Pruning----------------------------------------------------------------------*/static TUPLE** _tuples (TABLE *table, int trgid, int *n){                               /* --- collect tuples w. known target */  int   i;                      /* loop variable */  int   type;                   /* type of target attribute */  TUPLE **vec, **p, *tpl;       /* to traverse the tuple vector */  INST  *inst;                  /* to access the target value */  assert(table && n             /* check the function arguments */      && (trgid >= 0) && (trgid < tab_colcnt(table)));  i = tab_tplcnt(table);        /* get the number of tuples */  p = vec = (TUPLE**)malloc(i *sizeof(TUPLE*));  if (!p) return NULL;          /* allocate a tuple vector */  type = att_type(tab_col(table, trgid));  while (--i >= 0) {            /* traverse the tuples in the table */    tpl  = tab_tpl(table, i);   /* and check the target value */    inst = tpl_colval(tpl, trgid);    if (((type == AT_SYM) && (inst->i <= UV_SYM))    ||  ((type == AT_INT) && (inst->i <= UV_INT))    ||  ((type == AT_FLT) && (inst->f <= UV_FLT)))      continue;                 /* skip tuples with unknown target, */    *p++ = tpl;                 /* but collect all other tuples */    tpl_info(tpl)->f = tpl_getwgt(tpl);  }                             /* copy the tuple weight */  *n = (int)(p -vec);           /* compute the number of tuples */  return vec;                   /* and return the tuple vector */}  /* _tuples() *//*--------------------------------------------------------------------*/static DTNODE* _leaf_sym (DTREE *dt, FRQTAB *frqtab, int x){                               /* --- create a leaf (sym. target) */  int    i, k;                  /* loop variable, class id */  DTNODE *node;                 /* created leaf node */  DTDATA *data;                 /* to traverse the data vector */  double sum, frq;              /* (sum of) the class frequencies */  double max = 0;               /* maximum of the class frequencies */  double wgt;                   /* weight of cases with unknown value */  assert(dt && frqtab           /* check the function arguments */      && (ft_known(frqtab)    > 0)      && (ft_frq_x(frqtab, x) > 0));  node = (DTNODE*)malloc(sizeof(DTNODE) +sizeof(DTDATA)*(dt->clscnt-1));  if (!node) return NULL;       /* create a leaf node */  node->flags = DT_LEAF;        /* set the leaf flag */  node->attid = dt->trgid;      /* store the class attribute id */  node->size  = dt->clscnt;     /* and the number of classes */  /* It is convenient to abuse the cut value for storing the   */  /* frequency of tuples with a known value. This is possible, */  /* because the cut value is not used in leaf nodes. Hence:   */  sum = ft_frq_x(frqtab, x);    /* get and store the number of */  node->cut = (float)sum;       /* tuples with a known value */  frq = ft_frq_x(frqtab, -1);   /* check for unknown values */  if (frq <= 0) {               /* if there are no unknown values */    data = node->vec;           /* traverse the classes */    for (i = k = 0; i < node->size; i++) {      frq = ft_frq_xy(frqtab, x, i);      if (frq > max) { max = frq; k = i; }      (data++)->frq = (float)frq;  /* note the class frequencies and */    } }                         /* determine the most frequent class */  else {                        /* if there are unknown values */    wgt = ft_known(frqtab);     /* compute the weighting factor */    wgt = (wgt > 0) ? sum/wgt : (1.0/ft_xcnt(frqtab));    sum += wgt *frq;            /* and add the weight of unknowns, */    data = node->vec;           /* then traverse the classes */    for (i = k = 0; i < node->size; i++) {      frq = ft_frq_xy(frqtab, x, i) +wgt *ft_frq_xy(frqtab, -1, i);      if (frq > max) { max = frq; k = i; }      (data++)->frq = (float)frq;    }                           /* compute the class frequencies and */  }                             /* determine the most frequent class */  node->frq   = (float)sum;     /* store the total frequency, */  node->err   = sum -max;       /* the number of leaf errors, */  node->trg.i = k;              /* and the most frequent class */  return node;                  /* return the created leaf node */}  /* _leaf_sym() *//*--------------------------------------------------------------------*/static DTNODE* _leaf_num (DTREE *dt, VARTAB *vartab, int x){                               /* --- create a leaf (num. target) */  DTNODE *node;                 /* created leaf node */  double frq, unk;              /* (sum of) the column frequencies */  double wgt;                   /* weight of cases with unknown value */  double m;                     /* temporary buffer */  assert(dt && vartab           /* check the function arguments */      && (vt_known (vartab)    > 0)      && (vt_colfrq(vartab, x) > 0));  node = (DTNODE*)malloc(sizeof(DTNODE) -sizeof(DTDATA));  if (!node) return NULL;       /* create a leaf node */  node->flags = DT_LEAF;        /* set the leaf flag */  node->attid = dt->trgid;      /* store the target attribute id */  node->size  = 0;              /* clear the vector size */  /* Again the cut value is abused for storing the frequency */  /* of tuples with a known value:                           */  frq = vt_colfrq(vartab, x);   /* get and store the number of */  node->cut = (float)frq;       /* tuples with a known value */  unk = vt_colfrq(vartab, -1);  /* check for unknown values */  if (unk <= 0) {               /* if there are no unknown values, */    node->frq   = (float)frq;   /* copy the aggregates directly */    node->err   = vt_colsse(vartab, x);    node->trg.f = (float)vt_colmean(vartab, x); }  else {                        /* if there are unknown values, */    wgt = vt_known(vartab);     /* compute the weighting factor */    unk *= wgt = (wgt > 0) ? frq/wgt : (1.0/vt_colcnt(vartab));    node->err   = vt_colssv(vartab, x) +wgt *vt_colssv(vartab, -1);    node->frq   = (float)(wgt = frq +unk);    node->trg.f = (float)(m   = (frq *vt_colmean(vartab, x)                                +unk *vt_colmean(vartab,-1)) /wgt);    node->err  -= wgt *m*m;     /* combine the column aggregates by */  }                             /* recomputing mean and sse */  if (node->err < 0) node->err = 0;  return node;                  /* return the created leaf node */}  /* _leaf_num() *//*--------------------------------------------------------------------*/void _delete (DTNODE *node){                               /* --- recursively delete a subtree */  int    i;                     /* loop variable */  DTDATA *data;                 /* to traverse the data vector */  assert(node);                 /* check the function argument */  if (!(node->flags & DT_LEAF)){/* if the node is no leaf */    for (data = node->vec +(i = node->size); --i >= 0; )      if ((--data)->child && !islink(data, node))        _delete(data->child);   /* recursively delete all subtrees, */  }                             /* avoiding empty subtrees and links */  free(node);                   /* delete the node itself */}  /* _delete() *//*--------------------------------------------------------------------*/#ifdef DT_PRUNEstatic DTNODE* _adapt (ATTSET *attset, DTNODE *node){                               /* --- adapt test node to new values */  int    i, n;                  /* loop variable, number of values */  DTNODE *new;                  /* adapted test node */  DTDATA *data, *tmp;           /* to traverse data vector, buffer */  assert(attset && node);       /* check the function arguments */  n = att_valcnt(as_att(attset, node->attid));  if (node->size >= n)          /* get number of values and check the */    return node;                /* node's data vector size against it */  tmp = node->vec;              /* note the old data vector */  new = (DTNODE*)realloc(node, sizeof(DTNODE) +(n-1) *sizeof(DTDATA));  if (!new) return NULL;        /* create a new test node */  if ((new != node)             /* if the node has been changed */  &&  (new->flags & DT_LINK)) { /* and it contains links */    for (data = new->vec +(i = new->size); --i >= 0; ) {      --data;                   /* traverse the data vector */      if ((data->link >= tmp)   /* if a pointer is a link */      &&  (data->link <  tmp +new->size))        data->link = new->vec +(data->link -tmp);    }                           /* adapt the link, i.e., compute the */  }                             /* address of the new vector field */  data = new->vec +n;           /* enlarge node and clear pointers */  do { (--data)->child = NULL; } while (++new->size < n);  return new;                   /* return the enlarged node */}  /* _adapt() *//*--------------------------------------------------------------------*/static DTNODE* _coll_rec (DTNODE *node, double *frq){                               /* --- recursive part of _collapse */  int    i;                     /* loop variable */  DTNODE *res, *tmp;            /* resulting leaf node */  DTDATA *data;                 /* to traverse the data vector */  assert(node && frq);          /* check the function arguments */  if (node->flags & DT_LEAF) {  /* if the node is a leaf */    frq += node->size;          /* traverse the frequency buffer */    for (data = node->vec +(i = node->size); --i >= 0; )      *--frq += (--data)->frq;  /* aggregate the class frequencies */    return node;                /* and return the leaf node */  }                             /* as a possible result node */  res = NULL;                   /* traverse the child nodes */  for (data = node->vec +(i = node->size); --i >= 0; ) {    if (!(--data)->child || islink(data, node))      continue;                 /* skip empty nodes and links */    tmp = _coll_rec(data->child, frq);    if (!res) res = tmp;        /* recursively aggregate */    else      free(tmp);        /* the class frequencies */  }                             /* and keep one leaf node */  free(node);                   /* delete the current node */  return res;                   /* return the leaf node kept */}  /* _coll_rec() *//*--------------------------------------------------------------------*/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -