⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dtree2.orig

📁 dTree是一个运行在WinCE上的文件管理软件。类似文件管理器,功能强大
💻 ORIG
📖 第 1 页 / 共 5 页
字号:
static DTNODE* _collapse (DTREE *dt, DTNODE *node){                               /* --- collapse subtree into a leaf */  int    i;                     /* loop variable */  DTDATA *data;                 /* to traverse the data vector */  double *frq, *max;            /* to traverse the class frequencies */  double sum;                   /* sum of the class frequencies */  assert(dt && node);           /* check the function arguments */  if (!node || (node->flags & DT_LEAF))    return node;                /* if the node is a leaf, abort */  /* --- numeric target attribute --- */  if (dt->type != AT_SYM) {     /* if the target att. is numeric */    for (data = node->vec +(i = node->size); --i >= 0; )      if ((--data)->child && !islink(data, node))        _delete(data->child);   /* delete all subtrees of the node */    node->flags = DT_LEAF;      /* turn the node into a leaf, */    node->attid = dt->trgid;    /* store the target identifier, */    node->size  = 0;            /* and clear the vector size */    return (DTNODE*)realloc(node, sizeof(DTNODE) -sizeof(DTDATA));  }                             /* shrink the node to leaf size */  /* --- symbolic target attribute --- */  for (frq = dt->buf +(i = dt->clscnt); --i >= 0; )    *--frq = 0;                 /* clear the class frequencies */  node = _coll_rec(node, frq);  /* collapse the subtree into a leaf */  if (!node) return NULL;       /* if no result leaf found, abort */  max = frq = dt->buf; sum = *frq;  for (i = dt->clscnt; --i > 0; ) {    sum += *++frq;              /* traverse the class frequencies, */    if (*frq > *max) max = frq; /* compute the frequency sum, and */  }                             /* determine the most frequent class */  frq = dt->buf +node->size;    /* traverse the counter vector */  for (data = node->vec +(i = node->size); --i >= 0; )    (--data)->frq = (float)*--frq; /* copy the class frequencies */  node->frq   = (float)sum;        /* set the node frequency, */  node->err   = sum -*max;         /* the number of errors, and */  node->trg.i = (int)(max-frq);    /* the most frequent class */  return node;                  /* return the modified leaf node */}  /* _collapse() */#endif  /* #ifdef DT_PRUNE *//*--------------------------------------------------------------------*/static int _count (DTREE *dt, DTNODE *node, int height){                               /* --- count the number of nodes */  int    i, n = 1;              /* loop variable, node counter */  DTDATA *data;                 /* to traverse the data vector */  assert(dt && node && (height >= 0));  /* check the function args. */  if (++height > dt->height)    /* if beyond the curr. max. height, */    dt->height = height;        /* update the maximal height */  if (!(node->flags & DT_LEAF)){/* if the current node is no leaf */    for (data = node->vec +(i = node->size); --i >= 0; )      if ((--data)->child && !islink(data, node))        n += _count(dt, data->child, height);  }                             /* recursively count the nodes */  return n;                     /* return the number of nodes */}  /* _count() *//*----------------------------------------------------------------------  Evaluation Functions for Symbolic Target Attributes----------------------------------------------------------------------*/#ifdef DT_GROWstatic double _sym_sym (GROW *gi, TUPLE **tpls, int n,                        int attid, float *cut){                               /* --- evaluate a symbolic attribute */  int i, k;                     /* class attribute id, loop variable */  int cls, val;                 /* class and attribute value */  assert(gi && tpls && (n > 0));/* check the function arguments */  k = att_valcnt(as_att(gi->attset, attid));  ft_init((FRQTAB*)gi->curr, k, gi->dtree->clscnt);  i = gi->dtree->trgid;         /* initialize the frequency table */  for (tpls += n; --n >= 0; ) { /* and traverse the tuples */    cls = tpl_colval(*--tpls, i)->i;    val = tpl_colval(*tpls, attid)->i;    ft_add((FRQTAB*)gi->curr, val, cls, tpl_info(*tpls)->f);  }                             /* fill the frequency table */  ft_marg(gi->curr);            /* and marginalize it */  if (gi->flags & DT_SUBSET)    /* if only called to compute the */    return 0;                   /* initial table, abort the function */  for (i = 0; --k >= 0; )       /* count reasonable tuple sets */    if (ft_frq_x((FRQTAB*)gi->curr, k) >= gi->mincnt)      i++;                      /* there must be at least two values */  if (i < 2) return WORTHLESS;  /* with at least _mincnt cases each */  return ft_eval((FRQTAB*)gi->curr, gi->measure, gi->params);}  /* _sym_sym() */             /* evaluate the frequency table *//*--------------------------------------------------------------------*/static double _sym_set (GROW *gi, TUPLE **tpls, int n,                        int attid, float *cut){                               /* -- evaluate subsets of sym. att. */  int    i, k, cls;             /* loop variables */  int    valcnt;                /* number of values/subsets */  int    rtscnt;                /* number of reasonable tuple sets */  int    *sets;                 /* to traverse subset index vector */  int    isrc, idst, imax;      /* indices     of sets to combine */  double fsrc, fdst, fmax;      /* frequencies of sets to combine */  double curr, prev, best;      /* current/previous/best worth */  assert(gi && tpls && (n > 0));     /* check the function arguments */  _sym_sym(gi, tpls, n, attid, cut); /* compute initial freq. table */  /* --- remove unsupported and merge single class values --- */  valcnt = att_valcnt(as_att(gi->attset, attid));  imax   = rtscnt = 0;          /* initialize the variables and */  fmax   = 0; sets = gi->sets;  /* traverse the set index vector */  for (i = 0; i < valcnt; i++){ /* and the attribute values */    if (ft_dest((FRQTAB*)gi->curr, i) != i)      continue;                 /* skip linked/combined values */    fdst = ft_frq_x((FRQTAB*)gi->curr, i);    if (fdst <= 0) continue;    /* skip unsupported attribute values */    *sets++ = i;                /* note value index in index vector */    for (cls = gi->dtree->clscnt; --cls >= 0; ) {      fsrc = ft_frq_xy((FRQTAB*)gi->curr, i, cls);      if (fsrc < fdst *(1-EPSILON)) { /* if no single class set, */        if (fsrc > EPSILON) break;    /* if class is supp., abort */        else continue;                /* otherwise continue */      }                               /* with the next class */      for (k = i; ++k < valcnt; ) {   /* if single class tuple set, */        fsrc = ft_frq_x((FRQTAB*)gi->curr, k);         if (fsrc <= 0) continue;      /* traverse remaining values */        if (ft_frq_xy((FRQTAB*)gi->curr, k, cls) >= fsrc *(1-EPSILON)) {          ft_comb((FRQTAB*)gi->curr, k, i); fdst += fsrc; }      } break;                  /* if second single class set with */    }                           /* same class found, combine values */    if (fdst > fmax) { fmax = fdst; imax = i; }    if (fdst >= gi->mincnt) rtscnt++;  }                             /* count reasonable tuple sets */  valcnt = (int)(sets -gi->sets);  sets   = gi->sets;            /* compute new number of values */  /* --- find best pair mergers --- */  prev = ft_eval((FRQTAB*)gi->curr, gi->measure, gi->params);  if (valcnt <= 2)              /* check whether a merge is possible */    return (rtscnt >= 2) ? prev : WORTHLESS;  while (1) {                   /* select and merge loop */    best = WORTHLESS;           /* initialize worth of partition */    isrc = idst = 0;            /* and value set indices */    for (i = 0; i < valcnt; i++) {  /* traverse the value sets */      if ((rtscnt <= 2)         /* if less than two reasonable sets, */      &&  (i == imax)) continue;    /* skip mergers with largest set */      for (k = i+1; k < valcnt; k++) {    /* traverse remaining sets */        if ((rtscnt <= 2)       /* if less than two reasonable sets, */        &&  (k == imax)) continue;  /* skip mergers with largest set */        ft_comb((FRQTAB*)gi->curr, sets[k], sets[i]);/* combine sets */        curr = ft_eval((FRQTAB*)gi->curr, gi->measure, gi->params);        ft_uncomb((FRQTAB*)gi->curr, sets[k]); /* evaluate partition, */        if (curr <= best) continue;            /* then uncombine sets */        best = curr;            /* if the current partition is better */        isrc = k;               /* than the best one found up to now, */        idst = i;               /* note worth and value set indices */      }                         /* of the current pair merger */    }    if ((best <= WORTHLESS)     /* if no useful partition found */    ||  (best <  prev))         /* or best partition found is worse */      break;                    /* than the previous one, abort loop */    prev = best;                /* note the best worth */    fsrc = ft_frq_x((FRQTAB*)gi->curr, sets[isrc]);    if (fsrc >= gi->mincnt)     /* get the source frequency */      rtscnt--;                 /* and update reasonable tuple sets */    fdst = ft_frq_x((FRQTAB*)gi->curr, sets[idst]);    if (fdst >= gi->mincnt)     /* get the destination frequency */      rtscnt--;                 /* and update reasonable tuple sets */    ft_comb(gi->curr, sets[isrc], sets[idst]);    fdst = ft_frq_x((FRQTAB*)gi->curr, sets[idst]);    if (fdst >= gi->mincnt)     /* combine sets, get new frequency, */      rtscnt++;                 /* and update reasonable tuple sets */    if (fdst >= fmax) { fmax = fdst; imax = idst; }    if (imax > isrc) imax--;    /* adapt vector index of largest set */    for (i = isrc; i < valcnt-1; i++)      sets[i] = sets[i+1];      /* remove source set from vector */    valcnt--;                   /* (shift to preserve value order) */  }                             /* and reduce number of values */  if (rtscnt < 2) return WORTHLESS;  return prev;                  /* return the evaluation result */}  /* _sym_set() *//*--------------------------------------------------------------------*/static double _sym_num (GROW *gi, TUPLE **tpls, int n,                        int attid, float *cut){                               /* --- evaluate a numeric attribute */  int    i, k;                  /* loop variable, class att. index */  int    type;                  /* type of the test attribute */  int    cls;                   /* class of current tuple */  INST   *val, *pval;           /* value of current and prec. tuple */  double wgt,  sum = 0;         /* tuple weight, sum of tuple weights */  double curr, best;            /* current and best cut worth */  assert(gi && tpls && (n > 0) && cut);   /* check the function args. */  /* --- build initial frequency table --- */  type = att_type(as_att(gi->attset, attid));  v_sort(tpls, n, (type == AT_FLT) ? _cmp_flt : _cmp_int, (void*)attid);  ft_init((FRQTAB*)gi->numt, 2, gi->dtree->clscnt);  k = gi->dtree->trgid;         /* sort tuples and initialize table */  while (1) {                   /* traverse tuples with unknown value */    if (n <= 1)                 /* if there is at most one tuple */      return WORTHLESS;         /* with a known att. value, abort */    val = tpl_colval(*tpls, attid);  /* get the next value */    if ((type == AT_INT) ? (val->i > UV_INT)    :                      (val->f > UV_FLT))      break;                    /* if the value is known, abort loop */    n--;                        /* go to the next tuple */    cls = tpl_colval(*tpls, k)->i;   /* get the class */    wgt = tpl_info  (*tpls++)->f;    /* and the tuple weight */    ft_add((FRQTAB*)gi->numt, -1, cls, wgt);  }                             /* store the tuple weight */  do {                          /* traverse tuples with known value */    if (--n <= 0)               /* if there is no tuple left for */      return WORTHLESS;         /* the other side of the cut, abort */    cls = tpl_colval(*tpls, k)->i;   /* get the class */    wgt = tpl_info  (*tpls++)->f;    /* and the tuple weight */    ft_add((FRQTAB*)gi->numt, 0, cls, wgt);    sum += wgt;                 /* store the tuple weight */  } while (sum < gi->mincnt);   /* while minimal size not reached */  for (i = n; --i >= 0; ) {     /* traverse the remaining tuples */    cls = tpl_colval(tpls[i], k)->i; /* get the class */    wgt = tpl_info  (tpls[i])->f;    /* and the tuple weight */    ft_add((FRQTAB*)gi->numt, 1, cls, wgt);  }                             /* store the tuple weight */  ft_marg(gi->numt);            /* marginalize frequency table and */  sum = ft_frq_x((FRQTAB*)gi->numt,1); /* get weight of upper part */  if (sum < gi->mincnt) return WORTHLESS;  /* --- find the best cut value --- */  pval = tpl_colval(*(tpls-1), attid);  best = WORTHLESS;             /* note the class and the value and */  *cut = UV_FLT;                /* init. the worth and the cut value */  while (1) {                   /* traverse the tuples above the cut */    val = tpl_colval(*tpls, attid);  /* if next value differs */    if ((type == AT_INT) ? (val->i > pval->i)    :     (0.5F *(pval->f +val->f) > pval->f)) {      curr = ft_eval((FRQTAB*)gi->numt, gi->measure, gi->params);      if (curr >= best) {       /* evaluate the frequency table, */        best = curr;            /* update the best cut worth and */        ft_copy((FRQTAB*)gi->curr, gi->numt);  /* copy the table */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -