⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dtree2.orig

📁 dTree是一个运行在WinCE上的文件管理软件。类似文件管理器,功能强大
💻 ORIG
📖 第 1 页 / 共 5 页
字号:
        *cut = (type == AT_INT) /* depending on the attribute type */             ? (0.5F *((float)val->i +pval->i))             : (0.5F *(       val->f +pval->f));      }                         /* compute the cut value as the */    }                           /* average of the adjacent values */    if (--n <= 0) break;        /* if on the last tuple, abort loop */    sum -= wgt = tpl_info(*tpls)->f;  /* get and sum the tuple weight */    if (sum < gi->mincnt)       /* if the weight of the remaining */      break;                    /* tuples is too small, abort loop */    cls = tpl_colval(*tpls, k)->i;    ft_move((FRQTAB*)gi->numt, 1, 0, cls, wgt);    pval = val; tpls++;         /* move weight in frequency table, */  }                             /* note the value, and get next tuple */  return best;                  /* return worth of best cut */}  /* _sym_num() *//*----------------------------------------------------------------------  Evaluation Functions for Numeric Target Attributes----------------------------------------------------------------------*/static double _num_sym (GROW *gi, TUPLE **tpls, int n,                        int attid, float *cut){                               /* --- evaluate a symbolic attribute */  int    i, k;                  /* class attribute id, loop variable */  INST   *inst;                 /* instance of the target attribute */  double val;                   /* value of candidate test attribute */  int    vid;                   /* identifier of attribute value */  assert(gi && tpls && (n > 0));/* check the function arguments */  k = att_valcnt(as_att(gi->attset, attid));  vt_init((VARTAB*)gi->curr, k);  i = gi->dtree->trgid;         /* initialize the variation table */  for (tpls += n; --n >= 0; ) { /* and traverse the tuples */    inst = tpl_colval(*--tpls, i);    val  = (double)((gi->type == AT_INT) ? inst->i : inst->f);    vid  = tpl_colval(*tpls, attid)->i;    vt_add((VARTAB*)gi->curr, vid, val, tpl_info(*tpls)->f);  }                             /* fill the variation table */  vt_calc(gi->curr);            /* and calculate aggregates */  if (gi->flags & DT_SUBSET)    /* if only called to compute the */    return 0;                   /* initial table, abort the function */  for (i = 0; --k >= 0; )       /* count reasonable tuple sets */    if (vt_colfrq((VARTAB*)gi->curr, k) >= gi->mincnt)      i++;                      /* there must be at least two values */  if (i < 2) return WORTHLESS;  /* with at least _mincnt cases each */  return vt_eval((VARTAB*)gi->curr, gi->measure, gi->params);}  /* _num_sym() */             /* evaluate the variation table *//*--------------------------------------------------------------------*/static double _num_set (GROW *gi, TUPLE **tpls, int n,                        int attid, float *cut){                               /* -- evaluate subsets of sym. att. */  int    i, k;                  /* loop variables */  int    valcnt;                /* number of values/subsets */  int    rtscnt;                /* number of reasonable tuple sets */  int    *sets;                 /* to traverse subset index vector */  int    isrc, idst, imax;      /* indices     of sets to combine */  double fsrc, fdst, fmax;      /* frequencies of sets to combine */  double curr, prev, best;      /* current/previous/best worth */  assert(gi && tpls && (n > 0));     /* check the function arguments */  _num_sym(gi, tpls, n, attid, cut); /* compute initial freq. table */  /* --- collect supported values --- */  valcnt = att_valcnt(as_att(gi->attset, attid));  imax   = rtscnt = 0;          /* initialize the variables and */  fmax   = 0; sets = gi->sets;  /* traverse the set index vector */  for (i = 0; i < valcnt; i++){ /* and the attribute values */    if (vt_dest((VARTAB*)gi->curr, i) != i)      continue;                 /* skip linked/combined values */    fdst = vt_colfrq((VARTAB*)gi->curr, i);    if (fdst <= 0) continue;    /* skip unsupported attribute values */    *sets++ = i;                /* note value index in index vector */    if (fdst > fmax) { fmax = fdst; imax = i; }    if (fdst >= gi->mincnt) rtscnt++;  }                             /* count reasonable tuple sets */  valcnt = (int)(sets -gi->sets);  sets   = gi->sets;            /* compute new number of values */  /* --- find best pair mergers --- */  prev = vt_eval((VARTAB*)gi->curr, gi->measure, gi->params);  if (valcnt <= 2)              /* check whether a merge is possible */    return (rtscnt >= 2) ? prev : WORTHLESS;  while (1) {                   /* select and merge loop */    best = WORTHLESS;           /* initialize worth of partition */    isrc = idst = 0;            /* and value set indices */    for (i = 0; i < valcnt; i++) {  /* traverse the value sets */      if ((rtscnt <= 2)         /* if less than two reasonable sets, */      &&  (i == imax)) continue;    /* skip mergers with largest set */      for (k = i+1; k < valcnt; k++) {    /* traverse remaining sets */        if ((rtscnt <= 2)       /* if less than two reasonable sets, */        &&  (k == imax)) continue;  /* skip mergers with largest set */        vt_comb((VARTAB*)gi->curr, sets[k], sets[i]);/* combine sets */        curr = vt_eval(gi->curr, gi->measure, gi->params);        vt_uncomb((VARTAB*)gi->curr, sets[k]); /* evaluate partition, */        if (curr <= best) continue;            /* then uncombine sets */        best = curr;            /* if the current partition is better */        isrc = k;               /* then the best one found up to now, */        idst = i;               /* note worth and value set indices */      }                         /* of the current pair merger */    }    if ((best <= WORTHLESS)     /* if no useful partition found */    ||  (best <  prev))         /* or best partition found is worse */      break;                    /* than the previous one, abort loop */    prev = best;                /* note the best worth */    fsrc = vt_colfrq((VARTAB*)gi->curr, sets[isrc]);    if (fsrc >= gi->mincnt)     /* get the source frequencies */      rtscnt--;                 /* and update reasonable tuple sets */    fdst = vt_colfrq((VARTAB*)gi->curr, sets[idst]);    if (fdst >= gi->mincnt)     /* get the destination frequency */      rtscnt--;                 /* and update reasonable tuple sets */    vt_comb(gi->curr, sets[isrc], sets[idst]);    fdst = vt_colfrq((VARTAB*)gi->curr, sets[idst]);    if (fdst >= gi->mincnt)     /* combine sets, get new frequency, */      rtscnt++;                 /* and update reasonable tuple sets */    if (fdst > fmax) { fmax = fdst; imax = idst; }    if (imax > isrc) imax--;    /* adapt vector index of largest set */    for (i = isrc; i < valcnt-1; i++)      sets[i] = sets[i+1];      /* remove source set from vector */    valcnt--;                   /* (shift to preserve value order) */  }                             /* and reduce number of values */  if (rtscnt < 2) return WORTHLESS;  return prev;                  /* return the evaluation result */}  /* _num_set() *//*--------------------------------------------------------------------*/static double _num_num (GROW *gi, TUPLE **tpls, int n,                        int attid, float *cut){                               /* --- evaluate a numeric attribute */  int    i, k;                  /* loop variable, target att. index */  int    type, t;               /* type of the test/target attribute */  INST   *inst;                 /* target instance of current tuple */  double trg;                   /* target value    of current tuple */  INST   *val, *pval;           /* value of current and prec. tuple */  double wgt,  sum = 0;         /* tuple weight, sum of tuple weights */  double curr, best;            /* current and best cut worth */  assert(gi && tpls && (n > 0) && cut);   /* check the function args. */  /* --- build initial variation table --- */  type = att_type(as_att(gi->attset, attid));  v_sort(tpls, n, (type == AT_FLT) ? _cmp_flt : _cmp_int, (void*)attid);  vt_init((VARTAB*)gi->numt,2); /* sort tuples and initialize table */  k = gi->dtree->trgid;         /* note the target attribute index */  t = gi->dtree->type;          /* and the target type */  while (1) {                   /* traverse tuples with unknown value */    if (n <= 1)                 /* if there is at most one tuple */      return WORTHLESS;         /* with a known att. value, abort */    val = tpl_colval(*tpls, attid);   /* get the next value */    if ((type == AT_INT) ? (val->i > UV_INT)    :                      (val->f > UV_FLT))      break;                    /* if the value is known, abort loop */    n--;                        /* go to the next tuple */    inst = tpl_colval(*tpls, k);    trg  = (double)((t == AT_INT) ? inst->i : inst->f);    wgt  = tpl_info(*tpls++)->f;/* get the target value */    vt_add((VARTAB*)gi->numt, -1, trg, wgt);  }                             /* store the tuple weight */  do {                          /* traverse tuples with known value */    if (--n <= 0)               /* if there is no tuple left for */      return WORTHLESS;         /* the other side of the cut, abort */    inst = tpl_colval(*tpls, k);    trg  = (double)((t == AT_INT) ? inst->i : inst->f);    wgt  = tpl_info(*tpls++)->f;      /* get the target value and */    vt_add(gi->numt, 0, trg, wgt);    /* store the tuple weight */    sum += wgt;                       /* in the variation table */  } while (sum < gi->mincnt);   /* while minimal size not reached */  for (i = n; --i >= 0; ) {     /* traverse the remaining tuples */    inst = tpl_colval(tpls[i], k);    trg  = (double)((t == AT_INT) ? inst->i : inst->f);    wgt  = tpl_info(tpls[i])->f;/* get the target value */    vt_add((VARTAB*)gi->numt, 1, trg, wgt);      }                             /* store the tuple weight */  vt_calc((VARTAB*)gi->numt);   /* calculate aggregates and get */  sum = vt_colfrq((VARTAB*)gi->numt,1); /* weight of upper part */  if (sum < gi->mincnt) return WORTHLESS;  /* --- find the best cut value --- */  pval = tpl_colval(*(tpls-1), attid);  best = WORTHLESS;             /* note the class and the value and */  *cut = UV_FLT;                /* init. the worth and the cut value */  while (1) {                   /* traverse the tuples above the cut */    val = tpl_colval(*tpls, attid);  /* if next value differs */    if ((type == AT_INT) ? (val->i > pval->i)    :     (0.5F *(pval->f +val->f) > pval->f)) {      curr = vt_eval((VARTAB*)gi->numt, gi->measure, gi->params);      if (curr >= best) {       /* evaluate the variation table, */        best = curr;            /* update the best cut worth, and */        vt_copy((VARTAB*)gi->curr, gi->numt);   /* copy the table */        *cut = (type == AT_INT) /* depending on the attribute type */             ? (0.5F *((float)val->i +pval->i))             : (0.5F *(       val->f +pval->f));      }                         /* compute the cut value as the */    }                           /* average of the adjacent values */    if (--n <= 0) break;        /* if on the last tuple, abort loop */    sum -= wgt = tpl_info(*tpls)->f;  /* get and sum the tuple weight */    if (sum < gi->mincnt)       /* if the weight of the remaining */      break;                    /* tuples is too small, abort loop */    inst = tpl_colval(*tpls, k);    trg  = (double)((t == AT_INT) ? inst->i : inst->f);    vt_move((VARTAB*)gi->numt, 1, 0, trg, wgt);    pval = val; tpls++;         /* move weight in variation table, */  }                             /* note the value, and get next tuple */  return best;                  /* return worth of best cut */}  /* _num_num() *//*----------------------------------------------------------------------  Growing Functions----------------------------------------------------------------------*/static DTNODE* _grow (GROW *gi, DTNODE *leaf, TUPLE **tpls, int n){                               /* --- recursively grow tree */  int    i, k;                  /* loop variables, buffers */  int    mark;                  /* buffer for attribute mark */  ATT    *att;                  /* current/test attribute */  double curr, best;            /* current and best worth */  float  cut;                   /* cut value for best attribute */  void   *tab;                  /* exchange buffer for eval. tables */  DTNODE *node;                 /* created test node (subtree) */  DTDATA *data;                 /* to traverse the data vector */  TSEL   tsel;                  /* tuple selection information */  GRPFN  *group;                /* tuple grouping function */  double frq, known;            /* tuple frequencies for weighting */  double e_tree;                /* sum of subtree errors */  assert(leaf && tpls && (n > 0)); /* check the function arguments */  /* --- check the leaf node --- */  gi->err = leaf->err;          /* set the number of leaf errors */  if ((leaf->frq < 2*gi->mincnt)/* if too few tuples to divide */  ||  (leaf->err < MINERROR)    /* or all cases have the same value */  ||  (gi->maxht <= 0))         /* or maximal tree height reached, */    return leaf;                /* simply return the leaf node */  /* --- search for a test attribute --- */  best       = WORTHLESS;       /* clear worth of best attribute, */  tsel.col   = -1;              /* identifier of best attribute, */  tsel.val.f = cut = UV_FLT;    /* and cut value (numeric atts.) */  k = as_attcnt(gi->attset);    /* get the number of attributes */  for (i = 0; i < k; i++) {     /* traverse the attributes, */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -