⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dtree2.orig

📁 dTree是一个运行在WinCE上的文件管理软件。类似文件管理器,功能强大
💻 ORIG
📖 第 1 页 / 共 5 页
字号:
    att = as_att(gi->attset,i); /* but skip marked attributes */    if (att_getmark(att) < 0) continue;    if (att_type(att) == AT_SYM) curr = gi->eval_sym(gi, tpls,n,i,NULL);    else                         curr = gi->eval_num(gi, tpls,n,i,&cut);    if (gi->flags & DT_EVAL) {  /* if only to evaluate attributes */      gi->evals[i] = curr;      /* store the attribute evaluation */      att_info(att)->f = (att_type(att) == AT_SYM) ? 0 : cut;    }                           /* store a possible cut value */    if (curr <= best) continue; /* evaluate attribute (compute worth) */    best       = curr;          /* if the current worth is better */    tsel.col   = i;             /* than that of the best attribute, */    tsel.val.f = cut;           /* note worth, attribute id, and cut */    tab        = gi->best;      /* exchange the freq./var. tables, */    gi->best   = gi->curr;      /* so that the current table */    gi->curr   = tab;           /* becomes the best table */  }                             /* (much more efficient than copying) */  if ((gi->flags & DT_LEAF)     /* if only to evaluate attributes */  ||  (tsel.col < 0)            /* or no test attribute found */  ||  (best     < gi->minval))  /* or the best attribute is not */    return leaf;                /* good enough, abort the function */  /* --- create a test node --- */  att  = as_att(gi->attset, tsel.col);  i    = (att_type(att) == AT_SYM) ? att_valcnt(att) : 2;  node = (DTNODE*)malloc(sizeof(DTNODE) +(i-1) *sizeof(DTDATA));  if (!node) { gi->err = -1; return leaf; }  node->flags = 0;              /* create a test node */  node->attid = tsel.col;       /* set attribute id */  node->cut   = tsel.val.f;     /* and cut value */  node->frq   = leaf->frq;      /* copy the node frequency, */  node->err   = leaf->err;      /* the number of errors, */  node->trg   = leaf->trg;      /* and the target value */  node->size  = i;              /* set the data vector size */  /* --- create child nodes --- */  for (data = node->vec +(k = node->size); --k >= 0; ) {    if (gi->type == AT_SYM) i = ft_dest(gi->best, k);    else                    i = vt_dest(gi->best, k);    if (i == k)                 /* if the value is not linked, */      (--data)->link = NULL;    /* clear the link pointer */    else {                      /* if the value is linked */      (--data)->link = node->vec +i;      node->flags |= DT_LINK;   /* set a link to the dest. value */    }                           /* and set the flag for node links */  }                             /* (init. pointers for cleanup) */  if (gi->type == AT_SYM) {     /* if the target is symbolic */    for (data += k = node->size; --k >= 0; ) {      if (!(--data)->link       /* traverse all uncombined values */      &&  (ft_frq_x((FRQTAB*)gi->best, k) >= EPSILON)) {        data->child = _leaf_sym(gi->dtree, (FRQTAB*)gi->best, k);        if (!data->child) { _delete(node); gi->err = -1; return leaf; }      }                         /* create a leaf node */    }                           /* for all supported values */    frq = ft_known((FRQTAB*)gi->best); }  else {                        /* if the target is numeric */    for (data += k = node->size; --k >= 0; ) {      if (!(--data)->link       /* traverse all uncombined values */      &&  (vt_colfrq((VARTAB*)gi->best, k) >= EPSILON)) {        data->child = _leaf_num(gi->dtree, (VARTAB*)gi->best, k);        if (!data->child) { _delete(node); gi->err = -1; return leaf; }      }                         /* create a leaf node */    }                           /* for all supported values */    frq = vt_known((VARTAB*)gi->best);  }                             /* note the sum of the weights of */  known  = frq;                 /* tuples with a known att. value */  e_tree = 0;                   /* clear the number of errors */  gi->maxht--;                  /* reduce the maximal tree height */  /* --- branch on symbolic attribute --- */  if (att_type(att) == AT_SYM){ /* if the test attribute is symbolic */    mark = att_getmark(att);    /* mark attribute (to prevent tests) */    if (!(gi->flags & DT_SUBSET)) att_setmark(att, -1);    tsel.val.i = UV_SYM;        /* group tuples with */    i = _grp_sym(tpls,n,&tsel); /* an unknown attribute value */    group    = (node->flags & DT_LINK) ? _grp_set : _grp_sym;    frq      = known;           /* get the denominator of the weight */    tsel.vec = data;            /* traverse the attribute values */    for (data += tsel.val.i = node->size; --tsel.val.i >= 0; ) {      if (!(--data)->child      /* if an att. value is not supported */      ||  islink(data, node))   /* or combined with another value, */        continue;               /* skip the attribute value */      k = group(tpls+i, n-i, &tsel); /* group tuples with next value */      if (i > 0) {              /* if there are unknown values */        _mul_wgt(tpls, i, data->child->cut/frq);        frq = data->child->cut; /* weight tuples with unknown value, */      }                         /* note the denom. for reweighting */      data->child = _grow(gi, data->child, tpls, k+i);      if (gi->err < 0) { _delete(node); return leaf; }      e_tree += gi->err;        /* grow a leaf/subtree for the value */      if (i > 0)                /* if there are unknown values, */        group(tpls,k+i, &tsel); /* regroup tuples with current value */      tpls += k; n -= k;        /* and skip processed tuples */    }                               att_setmark(att, mark);     /* restore mark of test attribute */    if (i > 0) _mul_wgt(tpls, i, known/frq);  }                             /* reweight tuples with unknowns */  /* --- branch on numeric attribute --- */  else {                        /* if the test attribute is numeric */    if (att_type(att) == AT_FLT) {              group = _grp_flt; }    else { tsel.val.i = (int)floor(tsel.val.f); group = _grp_int; }    k = group(tpls, n, &tsel);  /* group tuples greater than the cut */    group = (att_type(att) == AT_FLT) ? _grp_uvflt : _grp_uvint;    i = group(tpls+k,n-k,&tsel);/* group tuples with unknown value */    if (i > 0) {                /* if there are unknown values */      _mul_wgt(tpls+k, i, data[0].child->cut/known);      frq = data[0].child->cut; /* weight tuples with unknown value */    }                           /* and note freq. for reweighting */    data[0].child = _grow(gi, data[0].child, tpls+k, n-k);    if (gi->err < 0) { _delete(node); return leaf; }    e_tree += gi->err;          /* grow a leaf/subtree for <= cut */    if (i > 0) {                /* if there are unknown values, */      group(tpls+k,n-k, &tsel); /* regroup tuples with unknown value */      _mul_wgt(tpls+k, i, data[1].child->cut/frq);      frq = data[1].child->cut; /* weight tuples with unknown value */    }                           /* and note freq. for reweighting */    data[1].child = _grow(gi, data[1].child, tpls, k+i);    if (gi->err < 0) { _delete(node); return leaf; }    e_tree += gi->err;          /* grow a leaf/subtree for > cut */    if (i > 0) {                /* if there are unknown values, */      group(tpls, k+i, &tsel);  /* regroup tuples with unknowns */      _mul_wgt(tpls, i, known/frq);    }                           /* reweight tuples with unknown value */  }  gi->maxht++;                  /* restore maximal (sub)tree height */  /* --- test subtree against leaf --- */  if (!(gi->flags & DT_NOPRUNE)  &&  (leaf->err < e_tree *(1+EPSILON))) {    _delete(node);              /* if the leaf is not worse */    gi->err = leaf->err;        /* than the subtree, */    return leaf;                /* discard the subtree */  }                             /* and return the leaf */  free(leaf);                   /* if the subtree is better, */  gi->err = e_tree;             /* discard the leaf and */  return node;                  /* return the subtree */}  /* _grow() *//*--------------------------------------------------------------------*/static DTREE* _cleanup (GROW *gi, int err){                               /* --- clean up after an error */  if (gi->type == AT_SYM) {     /* if the target is symbolic */    if (gi->curr) ft_delete(gi->curr);    if (gi->best) ft_delete(gi->best);    if (gi->numt) ft_delete(gi->numt); }  else {                        /* if the target is numeric */    if (gi->curr) vt_delete(gi->curr);    if (gi->best) vt_delete(gi->best);    if (gi->numt) vt_delete(gi->numt);  }                             /* delete frequency/variation tables */  if (gi->tpls) free(gi->tpls); /* and the tuple vector */  if (err) {                    /* if to clean up after an error */    if (gi->dtree) dt_delete(gi->dtree, 0);    if (gi->attset && (gi->flags & DT_DUPAS))      as_delete(gi->attset);    /* delete grown decision tree and */  }                             /* corresponding attribute set */  free(gi); return NULL;        /* delete tree grow information */}  /* _cleanup() *//*--------------------------------------------------------------------*/DTREE* dt_grow (TABLE *table, int trgid, int measure, double *params,                double minval, int maxht, float mincnt, int flags){                               /* --- grow dec./reg. tree from table */  int    i, m;                  /* loop variable, flagless measure */  TUPLE  **tp;                  /* to traverse the tuple vector */  int    tplcnt;                /* number of tuples */  int    valcnt;                /* number of possible values */  int    maxcnt;                /* maximal number of values */  ATTSET *attset;               /* attribute set */  ATT    *att;                  /* target attribute */  int    mark;                  /* mark     of the target attribute */  INST   *inst;                 /* instance of the target attribute */  double val;                   /* value    of the target attribute */  GROW   *gi;                   /* tree grow information */  DTREE  *dt;                   /* created decision tree */  assert(table);                /* check the function argument */  /* --- create an empty decision/regression tree --- */  attset = tab_attset(table);   /* get the attribute set */  for (maxcnt = 2, i = as_attcnt(attset); --i >= 0; ) {    if (i == trgid) continue;   /* traverse attributes except target */    valcnt = att_valcnt(as_att(attset, i));    if (valcnt > maxcnt) maxcnt = valcnt;  }                             /* determine maximum number of values */  i  = (flags & DT_SUBSET) ? maxcnt : 1;  gi = (GROW*)calloc(1, sizeof(GROW) +(i-1) *sizeof(int));  if (!gi) return NULL;         /* create the tree grow information */  gi->flags  = flags;           /* and store the induction flags */  if (flags & DT_DUPAS) attset = as_dup(attset);  if (!attset) return _cleanup(gi, 1);  gi->attset = attset;          /* duplicate att. set if requested */  gi->type   = att_type(as_att(attset, trgid));  if (gi->type == AT_SYM) {     /* if the target att. is symbolic */    m = measure & ~FEF_WGTD;    /* remove the flags from measure code */    if ((m <= FEM_NONE) || (m >= FEM_UNKNOWN))      measure = m = FEM_NONE; } /* check the selection measure code */  else {                        /* if the target att. is numeric */    m = measure & ~VEF_WGTD;    /* remove the flags from measure code */    if ((m <= VEM_NONE) || (m >= VEM_UNKNOWN))      measure = m = VEM_NONE;   /* check the selection measure code */  }  gi->measure = measure;        /* note measure, parameters, */  gi->params  = params;         /* minimal value for a split, */  gi->minval  = minval;         /* and minimal number of cases */  gi->mincnt  = (mincnt <= 0) ? (float)EPSILON : mincnt;  gi->maxht   = (maxht  <= 0) ? INT_MAX : (maxht -1);  gi->flags   = flags;          /* note maximal tree height and flags */  if (flags & DT_EVAL) {        /* if only to evaluate attributes */    gi->evals = (double*)malloc(as_attcnt(attset) *sizeof(double));    if (!gi->evals) return _cleanup(gi, 1);    att_info(as_att(attset, trgid))->p = gi->evals;  }                             /* create an evaluation vector */  /* --- create decision/regression tree --- */  dt = dt_create(attset, trgid);  if (!dt) return _cleanup(gi, 1);  gi->dtree = dt;               /* create an empty decision tree */  tp = _tuples(table, trgid, &tplcnt);  if (!tp) return _cleanup(gi, 1);  gi->tpls = tp;                /* collect tuples with known class */  /* --- create a root node --- */  if (gi->type == AT_SYM) {     /* if the target att. is symbolic */    gi->numt = ft_create(2, dt->clscnt);    if (!gi->numt) return _cleanup(gi, 1);    ft_init((FRQTAB*)gi->numt, 1, dt->clscnt);                                /* create and init. a freq. table */    for (tp += i = tplcnt; --i >= 0;) {      inst = tpl_colval(*--tp, trgid);      ft_add((FRQTAB*)gi->numt, 0, inst->i, tpl_info(*tp)->f);    }                           /* aggregate the tuple weight and */    ft_marg(gi->numt);          /* marginalize the frequency table */    if (ft_known((FRQTAB*)gi->numt) > 0) {  /* if there are tuples */      dt->root = _leaf_sym(dt, gi->numt, 0);      if (!dt->root) return _cleanup(gi, 1);    } }                         /* create a leaf node as the root */   else {                        /* if the target att. is numeric

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -