⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dtree1.c

📁 dTree是一个运行在WinCE上的文件管理软件。类似文件管理器,功能强大
💻 C
📖 第 1 页 / 共 4 页
字号:
/*----------------------------------------------------------------------  File    : dtree1.c  Contents: decision and regression tree management, base functions  Author  : Christian Borgelt  History : 12.06.1997 file created (as dtree.c)            08.08.1997 first version completed            25.08.1997 multiple child references made possible            30.08.1997 treatment of new values during pruning enabled            25.08.1997 multiple child references replaced by links            02.02.1998 links between children simplified            03.02.1998 adapted to changed interface of module 'table'            27.05.1998 treatment of val == cut in _exec changed            31.05.1998 rule extraction adapted to interface changes            15.10.1998 major revision completed            20.10.1998 output of relative class frequencies added            15.12.1998 minor improvements, assertions added            13.11.1999 bug in parse error reporting removed            02.12.2000 growing and pruning functions moved to dtree2.c            16.12.2000 decision tree parsing simplified            17.12.2000 regression tree functionality added            22.07.2001 all global variables removed            27.08.2001 bug in _aggr concerning node->cls fixed            12.10.2001 parse error recovery corrected/improved            11.01.2002 number of attributes stored in DTREE structure            12.08.2004 adapted to new module parse----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#include <assert.h>#include "vecops.h"#include "dtree.h"#ifdef STORAGE#include "storage.h"#endif/*----------------------------------------------------------------------  Preprocessor Definitions----------------------------------------------------------------------*/#define EPSILON     1e-6        /* to handle roundoff errors */#define BS_PATH     32          /* block size for the path vector */#define islink(d,n) (((d)->link >= (n)->vec) && \                     ((d)->link <  (n)->vec +(n)->size))/*----------------------------------------------------------------------  Type Definitions----------------------------------------------------------------------*/typedef struct {                /* --- decision tree output info --- */  FILE    *file;                /* output file */  ATTSET  *attset;              /* attribute set of decision tree */  int     type;                 /* target attribute type */  int     ind;                  /* indentation (in characters) */  int     pos;                  /* position in output line */  int     max;                  /* maximal line length */  int     mode;                 /* description mode, e.g. DT_ALIGN */  char    buf[4*AS_MAXLEN+68];  /* output buffer */} DTOUT;                        /* (decision tree output info) */#ifdef DT_RULEStypedef struct {                /* --- rule extraction info --- */  RULESET *ruleset;             /* rule set to add to */  ATTSET  *attset;              /* attribute set */  int     type;                 /* target type */  int     subset[1];            /* subset of attribute values */} DTRULE;                       /* (rule extraction info) */#endif/*----------------------------------------------------------------------  Recursive Parts of Main Functions----------------------------------------------------------------------*/static void _delete (DTNODE *node){                               /* --- recursively delete a subtree */  int    i;                     /* loop variable */  DTDATA *data;                 /* to traverse the data vector */  assert(node);                 /* check the function argument */  if (!(node->flags & DT_LEAF)){/* if the node is no leaf */    for (data = node->vec +(i = node->size); --i >= 0; )      if ((--data)->child && !islink(data, node))        _delete(data->child);   /* recursively delete all subtrees, */  }                             /* avoiding empty subtrees and links */  free(node);                   /* delete the node itself */}  /* _delete() *//*--------------------------------------------------------------------*/static void _aggr (DTNODE *node, int n, double *buf){                               /* --- aggregate node data */  int    i;                     /* loop variable */  DTDATA *data, *dmax;          /* to traverse data vector */  double *frq,  *max;           /* to traverse the class frequencies */  double sum;                   /* sum of the class frequencies */  assert(node && buf);          /* check the function arguments */  /* --- leaf node --- */  if (node->flags & DT_LEAF) {  /* if the node is a leaf */    if (n < 0) {                /* if the target att. is numeric, */      buf[0] += node->frq;      /* compute sums over f, f*v, f*v*v */      buf[1] += sum = node->frq *node->trg.f;      buf[2] += node->err + sum *node->trg.f; }    else {                      /* if the target att. is symbolic, */      assert(node->size == n);  /* check the data vector size */      dmax = data = node->vec;  /* get pointers to the data vector */      *buf += sum = data->frq;  /* and start summing the frequencies */      while (--n > 0) {         /* traverse the data vector */        sum += (++data)->frq;   /* sum the class frequencies and */        *++buf += data->frq;    /* aggregate them for classes */        if (data->frq > dmax->frq) dmax = data;      }                         /* update the maximal frequency */      node->frq = (float)sum;   /* note the node frequency and */      node->err = sum -dmax->frq;      /* the number of errors */      node->trg.i = (int)(dmax -node->vec);    }                           /* determine the most freq. class */    return;                     /* leaves terminate the recursion, */  }                             /* so abort the function */  /* --- test node --- */  for (frq = buf +abs(n) +(i = abs(n)); --i >= 0; )    *--frq = 0;                 /* clear the next buffer section */  for (data = node->vec +(i = node->size); --i >= 0; ) {    if ((--data)->child && !islink(data, node))      _aggr(data->child, n, frq);  }                             /* aggregate data recursively */  if (n < 0) {                  /* if the target att. is numeric, */    buf[0] += frq[0];           /* aggregate sums over f, f*v, f*v*v */    buf[1] += frq[1];           /* for the next higher level and */    buf[2] += frq[2];           /* compute the node information */    sum = (frq[0] > 0) ? frq[1] /frq[0] : 0;    node->frq = (float)frq[0];  /* note the node frequency and the */    node->err = frq[2] -sum *frq[1];      /* sum of squared errors */    node->trg.f = (float)sum; } /* note the mean value for the pred. */  else {                        /* if the target att. is symbolic */    max = frq; *buf += sum = *frq;    while (--n > 0) {           /* traverse the class frequencies */      sum    += *++frq;         /* sum the class frequencies and */      *++buf += *frq;           /* aggregate them for classes */      if (*frq > *max) max = frq;    }                           /* update the maximal frequency */    node->frq = (float)sum;     /* note the node frequency */    node->err = sum -*max;      /* and the number of errors */    node->trg.i = (int)(max-buf-1);  }                             /* determine the most freq. class */}  /* _aggr() *//*--------------------------------------------------------------------*/static void _mark (ATTSET *attset, DTNODE *node){                               /* --- mark occurring attributes */  int    i;                     /* loop variable */  DTDATA *data;                 /* to traverse data vector */  assert(attset && node);       /* check the function arguments */  if (node->flags & DT_LEAF)    /* if the node is a leaf, abort, */    return;                     /* otherwise mark the test attribute */  att_setmark(as_att(attset, node->attid), 1);  for (data = node->vec +(i = node->size); --i >= 0; )    if ((--data)->child && !islink(data, node))      _mark(attset,data->child);/* recursively mark the attributes, */}  /* _mark() */                /* avoiding empty subtrees and links *//*----------------------------------------------------------------------  Main Functions----------------------------------------------------------------------*/DTREE* dt_create (ATTSET *attset, int trgid){                               /* --- create a decision tree */  int   k;                      /* size of the buffer */  int   colcnt;                 /* number of table columns */  int   clscnt;                 /* number of classes (sym. target) */  ATT   *att;                   /* target attribute */  int   type;                   /* type of the target attribute */  DTREE *dt;                    /* created decision tree */  assert(attset);               /* check the function argument */  colcnt = as_attcnt(attset);   /* get the number of columns */  if ((trgid < 0) || (trgid >= colcnt))    trgid = colcnt -1;          /* check and adapt the target id, */  att  = as_att(attset, trgid); /* then get the target attribute */  type = att_type(att);         /* and its type */  if (type != AT_SYM) {         /* if the target att. is numeric, */    clscnt = 0; k = 3; }        /* there are no classes */  else {                        /* if the target att. is symbolic */    clscnt = k = att_valcnt(att);    assert(clscnt >= 1);        /* get and check */  }                             /* the number of classes */  dt = (DTREE*)malloc(sizeof(DTREE));  if (!dt) return NULL;         /* create a dec./reg. tree body */  dt->buf = (double*)malloc(k *sizeof(double));  if (!dt->buf) { free(dt); return NULL; }                                /* create a buffer vector */  dt->attset = attset;          /* initialize the fields: */  dt->trgid  = trgid;           /* note the attribute set, */  dt->type   = type;            /* the target's id and type, */  dt->clscnt = clscnt;          /* and number of classes */  dt->attcnt = 1;               /* clear the number of attributes, */  dt->size   = dt->height = 0;  /* the number of nodes, the height, */  dt->total  = 0.0F;            /* the total of frequencies, */  dt->root   = dt->curr = NULL; /* and the node pointers */  dt->newp   = &(dt->root);     /* (no root node is created) */  dt->path   = NULL;            /* do not allocate a path */  dt->plen   = dt->pvsz = 0;    /* until it is needed */  return dt;                    /* return created decision tree */}  /* dt_create() *//*--------------------------------------------------------------------*/void dt_delete (DTREE *dt, int delas){                               /* --- delete a decision tree */  assert(dt);                   /* check argument */  if (dt->root) _delete(dt->root);     /* recursively delete nodes */  if (dt->path) free(dt->path);        /* delete the path vector */  if (dt->buf)  free(dt->buf);         /* and the buffer vector */  if (delas)    as_delete(dt->attset); /* delete attribute set */  free(dt);                     /* delete decision tree body */}  /* dt_delete() *//*--------------------------------------------------------------------*/void dt_up (DTREE *dt, int root){                               /* --- go up in decision tree */  assert(dt);                   /* check argument */  if (root || (dt->plen <= 0)) {    dt->curr = dt->root;        /* if to reset to root, */    dt->plen = 0; }             /* make the root the current node */  else                          /* if to go up one level */    dt->curr = dt->path[--dt->plen];}  /* dt_up() */                /* get preceding node in path *//*--------------------------------------------------------------------*/int dt_down (DTREE *dt, int index, int check){                               /* --- go down in decision tree */  DTNODE **vec;                 /* new path vector */  int    vsz;                   /* new size of path vector */  assert(dt);                   /* check arguments */  if (!dt->curr) return -2;     /* and current node */  assert((index >= 0) && (index < dt->curr->size));  if (check)                    /* if only to check for child node */    return (dt->curr->vec[index].child) ? 0 : 1;  if (dt->plen >= dt->pvsz) {   /* if the path vector is full, */    vsz = dt->pvsz +BS_PATH;    /* enlarge the path vector */    vec = (DTNODE**)realloc(dt->path, vsz *sizeof(DTNODE*));    if (!vec) return -1;        /* allocate a new path vector */    dt->path = vec; dt->pvsz = vsz;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -