📄 dti.c
字号:
/*---------------------------------------------------------------------- File : dti.c Contents: decision and regression tree induction Authors : Christian Borgelt History : 08.08.1997 file created 08.09.1997 class checks corrected 17.09.1997 option '-a' (aligned output) added 11.01.1998 unknown value characters (option -u) added 08.02.1998 adapted to changed parse functions 09.02.1998 adapted to changed order of evaluation measures 30.03.1998 bug in class check removed 23.06.1998 adapted to modified attset functions 29.09.1998 table reading simplified 20.10.1998 output of relative class frequencies added 09.02.1999 input from stdin, output to stdout added 17.04.1999 simplified using the new module 'io' 30.04.1999 log messages improved 25.10.1999 bug in initialization of 'minval' fixed 29.10.1999 evaluation measure FEM_WDIFF added 18.12.2000 extended to regression trees 02.03.2001 evaluation measure FEM_INFGBAL added 23.07.2001 adapted to modified module scan 11.02.2002 evaluation measures coded with names 02.02.2002 adapted to modified list of evaluation measures 04.02.2002 quadratic information measures added 16.08.2003 slight changes in error message output 26.05.2004 measure selection made more flexible 21.07.2004 option -x added (attribute evaluation)----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <stdarg.h>#include <assert.h>#ifndef AS_RDWR#define AS_RDWR#endif#ifndef AS_PARSE#define AS_PARSE#endif#ifndef TAB_RDWR#define TAB_RDWR#endif#ifndef DT_GROW#define DT_GROW#endif#include "io.h"#include "dtree.h"#ifdef STORAGE#include "storage.h"#endif/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#define PRGNAME "dti"#define DESCRIPTION "decision and regression tree induction"#define VERSION "version 3.13 (2004.08.12) " \ "(c) 1997-2004 Christian Borgelt"/* --- error codes --- */#define OK 0 /* no error */#define E_NONE 0 /* no error */#define E_NOMEM (-1) /* not enough memory */#define E_FOPEN (-2) /* cannot open file */#define E_FREAD (-3) /* read error on file */#define E_FWRITE (-4) /* write error on file */#define E_OPTION (-5) /* unknown option */#define E_OPTARG (-6) /* missing option argument */#define E_ARGCNT (-7) /* wrong number of arguments */#define E_STDIN (-8) /* double assignment of stdin */#define E_PARSE (-9) /* parse error on domain file */#define E_BALANCE (-10) /* unknown balancing mode */#define E_TARGET (-11) /* missing a target attribute */#define E_MEASURE (-12) /* unknown selection measure */#define E_MINCNT (-13) /* illegal minimal number of tuples */#define E_UNKNOWN (-14) /* unknown error *//*---------------------------------------------------------------------- Type Definitions----------------------------------------------------------------------*/typedef struct { /* --- measure information --- */ int code; /* measure code */ char *name; /* name of the measure */} MINFO; /* (measure information) *//*---------------------------------------------------------------------- Constants----------------------------------------------------------------------*//* --- measures for symbolic targets --- */static const MINFO symtab[] = { { FEM_NONE, "none" }, /* no measure */ { FEM_INFGAIN, "infgain" }, /* information gain */ { FEM_INFGBAL, "infgbal" }, /* balanced information gain */ { FEM_INFGR, "infgr" }, /* information gain ratio */ { FEM_INFSGR1, "infsgr1" }, /* sym. information gain ratio 1 */ { FEM_INFSGR2, "infsgr2" }, /* sym. information gain ratio 2 */ { FEM_QIGAIN, "qigain" }, /* quadratic information gain */ { FEM_QIGBAL, "qigbal" }, /* balanced quad. information gain */ { FEM_QIGR, "qigr" }, /* quadratic information gain ratio */ { FEM_QISGR1, "qisgr1" }, /* sym. quad. info. gain ratio 1 */ { FEM_QISGR2, "qisgr2" }, /* sym. quad. info. gain ratio 2 */ { FEM_GINI, "gini" }, /* gini index */ { FEM_GINISYM, "ginisym" }, /* symmetric gini index */ { FEM_GINIMOD, "ginimod" }, /* modified gini index */ { FEM_RELIEF, "relief" }, /* relief measure */ { FEM_WDIFF, "wdiff" }, /* weighted differences */ { FEM_CHI2, "chi2" }, /* chi^2 measure */ { FEM_CHI2NRM, "chi2nrm" }, /* normalized chi^2 measure */ { FEM_WEVID, "wevid" }, /* weight of evidence */ { FEM_RELEV, "relev" }, /* relevance */ { FEM_BDM, "bdm" }, /* Bayesian-Dirichlet / K2 metric */ { FEM_BDMOD, "bdmod" }, /* modified BD / K2 metric */ { FEM_RDLREL, "rdlrel" }, /* red. of description length 1 */ { FEM_RDLABS, "rdlabs" }, /* red. of description length 2 */ { FEM_STOCO, "stoco" }, /* stochastic complexity */ { FEM_SPCGAIN, "spcgain" }, /* specificity gain */ { FEM_SPCGBAL, "spcgbal" }, /* balanced specificity gain */ { FEM_SPCGR, "spcgr" }, /* specificity gain ratio */ { FEM_SPCSGR1, "spcsgr1" }, /* sym. specificity gain ratio 1 */ { FEM_SPCSGR2, "spcsgr2" }, /* sym. specificity gain ratio 2 */ { -1, NULL } /* sentinel */};/* --- measures for numeric targets --- */static const MINFO numtab[] = { { VEM_NONE, "none" }, /* no measure */ { VEM_SSE, "sse" }, /* sum of squared errors */ { VEM_MSE, "mse" }, /* mean squared error */ { VEM_RMSE, "rmse" }, /* square root of mean squared error */ { VEM_VAR, "var" }, /* variance (unbiased estimator) */ { VEM_SDEV, "sd" }, /* standard deviation (from variance) */ { -1, NULL } /* sentinel */};/* --- error messages --- */static const char *errmsgs[] = { /* E_NONE 0 */ "no error\n", /* E_NOMEM -1 */ "not enough memory\n", /* E_FOPEN -2 */ "cannot open file %s\n", /* E_FREAD -3 */ "read error on file %s\n", /* E_FWRITE -4 */ "write error on file %s\n", /* E_OPTION -5 */ "unknown option -%c\n", /* E_OPTARG -6 */ "missing option argument\n", /* E_ARGCNT -7 */ "wrong number of arguments\n", /* E_STDIN -8 */ "double assignment of standard input\n", /* E_PARSE -9 */ "parse error(s) on file %s\n", /* E_BALANCE -10 */ "unknown balancing mode %c\n", /* E_TARGET -11 */ "missing target \"%s\" in file %s\n", /* E_MEASURE -12 */ "unknown attribute selection measure %s\n", /* E_MINCNT -13 */ "illegal minimal number of tuples %g\n", /* E_UNKNOWN -14 */ "unknown error\n"};/*---------------------------------------------------------------------- Global Variables----------------------------------------------------------------------*/const char *prgname = NULL; /* program name for error messages */static SCAN *scan = NULL; /* scanner */static ATTSET *attset = NULL; /* attribute set */static TABLE *table = NULL; /* table */static DTREE *dtree = NULL; /* decision/regression tree */static FILE *out = NULL; /* output file *//*---------------------------------------------------------------------- Functions----------------------------------------------------------------------*/static void error (int code, ...){ /* --- print error message */ va_list args; /* list of variable arguments */ const char *msg; /* error message */ assert(prgname); /* check the program name */ if (code < E_UNKNOWN) code = E_UNKNOWN; if (code < 0) { /* if to report an error, */ msg = errmsgs[-code]; /* get the error message */ if (!msg) msg = errmsgs[-E_UNKNOWN]; fprintf(stderr, "\n%s: ", prgname); va_start(args, code); /* get variable arguments */ vfprintf(stderr, msg, args);/* print error message */ va_end(args); /* end argument evaluation */ } #ifndef NDEBUG if (dtree) dt_delete(dtree, 0); if (table) tab_delete(table, 0); if (attset) as_delete(attset); /* clean up memory */ if (scan) sc_delete(scan); /* and close files */ if (out && (out != stdout)) fclose(out); #endif #ifdef STORAGE showmem("at end of program"); /* check memory usage */ #endif exit(code); /* abort the program */} /* error() *//*--------------------------------------------------------------------*/static void help (void){ /* --- print help on sel. measures */ int i; /* loop variable */ fprintf(stderr, "\n"); /* terminate startup message */ printf("\nList of attribute selection measures (option -e#)\n"); printf("Measures for symbolic target attributes:\n"); printf(" name measure\n"); for (i = 0; symtab[i].name; i++) printf(" %-9s %s\n", symtab[i].name, ft_mname(symtab[i].code)); printf("\nMeasures wdiff, bdm, bdmod, rdlrel, rdlabs " "take a sensitivity\n" "parameter (-z#, default: 0, i.e. normal sensitivity)\n"); printf("Measures bdm and bdmod take a prior (-p#, positive number)\n" "or an equivalent sample size (-p#, negative number).\n"); printf("\nMeasures for numeric target attributes:\n"); printf(" name measure\n"); for (i = 0; numtab[i].name; i++) printf(" %-9s %s\n", numtab[i].name, vt_mname(numtab[i].code)); exit(0); /* print a list of selection measures */} /* help() */ /* and abort the program *//*--------------------------------------------------------------------*/static int code (const MINFO *tab, const char *name){ /* --- get measure code */ for ( ; tab->name; tab++) /* look up name in table */ if (strcmp(tab->name, name) == 0) return tab->code; /* return the measure code */ return -1; /* or an error indicator */} /* code() *//*--------------------------------------------------------------------*/int main (int argc, char* argv[]){ /* --- main function */ int i, k = 0, n, w; /* loop variables, counter */ char *s; /* to traverse options */ char **optarg = NULL; /* option argument */ char *fn_dom = NULL; /* name of domain file */ char *fn_hdr = NULL; /* name of table header file */ char *fn_tab = NULL; /* name of table file */ char *fn_dt = NULL; /* name of dec./reg. tree file */ char *blanks = NULL; /* blanks */ char *fldseps = NULL; /* field separators */ char *recseps = NULL; /* record separators */ char *uvchars = NULL; /* unknown value characters */ char *trgname = NULL; /* target attribute name */ char *mname = NULL; /* name of att. selection measure */ int measure = 3; /* attribute selection measure */ int wgtd = FEF_WGTD; /* flag for weighted measure */ double params[2] = { 0, 0 }; /* selection measure parameters */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -