📄 corr.c

📁 数据挖掘中的bayes算法,很好的代码
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/*----------------------------------------------------------------------  File    : corr.c  Contents: covariances/correlation coefficients computation program  Author  : Christian Borgelt  History : 1999.04.07 file created from file xmat.c            1999.04.10 first version completed            1999.04.11 options -x, -v, and -c added            1999.04.14 option -k (expected values from known pairs)            1999.12.01 bug in connection with option -d removed            2000.11.10 adapted to new module mvnorm            2002.06.07 LaTeX output option added            2003.08.16 slight changes in error message output            2004.04.22 bug in function dblout fixed (log(0))            2007.02.13 adapted to modified module tabscan----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <stdarg.h>#include <string.h>#include <float.h>#include <math.h>#include <assert.h>#include "symtab.h"#include "tabscan.h"#include "mvnorm.h"#ifdef STORAGE#include "storage.h"#endif/*----------------------------------------------------------------------  Preprocessor Definitions----------------------------------------------------------------------*/#define PRGNAME     "corr"#define DESCRIPTION "compute covariance matrix/correlation coefficients"#define VERSION     "version 2.8 (2007.02.13)         " \                    "(c) 1999-2007   Christian Borgelt"/* --- sizes --- */#define BUFSIZE     512         /* size of read buffer */#define BLKSIZE      32         /* block size for column vector *//* --- error codes --- */#define OK            0         /* no error */#define E_NONE        0         /* no error */#define E_NOMEM     (-1)        /* not enough memory */#define E_FOPEN     (-2)        /* file open failed */#define E_FREAD     (-3)        /* file read failed */#define E_FWRITE    (-4)        /* file write failed */#define E_OPTION    (-5)        /* unknown option */#define E_OPTARG    (-6)        /* missing option argument */#define E_ARGCNT    (-7)        /* wrong number of arguments */#define E_STDIN     (-8)        /* double assignment of stdin */#define E_FLDNAME   (-9)        /* invalid field name */#define E_VALUE    (-10)        /* invalid value */#define E_EMPFLD   (-11)        /* empty field name */#define E_DUPFLD   (-12)        /* duplicate field name */#define E_FLDCNT   (-13)        /* wrong number of fields */#define E_UNKNOWN  (-14)        /* unknown error *//*----------------------------------------------------------------------  Type Definitions----------------------------------------------------------------------*/typedef struct {                /* --- set of table columns --- */  int        colvsz;            /* size of column vector */  int        colcnt;            /* number of matrix columns */  int        vldcnt;            /* number of valid matrix columns */  int        maxlen;            /* maximal length of a column name */  const char **names;           /* table column names */  double     *vals;             /* current values */} COLSET;                       /* (set of table columns) *//*----------------------------------------------------------------------  Constants----------------------------------------------------------------------*/static const char *errmsgs[] = {   /* error messages */  /* E_NONE      0 */  "no error\n",  /* E_NOMEM    -1 */  "not enough memory\n",  /* E_FOPEN    -2 */  "cannot open file %s\n",  /* E_FREAD    -3 */  "read error on file %s\n",  /* E_FWRITE   -4 */  "write error on file %s\n",  /* E_OPTION   -5 */  "unknown option -%c\n",  /* E_OPTARG   -6 */  "missing option argument\n",  /* E_ARGCNT   -7 */  "wrong number of arguments\n",  /* E_STDIN    -8 */  "double assignment of standard input\n",  /* E_FLDNAME  -9 */  "invalid field name \"%s\"\n",  /* E_VALUE   -10 */  "file %s, record %d: "                         "invalid value \"%s\" in field %d\n",  /* E_EMPFLD  -11 */  "file %s, record %d: "                         "empty name%s in field %d\n",  /* E_DUPFLD  -12 */  "file %s, record %d: "                         "duplicate field name \"%s\"\n",  /* E_FLDCNT  -13 */  "file %s, record %d: "                         "%d field(s) expected\n",  /* E_UNKNOWN -14 */  "unknown error\n"};/*----------------------------------------------------------------------  Global Variables----------------------------------------------------------------------*/static char    *prgname;        /* program name for error messages */static TABSCAN *tscan = NULL;   /* table scanner */static SYMTAB  *symtab = NULL;  /* symbol table */static COLSET  *colset = NULL;  /* column descriptions */static MVNORM  *mvnorm = NULL;  /* multivariate normal distribution */static FILE    *in     = NULL;  /* input  file */static FILE    *out    = NULL;  /* output file */static char    rdbuf[BUFSIZE];  /* read buffer */static char    fnbuf[BUFSIZE];  /* field name buffer *//*----------------------------------------------------------------------  Column Set Functions----------------------------------------------------------------------*/#define cs_create()         (COLSET*)calloc(1, sizeof(COLSET))#define cs_colcnt(s)        ((s)->colcnt)#define cs_proc(s,m,w)      mvn_add (m, (s)->vals, w)#define cs_procx(s,m,w)     mvn_addx(m, (s)->vals, w)/*--------------------------------------------------------------------*/#ifndef NDEBUGstatic void cs_delete (COLSET *cset){                               /* --- delete a column set */  if (cset->names)              /* if there is a names vector, */    free((void*)cset->names);   /* delete it */  if (cset->vals)               /* if there is a value vector, */    free((void*)cset->vals);    /* delete it */  free(cset);                   /* delete the column set body */}  /* cs_delete() */#endif/*--------------------------------------------------------------------*/static int cs_add (COLSET *cset, const char *name){                               /* --- add a column to a column set */  int  n;                       /* new column vector size, buffer */  void *p;                      /* new names/values vector */  assert(cset && name);         /* check the function arguments */  n = cset->colvsz;             /* get the column vector size and */  if (cset->colcnt >= n) {      /* if the column vector is full */    n += (n > BLKSIZE) ? (n >> 1) : BLKSIZE;    p = realloc((void*)cset->names, n *sizeof(const char*));    if (!p) return -1;          /* enlarge the names vector */    cset->names = (const char**)p;    p = realloc(cset->vals,  n *sizeof(double));    if (!p) return -1;          /* enlarge the names vector */    cset->vals  = (double*)p; cset->colvsz = n;  }                             /* set the new vectors and their size */  cset->names[cset->colcnt  ] = name;  cset->vals [cset->colcnt++] = MVN_NULL;  return 0;                     /* note the table column's name */}  /* cs_add() */               /* and return 'ok' *//*--------------------------------------------------------------------*/static void cs_set (COLSET *cset, int colid, const char *name){                               /* --- set a column value */  char   *s;                    /* end pointer for conversion */  double *val;                  /* pointer to the value to set */  assert(cset && (colid >= 0) && (colid < cset->colcnt));  if (!cset->names[colid])      /* if the column name has been */    return;                     /* deleted, abort the function */  val = cset->vals +colid;      /* get the value buffer */  if (!name) {                  /* if no value name is given, */    *val = MVN_NULL; return; }  /* the value is null */  *val = strtod(name, &s);      /* convert the attribute value */  if (*s || (s == name)) {      /* if it is nominal */    *val = MVN_NULL; cset->names[colid] = NULL; }              }  /* cs_set() */               /* invalidate the column *//*--------------------------------------------------------------------*/static void cs_prepare (COLSET *cset){                               /* --- prepare columns for output */  int        i, len;            /* loop variable, length of a name */  const char *name;             /* to traverse the column names */  cset->vldcnt = 0;             /* init. the number of valid columns */  cset->maxlen = 9;             /* and the maximal length of a name */  for (i = 0; i < cset->colcnt; i++) {    name = cset->names[i];      /* traverse the columns */    if (!name) continue;        /* skip invalidated columns */    cset->vldcnt++;             /* count the valid columns */    len = (int)strlen(name);    /* determine the length of the name */    if (len > cset->maxlen) cset->maxlen = len;  }                             /* adapt the maximal length */}  /* cs_prepare() *//*----------------------------------------------------------------------  Auxiliary Functions----------------------------------------------------------------------*/static void dblout (FILE *out, double num, int len){                               /* --- print a floating point number */  int  n, d;                    /* number of characters/decimals */  char m[16], e[8];             /* mantissa and exponent */  if (num >= 0.0) {  m[0] = ' '; }  else { num = -num; m[0] = '-'; }  len--;                        /* determine and store the sign */  n = (num == 0)                /* calculate the decimal exponent */    ? 0 : (int)floor(log10(num));  if ((n > len) || (n <= -3)) { /* if an exponent is needed, */    num /= pow(10, n);          /* comp. mantissa and note exponent */    d = len -2 -(n = sprintf(e, "e%d", n)); }  else {                        /* if no exponent is needed, */    d = len -((n > 0) ? n+2 : 2);      /* compute the number */    e[0] = n = 0;               /* of decimal places possible */  }                             /* and clear the exponent */  n += sprintf(m+1, "%.*f", (d <= 0) ? 0 : d, num);  while (n < len) {             /* format the mantissa, */    fputc(' ', out); n++; }     /* fill to the requested length, */  fputs(m, out); fputs(e, out); /* and print mantissa and exponent */}  /* dblout() *//*----------------------------------------------------------------------The above function is needed, because a format string used with thefunction fprintf does not yield appropriate results.----------------------------------------------------------------------*//*----------------------------------------------------------------------  Output Functions----------------------------------------------------------------------*/static void expvar (COLSET *cset, MVNORM *mvn,                    int tex, int cnt, FILE *out){                               /* --- print exp. values/variances */  int        i, k, n = 0;       /* loop variables */
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -