📄 kelr.c
字号:
/*---------------------------------------------------------------------- File : kelr.c Contents: kernel estimation for logistic regression Author : Christian Borgelt History : 2003.10.29 file created from file mcli.c 2004.04.22 special mode for distance listing added 2007.02.13 adapted to modified module tabscan----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <stdarg.h>#include <string.h>#include <float.h>#include <time.h>#include <assert.h>#include "params.h"#include "radfn.h"#ifndef MAT_READ#define MAT_READ#endif#include "matrix.h"#ifdef STORAGE#include "storage.h"#endif/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#define PRGNAME "kelr"#define DESCRIPTION "kernel estimation for logistic regression"#define VERSION "version 1.1 (2004.04.22) " \ "(c) 2003-2004 Christian Borgelt"/* --- error codes --- */#define OK 0 /* no error */#define E_NONE 0 /* no error */#define E_NOMEM (-1) /* not enough memory */#define E_FOPEN (-2) /* file open failed */#define E_FREAD (-3) /* file read failed */#define E_FWRITE (-4) /* file write failed */#define E_OPTION (-5) /* unknown option */#define E_OPTARG (-6) /* missing option argument */#define E_ARGCNT (-7) /* wrong number of arguments */#define E_PARAM (-7) /* invalid parameter value */#define E_PATCNT (-9) /* pattern file is empty */#define E_VALCNT (-10) /* no input variables */#define E_CLASS (-11) /* too many classes */#define E_UNKNOWN (-18) /* unknown error *//*---------------------------------------------------------------------- Constants----------------------------------------------------------------------*/static const char *errmsgs[] = { /* error messages */ /* E_NONE 0 */ "no error\n", /* E_NOMEM -1 */ "not enough memory\n", /* E_FOPEN -2 */ "cannot open file %s\n", /* E_FREAD -3 */ "read error on file %s\n", /* E_FWRITE -4 */ "write error on file %s\n", /* E_OPTION -5 */ "unknown option -%c\n", /* E_OPTARG -6 */ "missing option argument\n", /* E_ARGCNT -7 */ "wrong number of arguments\n", /* E_PARAM -8 */ "invalid parameter value %g\n", /* E_PATCNT -9 */ "pattern file is empty\n", /* E_VALCNT -10 */ "no input variables\n", /* E_CLASS -11 */ "too many classes\n", /* -12 to -15 */ NULL, NULL, NULL, NULL, /* E_VALUE -16 */ "file %s, record %d: " "invalid value %s in field %d\n", /* E_FLDCNT -17 */ "file %s, record %d: " "%s%d field(s) instead of %d\n", /* E_UNKNOWN -18 */ "unknown error\n"};/*---------------------------------------------------------------------- Global Variables----------------------------------------------------------------------*/const char *prgname = NULL; /* program name for error messages */static TABSCAN *tscan = NULL; /* table file scanner */static MATRIX *mat = NULL; /* matrix of training patterns */static double *kes[2] = {NULL, NULL}; /* vectors of kernel estimates */static FILE *in = NULL; /* input file */static FILE *out = NULL; /* output file *//*---------------------------------------------------------------------- Main Functions----------------------------------------------------------------------*/static void error (int code, ...){ /* --- print error message */ va_list args; /* list of variable arguments */ const char *msg; /* error message */ assert(prgname); /* check the program name */ if (code < E_UNKNOWN) code = E_UNKNOWN; if (code < 0) { /* if to report an error, */ msg = errmsgs[-code]; /* get the error message */ if (!msg) msg = errmsgs[-E_UNKNOWN]; fprintf(stderr, "\n%s: ", prgname); va_start(args, code); /* get variable arguments */ vfprintf(stderr, msg, args);/* print the error message */ va_end(args); /* end argument evaluation */ } #ifndef NDEBUG if (kes[0])free(kes[0]); if (mat) mat_delete(mat); /* clean up memory */ if (tscan) ts_delete(tscan); /* and close files */ if (in && (in != stdin)) fclose(in); if (out && (out != stdout)) fclose(out); #endif #ifdef STORAGE showmem("at end of program"); /* check memory usage */ #endif exit(code); /* abort the program */} /* error() *//*--------------------------------------------------------------------*/int main (int argc, char *argv[]){ /* --- main function */ int i, k = 0, n; /* loop variables, buffers */ char *s; /* to traverse options */ char **optarg = NULL; /* option argument */ char *fn_in = NULL; /* name of input file */ char *fn_out = NULL; /* name of output file */ char *blanks = NULL; /* blanks */ char *fldseps = NULL; /* field separators */ char *recseps = NULL; /* record separators */ char *comment = NULL; /* comment characters */ char *fmt = "%g"; /* format for number output */ char seps[4] = " \n"; /* separators for output */ double params[] = {2, 0}; /* kernel function parameters */ double radius = 1; /* radius of kernel function */ double thresh = 0; /* threshold for class assignment */ int invert = 0; /* flag for threshold inversion */ int dist = 0; /* flag for distance listing */ int patcnt = 0; /* number of patterns */ int valcnt = 0; /* number of values per pattern */ RADFN *radfn = rf_gauss; /* radial kernel function */ double *v1, *v2; /* vectors to compare */ int c1 = 0, c2, cnt; /* classes of vectors, class counter */ double d, t; /* for distance computation */ TSINFO *err; /* error information */ prgname = argv[0]; /* get program name for error msgs. */ /* --- print startup/usage message --- */ if (argc > 1) { /* if arguments are given */ fprintf(stderr, "%s - %s\n", argv[0], DESCRIPTION); fprintf(stderr, VERSION); } /* print a startup message */ else { /* if no argument given */ printf("usage: %s [options] infile outfile\n", argv[0]); printf("%s\n", DESCRIPTION); printf("%s\n", VERSION); printf("-c use Cauchy kernel " "(default: Gaussian kernel)\n"); printf("-p#:# kernel function parameters " "(default: %g:%g)\n", params[0], params[1]); printf("-s size/radius of kernel function" "(default: %g)\n", radius); printf("-t# threshold for class assignment " "(default: > %g)\n", thresh); printf("-i invert threshold (<= instead of >)\n"); printf("-d special mode: list all distances\n"); printf("-b/f/r# blank characters, field and record separators\n" " (default: \" \\t\\r\", \" \\t\", \"\\n\")\n"); printf("-C# comment characters (default: \"#\")\n"); printf("infile file to read (no header, only numbers)\n"); printf("outfile file to write kernel estimates to\n"); return 0; /* print a usage message */ } /* and abort the program */ /* --- evaluate arguments --- */ for (i = 1; i < argc; i++) { /* traverse arguments */ s = argv[i]; /* get option argument */ if (optarg) { *optarg = s; optarg = NULL; continue; } if ((*s == '-') && *++s) { /* -- if argument is an option */ while (1) { /* traverse characters */ switch (*s++) { /* evaluate option */ case 'c': radfn = rf_cauchy; break; case 'p': getdblvec(s, &s, 3, params); break; case 's': radius = strtod(s, &s); break; case 't': thresh = strtod(s, &s); break; case 'i': invert = 1; break; case 'd': dist = 1; break; case 'o': optarg = &fmt; break; case 'b': optarg = &blanks; break; case 'f': optarg = &fldseps; break; case 'r': optarg = &recseps; break; case 'C': optarg = &comment; break; default : error(E_OPTION, *--s); break; } /* set option variables */ if (!*s) break; /* if at end of string, abort loop */ if (optarg) { *optarg = s; optarg = NULL; break; } } } /* get option argument */ else { /* -- if argument is no option */ switch (k++) { /* evaluate non-option */ case 0: fn_in = s; break; case 1: fn_out = s; break; default: error(E_ARGCNT); break; } /* note filenames */ } } if (optarg) error(E_OPTARG); /* check the option argument */ if (k != 2) error(E_ARGCNT); /* and the number of arguments */ if (params[0] <= 0) error(E_PARAM, params[0]); if (params[1] < 0) error(E_PARAM, params[1]); if (radius <= 0) error(E_PARAM, radius); /* --- read training patterns --- */ if (fn_in && *fn_in) /* if a file name is given, */ in = fopen(fn_in, "r"); /* open the file for reading */ else { /* if no file name is given, */ in = stdin; fn_in = "<stdin>"; } /* use standard input */ fprintf(stderr, "\nreading %s ... ", fn_in); if (!in) error(E_FOPEN, fn_in); tscan = ts_create(); /* create a table file scanner and */ if (!tscan) error(E_NOMEM); /* set the separator characters */ if (blanks) seps[0] = ts_chars(tscan, TS_BLANK, blanks); if (fldseps) seps[1] = ts_chars(tscan, TS_FLDSEP, fldseps); if (recseps) seps[2] = ts_chars(tscan, TS_RECSEP, recseps); if (comment) ts_chars(tscan, TS_COMMENT, comment); ts_chars(tscan, TS_NULL, ""); /* remove the null value characters */ mat = mat_readx(tscan, in, 0, valcnt); if (!mat) { /* read the training patterns */ err = ts_info(tscan); /* on error get the error info. */ error(err->code, fn_in, err->rec, err->s, err->fld, err->exp); } /* abort with an error message */ patcnt = mat_rowcnt(mat); /* get the number of data points */ valcnt = mat_colcnt(mat); /* and their dimensionality */ if (ts_delim(tscan) != TS_EOF)/* check for end of file */ error(E_VALUE, fn_in, patcnt+1, "\"\"", 1); if (in != stdin) { /* if not read from standard input, */ fclose(in); in = NULL; } /* close the input file */ if (patcnt <= 0) error(E_PATCNT); if (valcnt <= 1) error(E_VALCNT); fprintf(stderr, "[%d pattern(s)] done.\n", patcnt); /* --- open the output file --- */ if (fn_out && *fn_out) /* if an output file name is given, */ out = fopen(fn_out, "w"); /* open the output file */ else { /* if no output file name is given, */ out = stdout; fn_out = "<stdout>"; } /* write to std. output */ fprintf(stderr, "writing %s ... ", fn_out); if (!out) error(E_FOPEN, fn_out); /* --- compute kernel estimates --- */ if (dist) /* print the table header */ fprintf(out, "dist class\n"); else { /* if kernel estimation */ kes[0] = (double*)calloc(patcnt +patcnt, sizeof(double)); if (!kes[0]) error(E_NOMEM); kes[1] = kes[0] +patcnt; /* create kernel estimate vectors */ } /* and organize the memory */ radius *= radius; /* compute variance and */ if (radius <= 0) radius = 1; /* check and adapt it */ for (cnt = 0, i = 0; i < patcnt; i++) { v1 = mat_row(mat, i); /* traverse the matrix rows */ if (!dist) { /* if kernel estimation */ c1 = ((v1[valcnt-1] > thresh) ? 1 : 0) ^ invert; if (c1 <= 0) cnt++; /* count vectors of class 1 */ kes[c1][i] += radfn(0, params); } /* count vector for itself */ for (k = i; --k >= 0; ) { /* traverse preceding matrix rows */ v2 = mat_row(mat, k); /* (consider each pair exactly once) */ for (d = 0, n = valcnt-1; --n >= 0; ) { t = v1[n] -v2[n]; d += t*t; } if (dist) { /* if distance computation */ c1 = (1 << (int)v1[valcnt-1]) | (1 << (int)v2[valcnt-1]); c2 = c1 -('z'-'a'+1); /* compute class indicator */ if (c2 < 0) c1 = 'a' -1 +c1; else if (c2 <= 'Z'-'A'+1) c1 = 'A' -1 +c2; else error(E_CLASS); /* code the class indicator */ fprintf(out, "%g %c\n", sqrt(d), c1); } else { /* if kernel estimation */ c2 = ((v2[valcnt-1] > thresh) ? 1 : 0) ^ invert; kes[c2][i] += t = radfn(d /radius, params); kes[c1][k] += t; /* compute the distance between the */ } /* two patterns and sum the values */ } /* of the kernel function */ } if (!dist) { /* if kernel estimation */ for (i = patcnt; --i >= 0; ) { /* traverse the patterns */ v1 = mat_row(mat, i); /* get the next pattern and */ t = kes[0][i] +kes[1][i]; /* sum the kernel function values */ v1[valcnt-1] = (t > 0) ? kes[1][i] /t : 1; } /* compute the probability estimate */ } /* from the aggregated weights */ /* --- write kernel estimates --- */ if (!dist /* if kernel estimation */ && (mat_write(mat, out, fmt, seps+1) != 0)) error(E_FWRITE, fn_out); /* write the modified matrix */ if (out != stdout) { /* if not written to standard output, */ i = fclose(out); out = NULL;/* close the output file */ if (i != 0) error(E_FWRITE, fn_out); } /* check for a write error and */ fprintf(stderr, "done.\n"); /* print a success message */ /* --- clean up --- */ #ifndef NDEBUG if (kes[0]) free(kes[0]); /* delete the vector of estimates, */ mat_delete(mat); /* the training patterns, */ ts_delete(tscan); /* and the table scanner */ #endif #ifdef STORAGE showmem("at end of program"); /* check memory usage */ #endif return 0; /* return 'ok' */} /* main() */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -