📄 sc.c
字号:
/***********************************************************************sc.c - creates classifiers from feature vectors of examples, as well as classifying example feature vectors.Copyright (C) 1991 Dean RubineThis program is free software; you can redistribute it and/or modifyit under the terms of the GNU General Public License as published bythe Free Software Foundation; either version 1, or (at your option)any later version.This program is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See theGNU General Public License for more details.You should have received a copy of the GNU General Public Licensealong with this program (in ../COPYING); if not, write to the FreeSoftware Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.***********************************************************************/#include "bitvector.h"#include "matrix.h"#include "util.h"#include "sc.h"#include "stdio.h"#include "stdlib.h"#include "math.h"#include "zdebug.h"#define EPS (1.0e-6) /* for singular matrix check */sClassifiersNewClassifier(){ register sClassifier sc = allocate(1, struct sclassifier); sc->nfeatures = -1; sc->nclasses = 0; sc->classdope = allocate(MAXSCLASSES, sClassDope); sc->w = NULL; return sc;}voidsFreeClassifier(sc)register sClassifier sc;{ register int i; register sClassDope scd; for(i = 0; i < sc->nclasses; i++) { scd = sc->classdope[i]; if(scd->name) free(scd->name); if(scd->sumcov) FreeMatrix(scd->sumcov); if(scd->average) FreeVector(scd->average); free(scd); if(sc->w && sc->w[i]) FreeVector(sc->w[i]); } free(sc->classdope); if(sc->w) free(sc->w); if(sc->cnst) FreeVector(sc->cnst); if(sc->invavgcov) FreeMatrix(sc->invavgcov); free(sc);}sClassDopesClassNameLookup(sc, classname)register sClassifier sc;register char *classname;{ register int i; register sClassDope scd; static sClassifier lastsc = NULL; static sClassDope lastscd = NULL; /* quick check for last class name */ if(lastsc == sc && lastscd != NULL && STREQ(lastscd->name, classname)) return lastscd; /* linear search through all classes for name */ for(i = 0; i < sc->nclasses; i++) { scd = sc->classdope[i]; if(STREQ(scd->name, classname)) return lastsc = sc, lastscd = scd; } lastsc = NULL; lastscd = NULL; return NULL;}static sClassDopesAddClass(sc, classname)register sClassifier sc;char *classname;{ register sClassDope scd; sc->classdope[sc->nclasses] = scd = allocate(1, struct sclassdope); scd->name = scopy(classname); scd->number = sc->nclasses; scd->nexamples = 0; scd->sumcov = NULL; ++sc->nclasses; return scd;}voidsAddExample(sc, classname, y)register sClassifier sc;char *classname;Vector y;{ register sClassDope scd; register int i, j; double nfv[50]; double nm1on, recipn; scd = sClassNameLookup(sc, classname); if(scd == NULL) {/* fprintf(stderr, "sAddExample: calling sAddClass on %s.\n", classname); */ scd = sAddClass(sc, classname); } if(sc->nfeatures == -1) { sc->nfeatures = NROWS(y);/* fprintf(stderr, "sAddExample: setting sc->nfeatures to NROWS(y).\n"); */ } if(scd->nexamples == 0) {/* fprintf(stderr, "sAddExample: allocating & zeroing scd->average & scd->sumcov.\n"); */ scd->average = NewVector(sc->nfeatures); ZeroVector(scd->average); scd->sumcov = NewMatrix(sc->nfeatures, sc->nfeatures); ZeroMatrix(scd->sumcov); } if(sc->nfeatures != NROWS(y)) { PrintVector(y, "sAddExample: funny feature vector nrows!=%d", sc->nfeatures); return; } scd->nexamples++; nm1on = ((double) scd->nexamples-1)/scd->nexamples; recipn = 1.0/scd->nexamples; /* incrementally update covariance matrix */ for(i = 0; i < sc->nfeatures; i++) nfv[i] = y[i] - scd->average[i]; /* only upper triangular part computed */ for(i = 0; i < sc->nfeatures; i++) for(j = i; j < sc->nfeatures; j++) scd->sumcov[i][j] += nm1on * nfv[i] * nfv[j]; /* incrementally update mean vector */ for(i = 0; i < sc->nfeatures; i++) scd->average[i] = nm1on * scd->average[i] + recipn * y[i];}voidsDoneAdding(sc)register sClassifier sc;{ register int i, j; int c; int ne, denom; double oneoverdenom; register Matrix s; register Matrix avgcov; double det; register sClassDope scd; if(sc->nclasses == 0) { error("No classes for adding to classifier"); return; } /* Given covariance matrices for each class (* number of examples - 1) compute the average (common) covariance matrix */ avgcov = NewMatrix(sc->nfeatures, sc->nfeatures); ZeroMatrix(avgcov); ne = 0; for(c = 0; c < sc->nclasses; c++) { scd = sc->classdope[c]; ne += scd->nexamples; s = scd->sumcov; for(i = 0; i < sc->nfeatures; i++) for(j = i; j < sc->nfeatures; j++) avgcov[i][j] += s[i][j]; } denom = ne - sc->nclasses; if(denom <= 0) { error("Number of classes must be less than number of examples"); return; } oneoverdenom = 1.0 / denom; for(i = 0; i < sc->nfeatures; i++) for(j = i; j < sc->nfeatures; j++) avgcov[j][i] = avgcov[i][j] *= oneoverdenom; Z('a') PrintMatrix(avgcov, "Average Covariance Matrix\n"); /* invert the avg covariance matrix */ sc->invavgcov = NewMatrix(sc->nfeatures, sc->nfeatures); det = InvertMatrix(avgcov, sc->invavgcov); if(fabs(det) <= EPS) FixClassifier(sc, avgcov); /* now compute discrimination functions */ sc->w = allocate(sc->nclasses, Vector); sc->cnst = NewVector(sc->nclasses); for(c = 0; c < sc->nclasses; c++) { scd = sc->classdope[c]; sc->w[c] = NewVector(sc->nfeatures); VectorTimesMatrix(scd->average, sc->invavgcov, sc->w[c]); sc->cnst[c] = -0.5 * InnerProduct(sc->w[c], scd->average); /* could add log(priorprob class c) to cnst[c] */ } FreeMatrix(avgcov); return;}sClassDopesClassify(sc, fv) { return sClassifyAD(sc, fv, NULL, NULL);}sClassDopesClassifyAD(sc, fv, ap, dp)sClassifier sc;Vector fv;double *ap;double *dp;{ double disc[MAXSCLASSES]; register int i, maxclass; double denom, exp(); register sClassDope scd; double d; if(sc->w == NULL) { error("%x not a trained classifier", sc); return(NULL); } for(i = 0; i < sc->nclasses; i++) {/* ari */ double IP; IP = InnerProduct(sc->w[i], fv);/* fprintf(stderr, "sClassifyAD: InnerProduct for class %s is %f.\n", sc->classdope[i]->name, IP); *//* fprintf(stderr, "sClassifyAD: sc->cnst[i] = %f.\n", sc->cnst[i]); */ disc[i] = IP + sc->cnst[i];/* fprintf(stderr, "sClassifyAD: Set disc = %f for class %s.\n", disc[i],sc->classdope[i]->name); */ /* disc[i] = InnerProduct(sc->w[i], fv) + sc->cnst[i]; */ } maxclass = 0; for(i = 1; i < sc->nclasses; i++) if(disc[i] > disc[maxclass]) maxclass = i;/* ari *//* PF_INIT_COS 0 initial angle (cos) *//* PF_INIT_SIN 1 initial angle (sin) *//* PF_BB_LEN 2 length of bounding box diagonal *//* PF_BB_TH 3 angle of bounding box diagonal *//* PF_SE_LEN 4 length between start and end points *//* PF_SE_COS 5 cos of angle between start and end points *//* PF_SE_SIN 6 sin of angle between start and end points *//* PF_LEN 7 arc length of path *//* PF_TH 8 total angle traversed *//* PF_ATH 9 sum of abs vals of angles traversed *//* PF_SQTH 10 sum of squares of angles traversed *//* PF_DUR 11 duration of path */ /* ifndef USE_TIME *//* NFEATURES 12 *//* else *//* PF_MAXV 12 maximum speed *//* NFEATURES 13 *//* endif *//** fprintf(stderr, "\nFeature vector:\n");* fprintf(stderr, " start cosine %8.4f path length %8.4f\n",* fv[PF_INIT_COS], fv[PF_LEN]);* fprintf(stderr, " start sine %8.4f total angle %8.4f\n",* fv[PF_INIT_SIN], fv[PF_TH]);* fprintf(stderr, " b.b. length %8.4f total abs. angle %8.4f\n",* fv[PF_BB_LEN], fv[PF_ATH]);* fprintf(stderr, " b.b. angle %8.4f total sq. angle %8.4f\n",* fv[PF_BB_TH], fv[PF_SQTH]);* fprintf(stderr, " st-end length %8.4f duration %8.4f\n",* fv[PF_SE_LEN], fv[PF_DUR]);* fprintf(stderr, " st-end cos %8.4f\n", fv[PF_SE_COS]);* fprintf(stderr, " st-end sin %8.4f\n", fv[PF_SE_SIN]);*/ ZZ('C') { scd = sc->classdope[maxclass]; PrintVector(fv, "%10.10s ", scd->name); ZZZ('C') { for(i = 0; i < sc->nclasses; i++) { scd = sc->classdope[i]; PrintVector(scd->average, "%5.5s %5g ", scd->name, disc[i]); } } } scd = sc->classdope[maxclass];/* ari *//* fprintf(stderr,"%s", scd->name); *//* fprintf(stderr,"Stroke identified as %s [ ", scd->name); for (i = 0; i < sc->nclasses; i++) { if ( (disc[maxclass] - disc[i] < 5.0) && (i != maxclass) ) fprintf(stderr,"%s ", sc->classdope[i]->name); } fprintf(stderr,"], ");*/ if(ap) { /* calculate probability of non-ambiguity */ for(denom = 0, i = 0; i < sc->nclasses; i++) /* quick check to avoid computing negligible term */ if((d = disc[i] - disc[maxclass]) > -7.0) denom += exp(d); *ap = 1.0 / denom; } if(dp) /* calculate distance to mean of chosen class */ *dp = MahalanobisDistance(fv, scd->average, sc->invavgcov); return scd;}/* Compute (v-u)' sigma (v-u) */doubleMahalanobisDistance(v, u, sigma)register Vector v, u;register Matrix sigma;{ register int i; static Vector space; double result; if(space == NULL || NROWS(space) != NROWS(v)) { if(space) FreeVector(space); space = NewVector(NROWS(v)); } for(i = 0; i < NROWS(v); i++) space[i] = v[i] - u[i]; result = QuadraticForm(space, sigma); return result;}voidFixClassifier(sc, avgcov)register sClassifier sc;Matrix avgcov;{ int i; double det; BitVector bv; Matrix m, r; /* just add the features one by one, discarding any that cause the matrix to be non-invertible */ CLEAR_BIT_VECTOR(bv); for(i = 0; i < sc->nfeatures; i++) { BIT_SET(i, bv); m = SliceMatrix(avgcov, bv, bv); r = NewMatrix(NROWS(m), NCOLS(m)); det = InvertMatrix(m, r); if(fabs(det) <= EPS) BIT_CLEAR(i, bv); FreeMatrix(m); FreeMatrix(r); } m = SliceMatrix(avgcov, bv, bv); r = NewMatrix(NROWS(m), NCOLS(m)); det = InvertMatrix(m, r); if(fabs(det) <= EPS) { error("Can't fix classifier!"); return; } DeSliceMatrix(r, 0.0, bv, bv, sc->invavgcov); FreeMatrix(m); FreeMatrix(r);}voidsDumpClassifier(sc)register sClassifier sc;{ register sClassIndex c; printf("\n----Classifier %x, %d features:-----\n", (int)sc, sc->nfeatures); printf("%d classes: ", sc->nclasses); for(c = 0; c < sc->nclasses; c++) printf("%s ", sc->classdope[c]->name); printf("Discrimination functions:\n"); for(c = 0; c < sc->nclasses; c++) { PrintVector(sc->w[c], "%s: %g + ", sc->classdope[c]->name, sc->cnst[c]); printf("Mean vectors:\n"); PrintVector(sc->classdope[c]->average, "%s: ", sc->classdope[c]->name); } if( sc->invavgcov != NULL ) { PrintMatrix(sc->invavgcov, "Inverse average covariance matrix:\n"); } printf("\n---------\n\n");}voidsWrite(outfile, sc)FILE *outfile;sClassifier sc;{ int i; register sClassDope scd; fprintf(outfile, "%d classes\n", sc->nclasses); for(i = 0; i < sc->nclasses; i++) { scd = sc->classdope[i]; fprintf(outfile, "%s\n", scd->name); } for(i = 0; i < sc->nclasses; i++) { scd = sc->classdope[i]; OutputVector(outfile, scd->average); OutputMatrix(outfile, scd->sumcov); OutputVector(outfile, sc->w[i]); } OutputVector(outfile, sc->cnst); OutputMatrix(outfile, sc->invavgcov);}sClassifiersRead(infile)FILE *infile;{ int i, n; register sClassifier sc; register sClassDope scd; char buf[100]; Z('a') printf("Reading classifier \n"); sc = sNewClassifier(); fgets(buf, 100, infile); if(sscanf(buf, "%d", &n) != 1) { error("Input error in classifier file"); sFreeClassifier(sc); return(NULL); } Z('a') printf(" %d classes \n", n); for(i = 0; i < n; i++) { fscanf(infile, "%s", buf); scd = sAddClass(sc, buf); Z('a') printf(" %s \n", scd->name); } sc->w = allocate(sc->nclasses, Vector); for(i = 0; i < sc->nclasses; i++) { scd = sc->classdope[i]; scd->average = InputVector(infile); scd->sumcov = InputMatrix(infile); sc->w[i] = InputVector(infile); } sc->cnst = InputVector(infile); sc->invavgcov = InputMatrix(infile); Z('a') printf("\n"); return sc;}voidsDistances(sc, nclosest)register sClassifier sc;{ register Matrix d = NewMatrix(sc->nclasses, sc->nclasses); register int i, j; double min, max = 0; int n, mi, mj; printf("-----------\n"); printf("Computing %d closest pairs of classes\n", nclosest); for(i = 0; i < NROWS(d); i++) { for(j = i+1; j < NCOLS(d); j++) { d[i][j] = MahalanobisDistance( sc->classdope[i]->average, sc->classdope[j]->average, sc->invavgcov); if(d[i][j] > max) max = d[i][j]; } } for(n = 1; n <= nclosest; n++) { min = max; mi = mj = -1; for(i = 0; i < NROWS(d); i++) { for(j = i+1; j < NCOLS(d); j++) { if(d[i][j] < min) min = d[mi=i][mj=j]; } } if(mi == -1) break; printf("%2d) %10.10s to %10.10s d=%g nstd=%g\n", n, sc->classdope[mi]->name, sc->classdope[mj]->name, d[mi][mj], sqrt(d[mi][mj])); d[mi][mj] = max+1; } printf("-----------\n"); FreeMatrix(d);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -