📄 discrim.c
字号:
/*********************************************************************//* LINEAR TREE for Supervised Learning *//* Versao 1.0 (10/12/1997) *//* Developed by: Joao Gama *//* LIACC - Uni.do Porto *//* jgama@ncc.up.pt *//*-------------------------------------------------------------------*//* FILE:discrim.c Release 1.0 3/10/97 */ /*********************************************************************/#include <stdio.h>#include <stdlib.h>#include <values.h>#include <math.h>#include "Ci_instances.h"#include "discrim.h"#include "utils.h"#include "externs.i"#define HALF -0.5#define TRESH_LIM 1.0e-6#define SQR(a) ((a)*(a))#define SIGN(a,b) ((b) >= 0.0 ? fabs(a) : -fabs(a))#define MAX(a,b) ((a) > (b) ? (a) : (b))#define ABS(x) ((x) > 0.0 ? (x) : -1.0 * (x))#define MIN(a,b) ((a) > (b) ? (b) : (a))static double TRESH = 1.0e-6; static int *CLASS_USED = NULL;static int NR_CLASS;static int *CONT_ATT = NULL;static int NR_CONT_ATT = 0;static int NR_NON_CONT = 0;#define POSITION(D, att) (att > NrAttrs(D) ? att - NR_NON_CONT : CONT_ATT[att])/************************************************//* Prototipos para Funcoes Locais *//************************************************/static void free_covar(double ***mcov, int nr_cl, int nr_at);static void projection(DomainInfo *domain, AttrVal *instance, double *mcoef, int att, int nr_att);static double ***init_matriz_cov(int nr_cl, int nr_att);static double ***compute_covars(CiDs *ds, double ***STATIS, double *class_freq, long int low, long int high, int nr_att, int nr_cl);static double **coeficientes(DomainInfo *domain, double ***STATIS, double **MINV, double *class_freq, int nratt, int nrcla);static double **matmul(DomainInfo *domain, double ***STATIS, double **MINV, int nratt, int nrcla);static double *inner(DomainInfo *domain, double ***STATIS, double **beta, int nratt, int nrcla, double *class_freq);static void corrige_beta(double **beta, int nratt, int nrcla);static double **svd_inv(double **a, int m, int n);static void svbksb(double **u, double *w, double **v, int m, int n, double *b, double *x);int svdcmp(double **a, int m, int n, double *w, double **v);static double pythag(double a, double b);/* **********************************************//* Funcoes Publicas *//************************************************/int Nr_Att_Non_Cont(){ return NR_NON_CONT;}double **discriminant\(CiDs *ds, long int low,long int high, double ***STATIS, double *class_freq, int nr_att, int nr_cl, int *hidden){ register int i; double **coef = NULL, **MINV = NULL, ***MCOV = NULL; if (!CLASS_USED) CLASS_USED = ivector(1, Ci_NrClasses(ds->domain)); for(NR_CLASS = 0, i = 1; i <= Ci_NrClasses(ds->domain); i++) CLASS_USED[i] = (class_freq[i] > KH * nr_att) ? ++NR_CLASS : 0; if (!CONT_ATT) { CONT_ATT = ivector(1, NrAttrs(ds->domain)); for(i = 1; i <= NrAttrs(ds->domain); i++) switch(CiTypeAttr(ds->domain, i)) { case integer: case ordered: case continuous: ++NR_CONT_ATT; CONT_ATT[i] = NR_CONT_ATT; break; case nominal: ++NR_NON_CONT; break; } } *hidden = NR_CLASS - 1; if (NR_CLASS > 1) { MCOV = compute_covars(ds, STATIS, class_freq, low, high, nr_att, nr_cl); if ((MINV = svd_inv(MCOV[0], nr_att - NR_NON_CONT, nr_att - NR_NON_CONT)) != NULL) { coef = coeficientes(ds->domain, STATIS, MINV, class_freq, nr_att - NR_NON_CONT, nr_cl); free_dmatrix(MINV, 1, nr_att - NR_NON_CONT, 1, nr_att - NR_NON_CONT); } else *hidden = 0; free_covar(MCOV, NR_CLASS, nr_att- NR_NON_CONT); } return coef;}void apply_discriminant\(CiDs *ds, double **coeficientes, long int Low, long int High, int nr_att){ register int hidden; long int k; hidden = NR_CLASS - 1; for(k = Low; k <= High; k++) project_example(ds->domain, Ci_Example(ds, k), coeficientes, nr_att, hidden);}void project_example\(DomainInfo *domain, CiExample *exemplo, double **coeficientes, int nr_att, int hidden){ register int i; double probmax = MINFLOAT, sumprob = 0.0; AttrVal *instance; if (Ci_ReBuildInstance(exemplo, 1 + hidden + nr_att)) { instance = exemplo->instance; for(i = 1; i <= hidden; i++) projection(domain, instance, coeficientes[i], nr_att + i, nr_att); if (hidden > 1) { for(i = 1; i <= hidden; i++) if (probmax < CValAttEx(instance, i + nr_att)) probmax = CValAttEx(instance, i + nr_att); for(i = 1; i <= hidden; i++) { CValAttEx(instance, i + nr_att) -= probmax; if (CValAttEx(instance, i + nr_att) < -25.0) CValAttEx(instance, i + nr_att) = -25.0; CValAttEx(instance, i + nr_att) = exp(CValAttEx(instance, i + nr_att)); sumprob += CValAttEx(instance, i + nr_att); } for(i = 1; i <= hidden; i++) CValAttEx(instance, i + nr_att) /= sumprob; } }}/************************************************//* Funcoes Privadas *//************************************************/static void projection\(DomainInfo *domain, AttrVal *instance, double *mcoef, int att, int nr_att){ register int i = 2, j; CValAttEx(instance, att) = mcoef[1]; for(j = 1; j <= nr_att; j++){ if (NormalVal(instance[j])) { switch(CiTypeAttr(domain, j)) { case integer: case ordered: CValAttEx(instance, att) += (DValAttEx(instance, j) * mcoef[i++]); break; case continuous: CValAttEx(instance, att) += (CValAttEx(instance, j) * mcoef[i++]); break; } } }}/****************************************//* Covariancias FUNCTIONS *//****************************************/static double ***init_matriz_cov(int nr_cl, int nr_att){ register int i; double ***tz = NULL; tz = (double ***) malloc((nr_cl + 1) * sizeof(double **)); for(i = 0; i <= nr_cl; i++) tz[i] = dmatrix(1, nr_att, 1, nr_att); return tz;}static void free_covar(double ***mcov, int nr_cl, int nr_att){ register int i; for(i = 0; i <= nr_cl; i++) free_dmatrix(mcov[i], 1, nr_att, 1, nr_att); free(mcov);}static double ***compute_covars\(CiDs *ds, double ***STATIS, double *class_freq, long int low, long int high, int nr_att, int nr_cl){ register int att, atti, pos, pos1, classe, cl; register long int i; double x = 0.0, y = 0.0, mediax = 0.0, mediay = 0.0, weight, ***MCOV; AttrVal *instance; MCOV = init_matriz_cov(NR_CLASS, nr_att - NR_NON_CONT); for(i = low; i <= high; i++) { instance = Ci_AttVal(ds, i); classe = Ci_Classe(Ci_Example(ds, i)); weight = Ci_Weight(Ci_Example(ds, i)); if ((cl = CLASS_USED[classe]) > 0) { for(att = 1; att <= nr_att; att++) { if ((pos = POSITION(ds->domain, att)) > 0) { if (!NormalVal(instance[att])) { if (CiTypeAttr(ds->domain, att) == continuous) { x = STATIS[att][classe][1]; mediax = STATIS[att][0][1]; } else { x = STATIS[att][classe][1+NValsAttr(ds->domain, att)]; mediax = STATIS[att][0][1+NValsAttr(ds->domain, att)]; } } else { switch (CiTypeAttr(ds->domain, att)) { case continuous: x = CValAttEx(instance, att); mediax = STATIS[att][classe][1]; break; case integer: case ordered: x = DValAttEx(instance, att); mediax = STATIS[att][classe][1+NValsAttr(ds->domain, att)]; break; } } for(atti = att; atti <= nr_att; atti++) { if ((pos1 = POSITION(ds->domain, atti)) > 0) { if (!NormalVal(instance[atti])) { if (CiTypeAttr(ds->domain, atti) == continuous) { y = STATIS[atti][classe][1]; mediay = STATIS[atti][0][1]; } else { y = STATIS[atti][classe][1+NValsAttr(ds->domain, atti)]; mediay = STATIS[atti][0][1+NValsAttr(ds->domain, atti)]; } } else { switch (CiTypeAttr(ds->domain, atti)) { case continuous: y = CValAttEx(instance, atti); mediay = STATIS[atti][classe][0]; break; case integer: case ordered: y = DValAttEx(instance, atti); mediay = STATIS[atti][classe][1+NValsAttr(ds->domain, atti)]; break; } } MCOV[cl][pos][pos1] += (x - mediax) * (y - mediay) * weight; } } } } } } for(i = 1; i <= Ci_NrClasses(ds->domain); i++) if ((cl = CLASS_USED[i]) > 0) { for(att = 1; att <= nr_att - NR_NON_CONT; att++) for(atti = att; atti <= nr_att- NR_NON_CONT; atti++) { MCOV[cl][att][atti] /= (class_freq[i] -1.0); MCOV[cl][atti][att] = MCOV[cl][att][atti]; MCOV[0][att][atti] += (MCOV[cl][att][atti] * (class_freq[i] -1.0)); } } for(att = 1; att <= nr_att - NR_NON_CONT; att++) for(atti = att; atti <= nr_att- NR_NON_CONT; atti++) { MCOV[0][att][atti] /= (class_freq[0] - (double) NR_CLASS); MCOV[0][atti][att] = MCOV[0][att][atti]; } return MCOV;}static double **coeficientes\(DomainInfo *domain, double ***STATIS, double **MINV, double *class_freq, int nratt, int nrcla){ int i, j; double **beta, *alfa; double **coef = dmatrix(1, NR_CLASS - 1, 1, nratt+1); beta = matmul(domain, STATIS, MINV, nratt, nrcla); alfa = inner(domain, STATIS, beta, nratt, nrcla, class_freq); /**************************************************************/ corrige_beta(beta, nratt, nrcla); for(i = 1; i < NR_CLASS; i++) { coef[i][1] = alfa[i]; for(j = 1; j <= nratt; j++) coef[i][j+1] = ABS(beta[i][j]) < TRESH_LIM ? 0.0 : beta[i][j]; } free_dvector(alfa, 1, NR_CLASS); free_dmatrix(beta, 1, NR_CLASS, 1, nratt); return coef;}static double **matmul\(DomainInfo *domain, double ***STATIS, double **MINV, int nratt, int nrcla){ register int att, att1, cl, virtual_class; double temp, **beta; beta = dmatrix(1, NR_CLASS, 1, nratt); for(att = 1; att <= nratt; att++) { for(cl = 1; cl <= nrcla; cl++) { temp = 0.0; virtual_class = CLASS_USED[cl]; if (virtual_class) { for(att1 = 1; att1 <= nratt; att1++) { if (CiTypeAttr(domain, att1) == continuous) { temp += MINV[att][att1] * STATIS[att1][cl][1]; } else { temp += MINV[att][att1] * STATIS[att1][cl][1+NValsAttr(domain, att1)];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -