⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pca.c

📁 CURE算法实现 一种分层次的聚类算法 效果较好
💻 C
字号:
/* * pca.c -- PCA attachment for cluster program * * $Log: pca.c,v $ * Revision 1.8  1996/01/10 19:58:14  stolcke * fixed misplaced free() call * * Revision 1.7  1992/02/12 07:13:13  stolcke * -E option added * * Revision 1.6  1992/02/12  06:26:28  stolcke * eigenvector file format transposed * * Revision 1.5  1991/07/14  01:10:58  stolcke * curses support added * graphing routines moved to separate file * * Revision 1.4  91/07/10  21:26:56  stolcke * saber-cleaned and bad bug in allocation macro fixed *  * Revision 1.3  91/04/20  16:18:19  stolcke * second release (beta) *  * Revision 1.2  91/04/18  18:28:27  stolcke * general cleanup and partial rewrite *  * Revision 1.1  91/04/18  13:28:15  stolcke * Initial revision * */#if !defined(lint) && !defined(SABER)static char rcsid[] = "$Header: /usr/local/src/ai/cluster/RCS/pca.c,v 1.8 1996/01/10 19:58:14 stolcke Exp $";#endif				/* not lint */#include <stdio.h>#include <math.h>#include <string.h>#include "alloc.h"#include "error.h"/* * vars defined in main program */extern char *program;extern int vflag;extern int Eflag;/* * computational subroutines for PCA */static intcovariance(vecs, m, cov, n)    FLOAT **vecs, **cov;    int     m, n;{    FLOAT  *mean;		/* vector of means */    int     i, j, k;    IfErr (mean = new_array_of(n, FLOAT))	Erreturn("not enough core");    /* compute means */    for (i = 0; i < n; i++) {	FLOAT sum = 0.0;	int   l = 0;	for (k = 0; k < m; k++) {#ifndef NO_DONTCARES	    if ( !IS_DC(vecs[k][i]) )#endif	    {		sum += vecs[k][i];		l += 1;	    }	}	mean[i] = sum / (l != 0 ? l : 1);#ifndef NO_DONTCARES	/* replace all D/C's my the mean on that dimension */	for (k = 0; k < m; k++)	    if ( IS_DC(vecs[k][i]) )		vecs[k][i] = mean[i];#endif    }    /* compute covariance */    for (i = 0; i < n; i++)	for (j = 0; j <= i; j++)	    cov[i][j] = 0.0;    for (k = 0; k < m; k++)	for (i = 0; i < n; i++)	    for (j = 0; j <= i; j++)		cov[i][j] += (vecs[k][i] - mean[i]) *		    (vecs[k][j] - mean[j]);    for (i = 0; i < n; i++) {	for (j = 0; j < i; j++) {	    cov[i][j] /= m;	    cov[j][i] = cov[i][j];	}	cov[i][i] /= m;    }    free(mean);    return MY_OK;}static intjacobi(a, n, d, v, nrot)    FLOAT **a, d[], **v;    int     n, *nrot;{    int     j, iq, ip, i;    FLOAT   *b, *z;    IfErr (b = new_array_of(n, FLOAT))	Erreturn("not enough core");    IfErr (z = new_array_of(n, FLOAT))	Erreturn("not enough core");    for (ip = 0; ip < n; ip++) {	for (iq = 0; iq < n; iq++)	    v[ip][iq] = 0.0;	v[ip][ip] = 1.0;    }    for (ip = 0; ip < n; ip++) {	b[ip] = d[ip] = a[ip][ip];	z[ip] = 0.0;    }    *nrot = 0;    for (i = 1; i <= 50; i++) {	FLOAT	tresh;	FLOAT	sm = 0.0;	for (ip = 0; ip < n - 1; ip++) {	    for (iq = ip + 1; iq < n; iq++)		sm += fabs(a[ip][iq]);	}	if (sm == 0.0) {	    free(z);	    free(b);	    return MY_OK;	}	if (i < 4)	    tresh = 0.2 * sm / (n * n);	else	    tresh = 0.0;	for (ip = 0; ip < n - 1; ip++) {	    for (iq = ip + 1; iq < n; iq++) {		FLOAT g = 100.0 * fabs(a[ip][iq]);		if (i > 4 &&		    fabs(d[ip]) + g == fabs(d[ip]) &&		    fabs(d[iq]) + g == fabs(d[iq]))		    a[ip][iq] = 0.0;		else if (fabs(a[ip][iq]) > tresh) {		    FLOAT tau, t, s, c;		    FLOAT h = d[iq] - d[ip];		    if (fabs(h) + g == fabs(h))			t = (a[ip][iq]) / h;		    else {			FLOAT theta = 0.5 * h / (a[ip][iq]);			t = 1.0 / (fabs(theta) + sqrt(1.0 + theta * theta));			if (theta < 0.0)			    t = -t;		    }		    c = 1.0 / sqrt(1 + t * t);		    s = t * c;		    tau = s / (1.0 + c);		    h = t * a[ip][iq];		    z[ip] -= h;		    z[iq] += h;		    d[ip] -= h;		    d[iq] += h;		    a[ip][iq] = 0.0;#define rotate(a,i,j,k,l) \			g = a[i][j]; \			h = a[k][l]; \			a[i][j] = g - s *(h + g*tau); \			a[k][l] = h + s*(g - h*tau);		    for (j = 0; j < ip; j++) {			rotate(a, j, ip, j, iq)		    }		    for (j = ip + 1; j < iq; j++) {			rotate(a, ip, j, j, iq)		    }		    for (j = iq + 1; j < n; j++) {			rotate(a, ip, j, iq, j)		    }		    for (j = 0; j < n; j++) {			rotate(v, j, ip, j, iq)		    }		    *nrot += 1;		}	    }	}	for (ip = 0; ip < n; ip++) {	    b[ip] += z[ip];	    d[ip] = b[ip];	    z[ip] = 0.0;	}    }    Erreturn("too many Jacobi iterations");}static voideigsrt(d, v, n)    FLOAT   d[], **v;    int     n;{    int     k, j, i;    for (i = 0; i < n - 1; i++) {	FLOAT p = d[k = i];	for (j = i + 1; j < n; j++)	    if (d[j] >= p)		p = d[k = j];	if (k != i) {	    d[k] = d[i];	    d[i] = p;	    for (j = 0; j < n; j++) {		p = v[j][i];		v[j][i] = v[j][k];		v[j][k] = p;	    }	}    }}static intgaussjt(a, n, b, m)    FLOAT **a, **b;    int     n, m;{    int    *indxc, *indxr, *ipiv;    int     i, j, k, l;    IfErr (indxc = new_array_of(n, int))	Erreturn("not enough core");    IfErr (indxr = new_array_of(n, int))	Erreturn("not enough core");    IfErr (ipiv = new_array_of(n, int))	Erreturn("not enough core");    for (j = 0; j < n; j++)	ipiv[j] = 0;    for (i = 0; i < n; i++) {	int	icol, irow, ll;	FLOAT	pivinv;	FLOAT	big = 0.0;	for (j = 0; j < n; j++)	    if (ipiv[j] != 1)		for (k = 0; k < n; k++) {		    if (ipiv[k] == 0) {			if (fabs(a[j][k]) >= big) {			    big = fabs(a[j][k]);			    irow = j;			    icol = k;			}		    }		    else if (ipiv[k] > 1)			Erreturn("singular matrix");		}	ipiv[icol] += 1;#define swap(a,b) { \		FLOAT temp = (a); \		(a) = (b); \		(b) = temp; \	    }	if (irow != icol) {	    for (l = 0; l < n; l++)		swap(a[irow][l], a[icol][l]);	    for (l = 0; l < m; l++)		swap(b[l][irow], b[l][icol]);	}	indxr[i] = irow;	indxc[i] = icol;	if (a[icol][icol] == 0.0)	    Erreturn("singular matrix");	pivinv = 1.0 / a[icol][icol];	a[icol][icol] = 1.0;	for (l = 0; l < n; l++)	    a[icol][l] *= pivinv;	for (l = 0; l < m; l++)	    b[l][icol] *= pivinv;	for (ll = 0; ll < n; ll++)	    if (ll != icol) {		FLOAT dum = a[ll][icol];		a[ll][icol] = 0.0;		for (l = 0; l < n; l++)		    a[ll][l] -= a[icol][l] * dum;		for (l = 0; l < m; l++)		    b[l][ll] -= b[l][icol] * dum;	    }    }    for (l = n - 1; l >= 0; l--) {	if (indxr[l] != indxc[l])	    for (k = 0; k < n; k++)		swap(a[k][indxr[l]], a[k][indxc[l]]);    }    free(ipiv);    free(indxr);    free(indxc);    return MY_OK;}/* * matrix I/O */static intwrite_matrix(fp, mat, n)    FILE   *fp;    FLOAT **mat;    int     n;{    int     i, j;    /* print column vectors line-by-line */    for (j = 0; j < n; j++) {	for (i = 0; i < n; i++) {	    fprintf(fp, FLOAT_FORMAT, (double)mat[i][j]);	    fprintf(fp, " ");	}	fprintf(fp, "\n");    }    return MY_OK;}static intread_matrix(fp, mat, n)    FILE   *fp;    FLOAT **mat;    int     n;{    int     i, j;    /* read column vectors line-by-line */    for (j = 0; j < n; j++) {	for (i = 0; i < n; i++) {	    double f;	    if (fscanf(fp, "%lf", &f) != 1)		return MY_ERR;	    mat[i][j] = f;	}    }    return MY_OK;}static intwrite_pattern(mat, m, n, name, comps)    FLOAT **mat;    char  **name;    int     m, n;    char   *comps;{    int     i, j;    int    *pcs;    IfErr (pcs = new_array_of(n, int))	Erreturn("not enough core");    /* initialize list of pcs */    for (j = 0; j < n; j++)	pcs[j] = comps == NULL ? j : -1;    /* parse list of pcs */    for (j = 0; j < n && comps != NULL;) {	if (sscanf(comps, "%d", &i) == 1 &&	    i > 0 && i <= n)	    pcs[j++] = i - 1;	if ((comps = strchr(comps, ',')) != NULL)	    comps++;    }    /* dump vectors */    for (i = 0; i < m; i++) {	for (j = 0; j < n && pcs[j] >= 0; j++) {	    printf(FLOAT_FORMAT, (double)mat[i][pcs[j]]);	    printf(" ");	}	if (name != NULL) {	    if (strpbrk(name[i], " \t") != NULL)		printf("\"");	    printf("%s", name[i]);	}	printf("\n");    }    free(pcs);    return MY_OK;}pca(pattern, name, lpat, npat, efile, comps)    FLOAT **pattern;		/* array of pattern vectors */    char  **name;		/* array of label strings (or NULL) */    int     lpat, npat;    char   *efile;		/* name of an eigenbase file (or NULL) */    char   *comps;		/* list of PCs to print (or NULL) */{    FLOAT **covar;		/* covariance matrix */    FLOAT  *eval;		/* eigenvalues thereof */    FLOAT **evec;		/* eigenvectors thereof */    int     nrot;		/* no of jacobi rots */    FILE   *efp = NULL;    if (Eflag || efile == NULL || (efp = fopen(efile, "r")) == NULL) {	if (efile != NULL &&	    (efp = fopen(efile, "w")) == NULL) {	    fprintf(stderr, "%s: cannot create %s\n", program, efile);	}	IfErr (covar = new_2d_array_of(lpat, lpat, FLOAT)) {	    fprintf(stderr, "%s: not enough core for covar matrix\n", program);	    exit(1);	}	/* compute covariance matrix */	if (vflag)	    fprintf(stderr, "computing covariance ...\n");	IfErr (covariance(pattern, npat, covar, lpat)) {	    fprintf(stderr, "%s: %s: covariance failed\n", program, ERR_MSG);	    exit(1);	}	/* compute eigenvectors and -values */	IfErr (eval = new_array_of(lpat, FLOAT)) {	    fprintf(stderr, "%s: not enough core\n", program);	    exit(1);	}	IfErr (evec = new_2d_array_of(lpat, lpat, FLOAT)) {	    fprintf(stderr, "%s: not enough core for eigenbasis\n", program);	    exit(1);	}	if (vflag)	    fprintf(stderr, "computing eigenbasis ...\n");	IfErr (jacobi(covar, lpat, eval, evec, &nrot)) {	    fprintf(stderr, "%s: %s: jacobi failed\n", program, ERR_MSG);	    exit(1);	}	/* order eigenvectors */	if (vflag)	    fprintf(stderr, "sorting eigenbasis ...\n");	eigsrt(eval, evec, lpat);	if (efp != NULL) {	    if (vflag)		fprintf(stderr, "saving eigenbasis in %s\n", efile);	    write_matrix(efp, evec, lpat);	    fclose(efp);	}	free_2d_array(covar, lpat);    }    else {	/* read eigenvectors from file */	IfErr (evec = new_2d_array_of(lpat, lpat, FLOAT)) {	    fprintf(stderr, "%s: not enough core for eigenbasis\n", program);	    exit(1);	}	if (vflag)	    fprintf(stderr, "reading eigenbasis from %s\n", efile);	IfErr(read_matrix(efp, evec, lpat)) {	    fprintf(stderr, "%s: bad eigenbasis format\n", program);	    exit(1);	}    }    /* convert pattern vectors to eigenbasis */    if (vflag)	fprintf(stderr, "converting to eigenbasis ...\n");    IfErr (gaussjt(evec, lpat, pattern, npat)) {	fprintf(stderr, "%s: %s: gaussjt failed\n", program, ERR_MSG);	exit(1);    }    /* output converted patterns */    if (Eflag) {	IfErr (write_pattern(&eval, 1, lpat, NULL, comps)) {	    fprintf(stderr, "%s: %s: cannot write eigenvalues\n",			    program, ERR_MSG);	    exit(1);	}    }    else {	IfErr (write_pattern(pattern, npat, lpat, name, comps)) {	    fprintf(stderr, "%s: %s: cannot write patterns\n",			    program, ERR_MSG);	    exit(1);	}    }    free(eval);    free_2d_array(evec, lpat);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -