⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pca_hao.c

📁 VC 6.0下的主成分分析代码
💻 C
📖 第 1 页 / 共 2 页
字号:
/*********************************/
/* Principal Components Analysis */
/*********************************/

/*********************************************************************/
/* Principal Components Analysis or the Karhunen-Loeve expansion is a
   classical method for dimensionality reduction or exploratory data
   analysis.  One reference among many is: F. Murtagh and A. Heck,
   Multivariate Data Analysis, Kluwer Academic, Dordrecht, 1987 
   (hardbound, paperback and accompanying diskette).

   This program is public-domain.  If of importance, please reference 
   the author.  Please also send comments of any kind to the author:

   F. Murtagh
   Schlossgartenweg 1          or        35 St. Helen's Road
   D-8045 Ismaning                       Booterstown, Co. Dublin
   W. Germany                            Ireland

   Phone:        + 49 89 32006298 (work)
                 + 49 89 965307 (home)
   Telex:        528 282 22 eo d
   Fax:          + 49 89 3202362
   Earn/Bitnet:  fionn@dgaeso51,  fim@dgaipp1s,  murtagh@stsci
   Span:         esomc1::fionn
   Internet:     murtagh@scivax.stsci.edu
   

   A Fortran version of this program is also available.     
    
   F. Murtagh, Munich, 6 June 1989                                   */   
/*********************************************************************/

#include <stdio.h>
#include <string.h>
#include <math.h>

#define SIGN(a, b) ( (b) < 0 ? -fabs(a) : fabs(a) )

main(argc, argv)
int argc;
char *argv[];

{
FILE *stream;
int  n, m,  i, j, k, k2;
float **data, **matrix(), **symmat, **symmat2, *vector(), *evals, *interm;
void free_matrix(), free_vector(), corcol(), covcol(), scpcol();
void tred2(), tqli();
float in_value;
char option, *strncpy();

/*********************************************************************
   Get from command line:
   input data file name, #rows, #cols, option.

   Open input file: fopen opens the file whose name is stored in the
   pointer argv[argc-1]; if unsuccessful, error message is printed to
   stderr.
*********************************************************************/

   if (argc !=  5)
      {
      printf("Syntax help: PCA filename #rows #cols option\n\n");
      printf("(filename -- give full path name,\n");
      printf(" #rows                          \n");
      printf(" #cols    -- integer values,\n");                  
      printf(" option   -- R (recommended) for correlation analysis,\n");
      printf("             V for variance/covariance analysis\n");
      printf("             S for SSCP analysis.)\n");
      exit(1);
      }

   n = atoi(argv[2]);              /* # rows */
   m = atoi(argv[3]);              /* # columns */
   strncpy(&option,argv[4],1);     /* Analysis option */

   printf("No. of rows: %d, no. of columns: %d.\n",n,m);
   printf("Input file: %s.\n",argv[1]);

   if ((stream = fopen(argv[1],"r")) == NULL)
      {
      fprintf(stderr, "Program %s : cannot open file %s\n",
                       argv[0], argv[1]);
      fprintf(stderr, "Exiting to system.");
      exit(1);
      /* Note: in versions of DOS prior to 3.0, argv[0] contains the
         string "C". */
      }

    /* Now read in data. */

    data = matrix(n, m);  /* Storage allocation for input data */

    for (i = 1; i <= n; i++)
        {
        for (j = 1; j <= m; j++)
            {
            fscanf(stream, "%f", &in_value);
            data[i][j] = in_value;
            }
         }

    /* Check on (part of) input data.
    for (i = 1; i <= 18; i++) {for (j = 1; j <= 8; j++)  {
        printf("%7.1f", data[i][j]);  }  printf("\n");  }
    */

    symmat = matrix(m, m);  /* Allocation of correlation (etc.) matrix */

   /* Look at analysis option; branch in accordance with this. */

     switch(option)
             {
          case 'R':
          case 'r':
              printf("Analysis of correlations chosen.\n");
              corcol(data, n, m, symmat);

                          /* Output correlation matrix.
                          for (i = 1; i <= m; i++) {
                           for (j = 1; j <= 8; j++)  {
                            printf("%7.4f", symmat[i][j]);  }
                            printf("\n");  }
                          */
              break;
          case 'V':
          case 'v':
              printf("Analysis of variances-covariances chosen.\n");
              covcol(data, n, m, symmat);

                          /* Output variance-covariance matrix.
                          for (i = 1; i <= m; i++) {
                          for (j = 1; j <= 8; j++)  {
                            printf("%7.1f", symmat[i][j]);  }
                            printf("\n");  }
                          */
              break;
          case 'S':
          case 's':
              printf("Analysis of sums-of-squares-cross-products");
              printf(" matrix chosen.\n");
              scpcol(data, n, m, symmat);

                         /* Output SSCP matrix.
                         for (i = 1; i <= m; i++) {
                          for (j = 1; j <= 8; j++)  {
                            printf("%7.1f", symmat[i][j]);  }
                            printf("\n");  }
                         */
              break;
          default:
              printf("Option: %s\n",option);
              printf("For option, please type R, V, or S\n");
              printf("(upper or lower case).\n");
              printf("Exiting to system.\n");
              exit(1);
              break;
          }

/*********************************************************************
    Eigen-reduction
**********************************************************************/

    /* Allocate storage for dummy and new vectors. */
    evals = vector(m);     /* Storage alloc. for vector of eigenvalues */
    interm = vector(m);    /* Storage alloc. for 'intermediate' vector */
    symmat2 = matrix(m, m);  /* Duplicate of correlation (etc.) matrix */
    for (i = 1; i <= m; i++) 
	{
     for (j = 1; j <= m; j++) 
	 {
      symmat2[i][j] = symmat[i][j]; /* Needed below for col. projections */
                             
	 }
                            
	}
    tred2(symmat, m, evals, interm);  /* Triangular decomposition */
    tqli(evals, interm, m, symmat);   /* Reduction of sym. trid. matrix */
    /* evals now contains the eigenvalues,
       columns of symmat now contain the associated eigenvectors. */

     printf("\nEigenvalues:\n");
     for (j = m; j >= 1; j--) 
	 {
       printf("%18.5f\n", evals[j]);		//特征值 
	 }
     printf("\n(Eigenvalues should be strictly positive; limited\n");
     printf("precision machine arithmetic may affect this.\n");
     printf("Eigenvalues are often expressed as cumulative\n");
     printf("percentages, representing the 'percentage variance\n");
     printf("explained' by the associated axis or principal component.)\n");

     printf("\nEigenvectors:\n");
     printf("(First three; their definition in terms of original vbes.)\n");
     for (j = 1; j <= m; j++) 
	 {
       for (i = 1; i <= 3; i++)  
	   {
          printf("%12.4f", symmat[j][m-i+1]); 
	   }
          printf("\n");  
	 }

     /* Form projections of row-points on first three prin. components. */
     /* Store in 'data', overwriting original data. */
     for (i = 1; i <= n; i++) 
	 {
      for (j = 1; j <= m; j++)
	  {
        interm[j] = data[i][j]; 
	  }   /* data[i][j] will be overwritten */
        for (k = 1; k <= 3; k++) 
		{
          data[i][k] = 0.0;
          for (k2 = 1; k2 <= m; k2++)
		  {
            data[i][k] += interm[k2] * symmat[k2][m-k+1]; 
		  }
        }
     }

     printf("\nProjections of row-points on first 3 prin. comps.:\n");
     for (i = 1; i <= n; i++)
	 {
       for (j = 1; j <= 3; j++) 
	   {
          printf("%12.4f", data[i][j]); 
	   }
          printf("\n"); 
	 }

     /* Form projections of col.-points on first three prin. components. */
     /* Store in 'symmat2', overwriting what was stored in this. */
     for (j = 1; j <= m; j++) 
	 {
      for (k = 1; k <= m; k++) 
	  {
        interm[k] = symmat2[j][k];
	  }  /*symmat2[j][k] will be overwritten*/
        for (i = 1; i <= 3; i++) 
		{
          symmat2[j][i] = 0.0;
          for (k2 = 1; k2 <= m; k2++) 
		  {
            symmat2[j][i] += interm[k2] * symmat[k2][m-i+1]; 
		  }
          if (evals[m-i+1] > 0.0005)   /* Guard against zero eigenvalue */
             symmat2[j][i] /= sqrt(evals[m-i+1]);   /* Rescale */
          else
             symmat2[j][i] = 0.0;    /* Standard kludge */
        }
     }

     printf("\nProjections of column-points on first 3 prin. comps.:\n");
     for (j = 1; j <= m; j++) 
	 {
       for (k = 1; k <= 3; k++)  
	   {
          printf("%12.4f", symmat2[j][k]);  
	   }
          printf("\n"); 
	 }

    free_matrix(data, n, m);
    free_matrix(symmat, m, m);
    free_matrix(symmat2, m, m);
    free_vector(evals, m);
    free_vector(interm, m);

}

/**  Correlation matrix: creation  ***********************************/

void corcol(data, n, m, symmat)
float **data, **symmat;
int n, m;
/* Create m * m correlation matrix from given n * m data matrix. */
{
float eps = 0.005;
float x, *mean, *stddev, *vector();
int i, j, j1, j2;

/* Allocate storage for mean and std. dev. vectors */

mean = vector(m);
stddev = vector(m);

/* Determine mean of column vectors of input data matrix */

for (j = 1; j <= m; j++)
    {
    mean[j] = 0.0;
    for (i = 1; i <= n; i++)
        {
        mean[j] += data[i][j];
        }
    mean[j] /= (float)n;
    }

printf("\nMeans of column vectors:\n");
for (j = 1; j <= m; j++)  {
    printf("%7.2f",mean[j]);  }   printf("\n");

/* Determine standard deviations of column vectors of data matrix. */

for (j = 1; j <= m; j++)
    {
    stddev[j] = 0.0;
    for (i = 1; i <= n; i++)
        {
        stddev[j] += (   ( data[i][j] - mean[j] ) *
                         ( data[i][j] - mean[j] )  );
        }
        stddev[j] /= (float)n;
        stddev[j] = sqrt(stddev[j]);
        /* The following in an inelegant but usual way to handle
        near-zero std. dev. values, which below would cause a zero-
        divide. */
        if (stddev[j] <= eps) stddev[j] = 1.0;
    }

printf("\nStandard deviations of columns:\n");
for (j = 1; j <= m; j++) { printf("%7.1f", stddev[j]); }
printf("\n");

/* Center and reduce the column vectors. */

for (i = 1; i <= n; i++)
    {
    for (j = 1; j <= m; j++)
        {
        data[i][j] -= mean[j];
        x = sqrt((float)n);
        x *= stddev[j];
        data[i][j] /= x;
        }
    }

/* Calculate the m * m correlation matrix. */
for (j1 = 1; j1 <= m-1; j1++)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -