⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pcacorr.java

📁 PCA in (learning machine) java.
💻 JAVA
字号:
import java.io.*;import java.util.*;import java.text.*;import Jama.*;/** * PCAcorr - Principal Components Analysis on correlations <p> * Jama Matrix class package, "JAMA: A Java Matrix Package" is used.<br> * See: http://math.nist.gov/javanumerics/jama <p> * Example of use: <p> * <tt> javac PCAcorr.java </tt> <br> * <tt> java PCAcorr <a href="../iris.dat">iris.dat</a> >  * <a href="pcaoutput.txt">pcaoutput.txt</a></tt> <p> * Format of input data set:  * <ul> * <li> integer row and column dimensions,  * <li> followed by floating values  * <li> which are read row-wise. * </ul> * Outputs produced: * <ul> * <li> Echo of input file name, input dimensions, sample of data * <li> Variable means and standard deviations * <li> Correlation matrix to be diagonalized * <li> Eigenvectors * <li> Eigenvalues and as cumulative percentages * <li> Row projections in new principal component space * <li> Column projections in new principal component space * </ul> * Version: 2002 Aug. 15 <br> * Author: F. Murtagh, f.murtagh@qub.ac.uk * @version 2002 Aug 15 * @author F. Murtagh, f.murtagh@qub.ac.uk */public class PCAcorr{public static void main (String argv[]){  PrintStream out = System.out;  try{     //-------------------------------------------------------------------     if (argv.length == 0) {        System.out.println(" Syntax: java PCAcorr infile.dat ");        System.out.println(" Input file format: ");        System.out.println(" Line 1: integer no. rows, no. cols.");        System.out.println(" Successive lines: matrix values, floating");        System.out.println(" Read in row-wise");        System.exit (1);     }     String filname = argv[0];      System.out.println (" Input file name: " + filname);     // Open the matrix file     FileInputStream is  = new FileInputStream(filname);     BufferedReader  bis = new BufferedReader(new InputStreamReader(is));     StreamTokenizer st  = new StreamTokenizer(bis);     // Row and column sizes, read in first     st.nextToken();  int n = (int)st.nval;     st.nextToken();  int m = (int)st.nval;     System.out.println(" No. of rows, n = " + n);     System.out.println(" No. of cols, m = " + m);          // Input array, values to be read in successively, float     double[][] indat = new double[n][m];      double inval;     // New read in input array values, successively     System.out.println        (" Input data sample follows as a check, first 4 values.");     for (int i = 0; i < n; i++) {        for (int j = 0; j < m; j++) {             st.nextToken();             inval = (double)st.nval;             indat[i][j] = inval;             if (i < 2 && j < 2) {                System.out.println(" value = " + inval);             }        }     }       System.out.println();     //-------------------------------------------------------------------     // Data preprocessing - standardization and determining correlations      double[][] indatstd = Standardize(n, m, indat);     // use Jama matrix class       Matrix X = new Matrix(indatstd);     // Sums of squares and cross-products matrix     Matrix Xprime = X.transpose();     Matrix SSCP   = Xprime.times(X);     // Note the following:     // - with no preprocessing of the input data, we have an SSCP matrix     // - with centering of columns (i.e. each col. has col. mean      //   [vector in row-space] subtracted) we have variances/covariances     // - with centering and reduction to unit variance [i.e. centered     //   cols. are divided by std. dev.] we have correlations     // Note: the current version supports correlations only     // Print out SSCP matrix      // Parameters: floating value width, and no. of decimal places     // Comment out this line for large data sets, and/or diff. precision     System.out.println();     System.out.println             (" SSCP or sums-of-squares and cross-products matrix:");     System.out.println (" (Note: correlations in this implementation)");     SSCP.print(6,2);     //-------------------------------------------------------------------     // Eigen decomposition     EigenvalueDecomposition evaldec = SSCP.eig();     Matrix evecs = evaldec.getV();     double[] evals = evaldec.getRealEigenvalues();     // print out eigenvectors     System.out.println (" Eigenvectors (leftmost col <--> largest eval):");     // evecs contains the cols. ordered right to left     // Evecs is the more natural order with cols. ordered left to right     // So to repeat: leftmost col. of Evecs is assoc with largest Evals     // Evals and Evecs ordered from left to right     double tot = 0.0;      for (int j = 0; j < evals.length; j++)  {         tot += evals[j];      }     // reverse order of evals into Evals     double[] Evals = new double[m];     for (int j = 0; j < m; j++) {         Evals[j] = evals[m - j - 1];     }     // reverse order of Matrix evecs into Matrix Evecs     double[][] tempold = evecs.getArray();     double[][] tempnew = new double[m][m];      for (int j1 = 0; j1 < m; j1++) {         for (int j2 = 0; j2 < m; j2++) {             tempnew[j1][j2] = tempold[j1][m - j2 - 1];	 }     }     Matrix Evecs = new Matrix(tempnew);     Evecs.print(10,4);     System.out.println();     System.out.println (" Eigenvalues and as cumulative percentages:");      double runningtotal = 0.0;     double[] percentevals = new double[m];     for (int j = 0; j < Evals.length; j++) {         percentevals[j] = runningtotal + 100.0*Evals[j]/tot;         runningtotal    = percentevals[j];     }     printVect(Evals, 4, 10);     printVect(percentevals, 4, 10);     //-------------------------------------------------------------------     // Projections - row, and col     // Row projections in new space, X U  Dims: (n x m) x (m x m)     System.out.println();     System.out.println(" Row projections in new principal component space:");     Matrix rowproj = X.times(Evecs);     rowproj.print(10,4);     // Col projections (X'X) U    (4x4) x4  And col-wise div. by sqrt(evals)     Matrix colproj = SSCP.times(Evecs);        // We need to leave colproj Matrix class and instead use double array     double[][] ynew = colproj.getArray();     for (int j1 = 0; j1 < m; j1++) {         for (int j2 = 0; j2 < m; j2++) {             if (Evals[j2] > 0.00005) {                 ynew[j1][j2] = ynew[j1][j2]/Math.sqrt(Evals[j2]); }             if (Evals[j2] <= 0.00005) {                 ynew[j1][j2] = 0.0; }         }     }     System.out.println ();     System.out.println (" Column projections in new princ. comp. space.");     printMatrix(m, m, ynew, 4, 8);     //-------------------------------------------------------------------     // That's it.  }  catch (IOException e) {     out.println("error: " + e);     System.exit(1);  }} // end of main    //-------------------------------------------------------------------      // Little method for helping in output formating      public static String getSpaces(int n) {    StringBuffer sb = new StringBuffer(n);    for (int i = 0; i < n; i++) sb.append(' ');    return sb.toString();    } // getSpaces       //-------------------------------------------------------------------    /**     * Method for printing a matrix  <br>     * Based on ER Harold, "Java I/O", O'Reilly, around p. 473.     * @param n1 row dimension of matrix     * @param n2 column dimension of matrix     * @param m input matrix values      * @param d display precision, number of decimal places     * @param w display precision, total width of floating value     */    public static void printMatrix(int n1, int n2, double[][] m, int d, int w)    {	// Some definitions for handling output formating      NumberFormat myFormat = NumberFormat.getNumberInstance();      FieldPosition fp = new FieldPosition(NumberFormat.INTEGER_FIELD);      myFormat.setMaximumIntegerDigits(d);      myFormat.setMaximumFractionDigits(d);      myFormat.setMinimumFractionDigits(d);      for (int i=0; i<n1; i++)	  {	      // Print each row, elements separated by spaces              for (int j=0; j<n2; j++)		  // Following unfortunately doesn't format at all		  //                  System.out.print(m[i][j] + "  ");                  {                  String valString = myFormat.format(                       m[i][j], new StringBuffer(), fp).toString();                  valString = getSpaces(w - fp.getEndIndex()) + valString;                  System.out.print(valString);                  }              // Start a new line at the end of a row              System.out.println();          }      // Leave a gap after the entire matrix      System.out.println();    } // printMatrix    //-------------------------------------------------------------------    /**     * Method printVect for printing a vector <br>     * Based on ER Harold, "Java I/O", O'Reilly, around p. 473.     * @param m input vector of length m.length     * @param d display precision, number of decimal places     * @param w display precision, total width of floating value     */    public static void printVect(double[] m, int d, int w)    {	// Some definitions for handling output formating      NumberFormat myFormat = NumberFormat.getNumberInstance();      FieldPosition fp = new FieldPosition(NumberFormat.INTEGER_FIELD);      myFormat.setMaximumIntegerDigits(d);      myFormat.setMaximumFractionDigits(d);      myFormat.setMinimumFractionDigits(d);      int len = m.length;      for (int i=0; i<len; i++)	  {	      // Following would be nice, but doesn't format adequately	      //                  System.out.print(m[i] + "  ");             String valString = myFormat.format(                   m[i], new StringBuffer(), fp).toString();             valString = getSpaces(w - fp.getEndIndex()) + valString;                   System.out.print(valString);          }          // Start a new line at the row end          System.out.println();      // Leave a gap after the entire vector      System.out.println();    } // printVect    //-------------------------------------------------------------------    /**     * Method for standardizing the input data <p>     * Note the formalas used (since these vary between implementations):<br>     * reduction: (vect - meanvect)/sqrt(nrow)*colstdev <br>     * colstdev:  sum_cols ((vect - meanvect)^2/nrow) <br>     * if colstdev is close to 0, then set it to 1.     * @param nrow number of rows in input matrix     * @param ncol number of columns in input matrix     * @param A input matrix values     */    public static double[][] Standardize(int nrow, int ncol, double[][] A)    {    double[] colmeans = new double[ncol];    double[] colstdevs = new double[ncol];    // Adat will contain the standardized data and will be returned    double[][] Adat = new double[nrow][ncol];    double[] tempcol = new double[nrow];    double tot;     // Determine means and standard deviations of variables/columns    for (int j=0; j<ncol; j++)        {         tot = 0.0;         for (int i=0; i<nrow; i++)             {                tempcol[i] = A[i][j];                tot += tempcol[i];             }	 // For this col, det mean         colmeans[j] = tot/(double)nrow;         for (int i=0; i<nrow; i++) {                colstdevs[j] += Math.pow(tempcol[i]-colmeans[j], 2.0);             }             colstdevs[j] = Math.sqrt(colstdevs[j]/((double)nrow));             if (colstdevs[j] < 0.0001) { colstdevs[j] = 1.0; }	}        System.out.println(" Variable means:");        printVect(colmeans, 4, 8);        System.out.println(" Variable standard deviations:");        printVect(colstdevs, 4, 8);	// Now ceter to zero mean, and reduce to unit standard deviation    for (int j=0; j<ncol; j++)        {         for (int i=0; i<nrow; i++)             {               Adat[i][j] = (A[i][j] - colmeans[j])/                 (Math.sqrt((double)nrow)*colstdevs[j]);             }        }    return Adat;    } // Standardize    //-------------------------------------------------------------------}  // PCAcorr

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -