📄 matlabpca.java
字号:
buildAttributeConstructor(data); } private void buildAttributeConstructor (Instances data) throws Exception { m_eigenvalues = null; m_outputNumAtts = -1; m_attributeFilter = null; m_sumOfEigenValues = 0.0; if (data.checkForStringAttributes()) { throw new UnsupportedAttributeTypeException("Can't handle string attributes!"); } m_trainInstances = data; m_debug = true; // make a copy of the training data so that we can get the class // column to append to the transformed data (if necessary) m_trainCopy = new Instances(m_trainInstances); if (m_debug) System.out.println("Copied " + m_trainInstances.numInstances() + " instances"); m_replaceMissingFilter = new ReplaceMissingValues(); m_replaceMissingFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_replaceMissingFilter); if (m_debug) System.out.println("Replaced missing values"); if (m_normalize) { m_normalizeFilter = new Normalize(); m_normalizeFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_normalizeFilter); if (m_debug) System.out.println("Normalized"); } // get rid of the class column if (m_trainInstances.classIndex() >=0) { m_hasClass = true; m_classIndex = m_trainInstances.classIndex(); m_attributeFilter = new Remove(); int [] todelete = new int [1]; todelete[0] = m_classIndex; m_attributeFilter.setAttributeIndicesArray(todelete); m_attributeFilter.setInvertSelection(false); m_attributeFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_attributeFilter); if (m_debug) System.out.println("Deleted class attribute"); } // delete any attributes with only one distinct value or are all missing Vector deleteCols = new Vector(); int numDeletedAttributes = 0; for (int i=0;i<m_trainInstances.numAttributes();i++) { if (m_trainInstances.numDistinctValues(i) <=1) { deleteCols.addElement(new Integer(i)); numDeletedAttributes++; } } if (numDeletedAttributes > 0) { if (m_debug) System.out.println("Deleted " + numDeletedAttributes + " single-value attributes"); } // remove columns selected for deletion from the data if necessary if (deleteCols.size() > 0) { m_attributeFilter = new Remove(); int [] todelete = new int [deleteCols.size()]; for (int i=0;i<deleteCols.size();i++) { todelete[i] = ((Integer)(deleteCols.elementAt(i))).intValue(); } m_attributeFilter.setAttributeIndicesArray(todelete); m_attributeFilter.setInvertSelection(false); m_attributeFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_attributeFilter); } if (m_debug) System.out.println("Removed attributes filtered above"); m_numInstances = m_trainInstances.numInstances(); m_numAttribs = m_trainInstances.numAttributes(); if (m_timestamp == null) { m_timestamp = getLogTimestamp(); m_pcaAttributeFilename = new String(m_pcaAttributeFilenameBase + m_timestamp + ".txt"); m_eigenvectorFilename = new String(m_eigenvectorFilenameBase + m_timestamp + ".txt"); } dumpAttributeNames(m_trainInstances, m_pcaAttributeFilename); if (m_debug) System.out.println("About to run PCA in matlab for " + m_numInstances + " instances with " + m_numAttribs + " attributes"); dumpInstances(m_dataFilename); prepareMatlab(); runMatlab(m_PCAMFile, "PCAMatlab.output"); m_eigenvectors = readColumnVectors(m_eigenvectorFilename, -1); m_eigenvalues = readVector(m_eigenvalueFilename); m_sumOfEigenValues = Utils.sum(m_eigenvalues); if (m_debug) System.out.println("Successfully parsed matlab output files"); m_transformedFormat = setOutputFormat(); // Transform data into the original format if necessary if (m_transBackToOriginal) { m_originalSpaceFormat = setOutputFormatOriginal(); // new ordered eigenvector matrix int numVectors = (m_transformedFormat.classIndex() < 0) ? m_transformedFormat.numAttributes() : m_transformedFormat.numAttributes() - 1; // transpose the matrix int nr = m_eigenvectors.length; int nc = m_eigenvectors[0].length; m_eTranspose = new double [nc][nr]; for (int i = 0; i < nc; i++) { for (int j = 0; j < nr; j++) { m_eTranspose[i][j] = m_eigenvectors[j][i]; } } } } /** Read column vectors from a text file * @param name file name * @param maxVectors max number of vectors to read, -1 to read all\ * @returns double[][] array corresponding to vectors */ public double[][] readColumnVectors(String name, int maxVectors) throws Exception { BufferedReader r = new BufferedReader(new FileReader(name)); int numAttributes=-1, numVectors=-1; String s; ArrayList linesList = new ArrayList(); while ((s = r.readLine()) != null) { StringTokenizer tokenizer = new StringTokenizer(s); ArrayList lineList = new ArrayList(); while (tokenizer.hasMoreTokens()) { String value = tokenizer.nextToken(); try { lineList.add(new Double(value)); } catch (Exception e) { System.err.println("Couldn't parse " + value + " as double"); } } linesList.add(lineList); } numAttributes = linesList.size(); numVectors = ((ArrayList)linesList.get(0)).size(); double[][] vectors = new double[numAttributes][numVectors]; for (int i = 0; i < numAttributes; i++) { ArrayList line = (ArrayList)linesList.get(i); for (int j = 0; j < numVectors; j++) { vectors[i][j] = ((Double)line.get(j)).doubleValue(); } } return vectors; } /** Read a column vector from a text file * @param name file name * @returns double[] array corresponding to a vector */ public double[] readVector(String name) throws Exception { BufferedReader r = new BufferedReader(new FileReader(name)); int numAttributes = -1; ArrayList vectorList = new ArrayList(); String s; while ((s = r.readLine()) != null) { try { vectorList.add(new Double(s)); } catch (Exception e) { System.err.println("Couldn't parse " + s + " as double"); } } int length = vectorList.size(); double [] vector = new double[length]; for (int i = 0; i < length; i++) { vector[i] = ((Double) vectorList.get(i)).doubleValue(); } return vector; } /** Dump attribute names into a text file * @param data instances for which to dump attributes * @param filename name of the file where the attribute column goes */ public static void dumpAttributeNames(Instances data, String filename) { try { PrintWriter writer = new PrintWriter(new BufferedOutputStream(new FileOutputStream(filename))); Enumeration attributes = data.enumerateAttributes(); while (attributes.hasMoreElements()) { Attribute attr = (Attribute) attributes.nextElement(); writer.println(attr.name()); } writer.close(); } catch (Exception e) { System.err.println("Error dumping attribute names into " + filename); e.printStackTrace(); } } /** * Returns just the header for the transformed data (ie. an empty * set of instances. This is so that AttributeSelection can * determine the structure of the transformed data without actually * having to get all the transformed data through getTransformedData(). * @return the header of the transformed data. * @exception Exception if the header of the transformed data can't * be determined. */ public Instances transformedHeader() throws Exception { if (m_eigenvalues == null) { throw new Exception("Principal components hasn't been built yet"); } if (m_transBackToOriginal) { return m_originalSpaceFormat; } else { return m_transformedFormat; } } /** * Gets the transformed training data. * @return the transformed training data * @exception Exception if transformed data can't be returned */ public Instances transformedData() throws Exception { if (m_eigenvalues == null) { throw new Exception("Principal components hasn't been built yet"); } Instances output; if (m_transBackToOriginal) { output = new Instances(m_originalSpaceFormat); } else { output = new Instances(m_transformedFormat); } for (int i=0;i<m_trainCopy.numInstances();i++) { Instance converted = convertInstance(m_trainCopy.instance(i)); output.add(converted); } return output; } /** * Evaluates the merit of a transformed attribute. This is defined * to be 1 minus the cumulative variance explained. Merit can't * be meaningfully evaluated if the data is to be transformed back * to the original space. * @param att the attribute to be evaluated * @return the merit of a transformed attribute * @exception Exception if attribute can't be evaluated */ public double evaluateAttribute(int att) throws Exception { if (m_eigenvalues == null) { throw new Exception("Principal components hasn't been built yet!"); } if (m_transBackToOriginal) { return 1.0; // can't evaluate back in the original space! } // return 1-cumulative variance explained for this transformed att double cumulative = 0.0; for (int i = 0; i < att ; i++) { cumulative += m_eigenvalues[i]; } return 1.0 - cumulative / m_sumOfEigenValues; } /** * Dump covariance matrix into a file */ private void dumpInstances(String tempFile) { try { PrintWriter writer = new PrintWriter(new BufferedOutputStream(new FileOutputStream(tempFile))); for (int k = 0; k < m_numInstances; k++) { Instance instance = m_trainInstances.instance(k); for (int j = 0; j < m_numAttribs; j++) { writer.print(instance.value(j) + " "); } writer.println(); } writer.close(); } catch (Exception e) { System.err.println("Could not create a temporary file for dumping the covariance matrix: " + e); } } /** Create matlab m-file for PCA * @param filename file where matlab script is created */ public void prepareMatlab() { try{ PrintWriter writer = new PrintWriter(new BufferedOutputStream(new FileOutputStream(m_PCAMFile))); writer.println("function MatlabPCA()"); writer.println("DATA = load('" + m_dataFilename + "');"); writer.println("[m,n] = size(DATA);"); writer.println("r = min(m-1,n); % max possible rank of x"); writer.println("avg = mean(DATA);"); writer.println("centerx = (DATA - avg(ones(m,1),:));"); writer.println(); writer.println("[U,latent,pc] = svd(centerx./sqrt(m-1),0);"); writer.println("score = centerx*pc;"); writer.println(); writer.println("if nargout < 3, return; end"); writer.println("latent = diag(latent).^2;"); writer.println("if (r<n)"); writer.println(" latent = [latent(1:r); zeros(n-r,1)];"); writer.println(" score(:,r+1:end) = 0;"); writer.println("end"); writer.println(); writer.println("if nargout < 4, return; end"); writer.println("tmp = sqrt(diag(1./latent(1:r)))*score(:,1:r)';"); writer.println("tsquare = sum(tmp.*tmp)';"); writer.println(); writer.println("[numAttributes, numVectors] = size(pc);"); writer.println("[numValues, dummy] = size(latent);"); writer.println(); writer.println("save " + m_eigenvectorFilename + " pc -ASCII -DOUBLE;"); writer.println("save " + m_eigenvalueFilename + " latent -ASCII -DOUBLE;"); writer.println("\n\n"); writer.close(); } catch (Exception e) { System.err.println("Could not create matlab file: " + e); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -