⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 matlabpca.java

📁 wekaUT 是由 University of Texas at Austin 开发的、基于 Weka 的半监督学习(semi-supervised learning)分类器
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
    buildAttributeConstructor(data);  }  private void buildAttributeConstructor (Instances data) throws Exception {    m_eigenvalues = null;    m_outputNumAtts = -1;    m_attributeFilter = null;    m_sumOfEigenValues = 0.0;    if (data.checkForStringAttributes()) {      throw  new UnsupportedAttributeTypeException("Can't handle string attributes!");    }    m_trainInstances = data;    m_debug = true;    // make a copy of the training data so that we can get the class    // column to append to the transformed data (if necessary)    m_trainCopy = new Instances(m_trainInstances);    if (m_debug) System.out.println("Copied " + m_trainInstances.numInstances() + " instances");    m_replaceMissingFilter = new ReplaceMissingValues();    m_replaceMissingFilter.setInputFormat(m_trainInstances);    m_trainInstances = Filter.useFilter(m_trainInstances, 					m_replaceMissingFilter);    if (m_debug) System.out.println("Replaced missing values");    if (m_normalize) {      m_normalizeFilter = new Normalize();      m_normalizeFilter.setInputFormat(m_trainInstances);      m_trainInstances = Filter.useFilter(m_trainInstances, m_normalizeFilter);      if (m_debug) System.out.println("Normalized");    }    // get rid of the class column    if (m_trainInstances.classIndex() >=0) {      m_hasClass = true;      m_classIndex = m_trainInstances.classIndex();      m_attributeFilter = new Remove();      int [] todelete = new int [1];      todelete[0] = m_classIndex;       m_attributeFilter.setAttributeIndicesArray(todelete);      m_attributeFilter.setInvertSelection(false);      m_attributeFilter.setInputFormat(m_trainInstances);      m_trainInstances = Filter.useFilter(m_trainInstances, m_attributeFilter);      if (m_debug) System.out.println("Deleted class attribute");    }        // delete any attributes with only one distinct value or are all missing    Vector deleteCols = new Vector();    int numDeletedAttributes = 0;    for (int i=0;i<m_trainInstances.numAttributes();i++) {      if 
(m_trainInstances.numDistinctValues(i) <=1) {	deleteCols.addElement(new Integer(i));	numDeletedAttributes++;      }    }    if (numDeletedAttributes > 0) {       if (m_debug) System.out.println("Deleted " + numDeletedAttributes + " single-value attributes");    }    // remove columns selected for deletion from the data if necessary    if (deleteCols.size() > 0) {      m_attributeFilter = new Remove();      int [] todelete = new int [deleteCols.size()];      for (int i=0;i<deleteCols.size();i++) {	todelete[i] = ((Integer)(deleteCols.elementAt(i))).intValue();      }      m_attributeFilter.setAttributeIndicesArray(todelete);      m_attributeFilter.setInvertSelection(false);      m_attributeFilter.setInputFormat(m_trainInstances);      m_trainInstances = Filter.useFilter(m_trainInstances, m_attributeFilter);    }    if (m_debug) System.out.println("Removed attributes filtered above");        m_numInstances = m_trainInstances.numInstances();    m_numAttribs = m_trainInstances.numAttributes();    if (m_timestamp == null) {       m_timestamp = getLogTimestamp();      m_pcaAttributeFilename = new String(m_pcaAttributeFilenameBase + m_timestamp + ".txt");      m_eigenvectorFilename = new String(m_eigenvectorFilenameBase + m_timestamp + ".txt");    }    dumpAttributeNames(m_trainInstances, m_pcaAttributeFilename);        if (m_debug) System.out.println("About to run PCA in matlab for " + m_numInstances +		       " instances with " + m_numAttribs + " attributes");    dumpInstances(m_dataFilename);    prepareMatlab();    runMatlab(m_PCAMFile, "PCAMatlab.output");    m_eigenvectors = readColumnVectors(m_eigenvectorFilename, -1);    m_eigenvalues = readVector(m_eigenvalueFilename);    m_sumOfEigenValues = Utils.sum(m_eigenvalues);    if (m_debug) System.out.println("Successfully parsed matlab output files");    m_transformedFormat = setOutputFormat();    // Transform data into the original format if necessary    if (m_transBackToOriginal) {      m_originalSpaceFormat = 
setOutputFormatOriginal();            // new ordered eigenvector matrix      int numVectors = (m_transformedFormat.classIndex() < 0) 	? m_transformedFormat.numAttributes()	: m_transformedFormat.numAttributes() - 1;      // transpose the matrix      int nr = m_eigenvectors.length;      int nc = m_eigenvectors[0].length;      m_eTranspose = 	new double [nc][nr];      for (int i = 0; i < nc; i++) {	for (int j = 0; j < nr; j++) {	  m_eTranspose[i][j] = m_eigenvectors[j][i];	}      }    }  }  /** Read column vectors from a text file   * @param name file name   * @param maxVectors max number of vectors to read, -1 to read all\   * @returns double[][] array corresponding to vectors   */  public double[][] readColumnVectors(String name, int maxVectors) throws Exception {    BufferedReader r = new BufferedReader(new FileReader(name));    int numAttributes=-1, numVectors=-1;    String s;    ArrayList linesList = new ArrayList();    while ((s = r.readLine()) != null) {      StringTokenizer tokenizer = new StringTokenizer(s);      ArrayList lineList = new ArrayList();      while (tokenizer.hasMoreTokens()) {	String value = tokenizer.nextToken();	try {	  lineList.add(new Double(value));	} catch (Exception e) {	  System.err.println("Couldn't parse " + value + " as double");	}      }      linesList.add(lineList);    }    numAttributes = linesList.size();    numVectors = ((ArrayList)linesList.get(0)).size();    double[][] vectors = new double[numAttributes][numVectors];    for (int i = 0; i < numAttributes; i++) {      ArrayList line = (ArrayList)linesList.get(i);      for (int j = 0; j < numVectors; j++) {	vectors[i][j] = ((Double)line.get(j)).doubleValue();      }    }     return vectors;  }   /** Read a column vector from a text file   * @param name file name   * @returns double[] array corresponding to a vector   */  public double[] readVector(String name) throws Exception {     BufferedReader r = new BufferedReader(new FileReader(name));     int numAttributes = -1;          
ArrayList vectorList = new ArrayList();     String s;     while ((s = r.readLine()) != null) {       try { 	 vectorList.add(new Double(s));       } catch (Exception e) {	 System.err.println("Couldn't parse " + s + " as double");       }     }     int length = vectorList.size();     double [] vector = new double[length];     for (int i = 0; i < length; i++) {       vector[i] = ((Double) vectorList.get(i)).doubleValue();     }      return vector;  }  /** Dump attribute names into a text file   * @param data instances for which to dump attributes   * @param filename name of the file where the attribute column goes   */  public static void dumpAttributeNames(Instances data, String filename) {    try {      PrintWriter writer = new PrintWriter(new BufferedOutputStream(new FileOutputStream(filename)));      Enumeration attributes = data.enumerateAttributes();      while (attributes.hasMoreElements()) {	Attribute attr = (Attribute) attributes.nextElement();	writer.println(attr.name());      }      writer.close();          } catch (Exception e) {      System.err.println("Error dumping attribute names into " + filename);      e.printStackTrace();    }  }      /**   * Returns just the header for the transformed data (ie. an empty   * set of instances. This is so that AttributeSelection can   * determine the structure of the transformed data without actually   * having to get all the transformed data through getTransformedData().   * @return the header of the transformed data.   * @exception Exception if the header of the transformed data can't   * be determined.   */  public Instances transformedHeader() throws Exception {    if (m_eigenvalues == null) {      throw new Exception("Principal components hasn't been built yet");    }    if (m_transBackToOriginal) {      return m_originalSpaceFormat;    } else {      return m_transformedFormat;    }  }  /**   * Gets the transformed training data.   
* Every instance of the stored training copy is passed through
   * convertInstance() and appended to a new, initially empty data set
   * built from the original-space header (when m_transBackToOriginal is
   * set) or from the transformed header (otherwise).
   *
   * @return the transformed training data
   * @exception Exception if transformed data can't be returned
   */
  public Instances transformedData() throws Exception {
    if (m_eigenvalues == null) {
      throw new Exception("Principal components hasn't been built yet");
    }

    Instances header = m_transBackToOriginal ? m_originalSpaceFormat : m_transformedFormat;
    Instances result = new Instances(header);
    for (int idx = 0; idx < m_trainCopy.numInstances(); idx++) {
      result.add(convertInstance(m_trainCopy.instance(idx)));
    }
    return result;
  }

  /**
   * Evaluates the merit of a transformed attribute, defined as 1 minus
   * the cumulative variance explained. When the data is to be transformed
   * back to the original space the merit can't be meaningfully evaluated
   * and 1.0 is returned.
   *
   * @param att the attribute to be evaluated
   * @return the merit of a transformed attribute
   * @exception Exception if attribute can't be evaluated
   */
  public double evaluateAttribute(int att) throws Exception {
    if (m_eigenvalues == null) {
      throw new Exception("Principal components hasn't been built yet!");
    }

    if (m_transBackToOriginal) {
      return 1.0; // can't evaluate back in the original space!
}    // return 1-cumulative variance explained for this transformed att    double cumulative = 0.0;    for (int i = 0; i < att ; i++) {      cumulative += m_eigenvalues[i];    }    return 1.0 - cumulative / m_sumOfEigenValues;  }  /**   * Dump covariance matrix into a file   */  private void dumpInstances(String tempFile) {    try {       PrintWriter writer = new PrintWriter(new BufferedOutputStream(new FileOutputStream(tempFile)));      for (int k = 0; k < m_numInstances; k++) {	Instance instance = m_trainInstances.instance(k);	for (int j = 0; j < m_numAttribs; j++) {	  writer.print(instance.value(j) + " ");	}	writer.println();      }      writer.close();    } catch (Exception e) {      System.err.println("Could not create a temporary file for dumping the covariance matrix: " + e);    }  }    /** Create matlab m-file for PCA   * @param filename file where matlab script is created   */  public void prepareMatlab() {    try{      PrintWriter writer = new PrintWriter(new BufferedOutputStream(new FileOutputStream(m_PCAMFile)));      writer.println("function MatlabPCA()");      writer.println("DATA = load('" + m_dataFilename + "');");      writer.println("[m,n] = size(DATA);");      writer.println("r = min(m-1,n);     % max possible rank of x");      writer.println("avg = mean(DATA);");      writer.println("centerx = (DATA - avg(ones(m,1),:));");      writer.println();      writer.println("[U,latent,pc] = svd(centerx./sqrt(m-1),0);");      writer.println("score = centerx*pc;");      writer.println();      writer.println("if nargout < 3, return; end");      writer.println("latent = diag(latent).^2;");      writer.println("if (r<n)");      writer.println("   latent = [latent(1:r); zeros(n-r,1)];");      writer.println("   score(:,r+1:end) = 0;");      writer.println("end");      writer.println();      writer.println("if nargout < 4, return; end");      writer.println("tmp = sqrt(diag(1./latent(1:r)))*score(:,1:r)';");      writer.println("tsquare = sum(tmp.*tmp)';");     
 writer.println();      writer.println("[numAttributes, numVectors] = size(pc);");      writer.println("[numValues, dummy] = size(latent);");      writer.println();      writer.println("save " + m_eigenvectorFilename + " pc -ASCII -DOUBLE;");      writer.println("save " + m_eigenvalueFilename + " latent -ASCII -DOUBLE;");      writer.println("\n\n");      writer.close();    }     catch (Exception e) {      System.err.println("Could not create matlab file: " + e);    }  

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -