⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 principalcomponents.java

📁 这是关于数据挖掘的一些算法
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
      for (int i=0;i<deleteCols.size();i++) {        todelete[i] = ((Integer)(deleteCols.elementAt(i))).intValue();      }      m_attributeFilter.setAttributeIndicesArray(todelete);      m_attributeFilter.setInvertSelection(false);      m_attributeFilter.setInputFormat(m_trainInstances);      m_trainInstances = Filter.useFilter(m_trainInstances, m_attributeFilter);    }        // can evaluator handle the processed data ? e.g., enough attributes?    getCapabilities().testWithFail(m_trainInstances);    m_numInstances = m_trainInstances.numInstances();    m_numAttribs = m_trainInstances.numAttributes();    fillCorrelation();    double [] d = new double[m_numAttribs];     double [][] v = new double[m_numAttribs][m_numAttribs];    Matrix corr = new Matrix(m_correlation);    corr.eigenvalueDecomposition(v, d);    m_eigenvectors = (double [][])v.clone();    m_eigenvalues = (double [])d.clone();    // any eigenvalues less than 0 are not worth anything --- change to 0    for (int i = 0; i < m_eigenvalues.length; i++) {      if (m_eigenvalues[i] < 0) {        m_eigenvalues[i] = 0.0;      }    }    m_sortedEigens = Utils.sort(m_eigenvalues);    m_sumOfEigenValues = Utils.sum(m_eigenvalues);    m_transformedFormat = setOutputFormat();    if (m_transBackToOriginal) {      m_originalSpaceFormat = setOutputFormatOriginal();            // new ordered eigenvector matrix      int numVectors = (m_transformedFormat.classIndex() < 0)         ? m_transformedFormat.numAttributes()        : m_transformedFormat.numAttributes() - 1;      double [][] orderedVectors =         new double [m_eigenvectors.length][numVectors + 1];            // try converting back to the original space      for (int i = m_numAttribs - 1; i > (m_numAttribs - numVectors - 1); i--) {        for (int j = 0; j < m_numAttribs; j++) {          orderedVectors[j][m_numAttribs - i] =             m_eigenvectors[j][m_sortedEigens[i]];        }      }            // transpose the matrix      int nr = orderedVectors.length;      int nc = orderedVectors[0].length;      m_eTranspose =         new double [nc][nr];      for (int i = 0; i < nc; i++) {        for (int j = 0; j < nr; j++) {          m_eTranspose[i][j] = orderedVectors[j][i];        }      }    }  }  /**   * Returns just the header for the transformed data (ie. an empty   * set of instances. This is so that AttributeSelection can   * determine the structure of the transformed data without actually   * having to get all the transformed data through getTransformedData().   * @return the header of the transformed data.   * @throws Exception if the header of the transformed data can't   * be determined.   */  public Instances transformedHeader() throws Exception {    if (m_eigenvalues == null) {      throw new Exception("Principal components hasn't been built yet");    }    if (m_transBackToOriginal) {      return m_originalSpaceFormat;    } else {      return m_transformedFormat;    }  }  /**   * Gets the transformed training data.   * @return the transformed training data   * @throws Exception if transformed data can't be returned   */  public Instances transformedData() throws Exception {    if (m_eigenvalues == null) {      throw new Exception("Principal components hasn't been built yet");    }    Instances output;    if (m_transBackToOriginal) {      output = new Instances(m_originalSpaceFormat);    } else {      output = new Instances(m_transformedFormat);    }    for (int i=0;i<m_trainCopy.numInstances();i++) {      Instance converted = convertInstance(m_trainCopy.instance(i));      output.add(converted);    }    return output;  }  /**   * Evaluates the merit of a transformed attribute. This is defined   * to be 1 minus the cumulative variance explained. Merit can't   * be meaningfully evaluated if the data is to be transformed back   * to the original space.   * @param att the attribute to be evaluated   * @return the merit of a transformed attribute   * @throws Exception if attribute can't be evaluated   */  public double evaluateAttribute(int att) throws Exception {    if (m_eigenvalues == null) {      throw new Exception("Principal components hasn't been built yet!");    }    if (m_transBackToOriginal) {      return 1.0; // can't evaluate back in the original space!    }    // return 1-cumulative variance explained for this transformed att    double cumulative = 0.0;    for (int i = m_numAttribs - 1; i >= m_numAttribs - att - 1; i--) {      cumulative += m_eigenvalues[m_sortedEigens[i]];    }    return 1.0 - cumulative / m_sumOfEigenValues;  }  /**   * Fill the correlation matrix   */  private void fillCorrelation() {    m_correlation = new double[m_numAttribs][m_numAttribs];    double [] att1 = new double [m_numInstances];    double [] att2 = new double [m_numInstances];    double corr;    for (int i = 0; i < m_numAttribs; i++) {      for (int j = 0; j < m_numAttribs; j++) {        if (i == j) {          m_correlation[i][j] = 1.0;        } else {          for (int k = 0; k < m_numInstances; k++) {            att1[k] = m_trainInstances.instance(k).value(i);            att2[k] = m_trainInstances.instance(k).value(j);          }          corr = Utils.correlation(att1,att2,m_numInstances);          m_correlation[i][j] = corr;          m_correlation[j][i] = corr;        }      }    }  }  /**   * Return a summary of the analysis   * @return a summary of the analysis.   */  private String principalComponentsSummary() {    StringBuffer result = new StringBuffer();    double cumulative = 0.0;    Instances output = null;    int numVectors=0;    try {      output = setOutputFormat();      numVectors = (output.classIndex() < 0)         ? output.numAttributes()        : output.numAttributes()-1;    } catch (Exception ex) {    }    //tomorrow    result.append("Correlation matrix\n"+matrixToString(m_correlation)                  +"\n\n");    result.append("eigenvalue\tproportion\tcumulative\n");    for (int i = m_numAttribs - 1; i > (m_numAttribs - numVectors - 1); i--) {      cumulative+=m_eigenvalues[m_sortedEigens[i]];      result.append(Utils.doubleToString(m_eigenvalues[m_sortedEigens[i]],9,5)                    +"\t"+Utils.                    doubleToString((m_eigenvalues[m_sortedEigens[i]] /                                     m_sumOfEigenValues),                                     9,5)                    +"\t"+Utils.doubleToString((cumulative /                                                 m_sumOfEigenValues),9,5)                    +"\t"+output.attribute(m_numAttribs - i - 1).name()+"\n");    }    result.append("\nEigenvectors\n");    for (int j = 1;j <= numVectors;j++) {      result.append(" V"+j+'\t');    }    result.append("\n");    for (int j = 0; j < m_numAttribs; j++) {      for (int i = m_numAttribs - 1; i > (m_numAttribs - numVectors - 1); i--) {        result.append(Utils.                      doubleToString(m_eigenvectors[j][m_sortedEigens[i]],7,4)                      +"\t");      }      result.append(m_trainInstances.attribute(j).name()+'\n');    }    if (m_transBackToOriginal) {      result.append("\nPC space transformed back to original space.\n"                    +"(Note: can't evaluate attributes in the original "                    +"space)\n");    }    return result.toString();  }  /**   * Returns a description of this attribute transformer   * @return a String describing this attribute transformer   */  public String toString() {    if (m_eigenvalues == null) {      return "Principal components hasn't been built yet!";    } else {      return "\tPrincipal Components Attribute Transformer\n\n"        +principalComponentsSummary();    }  }  /**   * Return a matrix as a String   * @param matrix that is decribed as a string   * @return a String describing a matrix   */  private String matrixToString(double [][] matrix) {    StringBuffer result = new StringBuffer();    int last = matrix.length - 1;    for (int i = 0; i <= last; i++) {      for (int j = 0; j <= last; j++) {        result.append(Utils.doubleToString(matrix[i][j],6,2)+" ");        if (j == last) {          result.append('\n');        }      }    }    return result.toString();  }  /**   * Convert a pc transformed instance back to the original space   *    * @param inst        the instance to convert   * @return            the processed instance   * @throws Exception  if something goes wrong   */  private Instance convertInstanceToOriginal(Instance inst)    throws Exception {    double[] newVals = null;    if (m_hasClass) {      newVals = new double[m_numAttribs+1];    } else {      newVals = new double[m_numAttribs];    }    if (m_hasClass) {      // class is always appended as the last attribute      newVals[m_numAttribs] = inst.value(inst.numAttributes() - 1);    }    for (int i = 0; i < m_eTranspose[0].length; i++) {      double tempval = 0.0;      for (int j = 1; j < m_eTranspose.length; j++) {        tempval += (m_eTranspose[j][i] *                     inst.value(j - 1));       }      newVals[i] = tempval;    }        if (inst instanceof SparseInstance) {      return new SparseInstance(inst.weight(), newVals);    } else {      return new Instance(inst.weight(), newVals);    }        }  /**   * Transform an instance in original (unormalized) format. Convert back   * to the original space if requested.   * @param instance an instance in the original (unormalized) format   * @return a transformed instance   * @throws Exception if instance cant be transformed   */  public Instance convertInstance(Instance instance) throws Exception {    if (m_eigenvalues == null) {      throw new Exception("convertInstance: Principal components not "                          +"built yet");    }    double[] newVals = new double[m_outputNumAtts];    Instance tempInst = (Instance)instance.copy();    if (!instance.equalHeaders(m_trainCopy.instance(0))) {      throw new Exception("Can't convert instance: header's don't match: "                          +"PrincipalComponents");    }    m_replaceMissingFilter.input(tempInst);    m_replaceMissingFilter.batchFinished();    tempInst = m_replaceMissingFilter.output();    if (m_normalize) {      m_normalizeFilter.input(tempInst);      m_normalizeFilter.batchFinished();      tempInst = m_normalizeFilter.output();    }    m_nominalToBinFilter.input(tempInst);    m_nominalToBinFilter.batchFinished();    tempInst = m_nominalToBinFilter.output();    if (m_attributeFilter != null) {      m_attributeFilter.input(tempInst);      m_attributeFilter.batchFinished();      tempInst = m_attributeFilter.output();    }    if (m_hasClass) {       newVals[m_outputNumAtts - 1] = instance.value(instance.classIndex());    }    double cumulative = 0;    for (int i = m_numAttribs - 1; i >= 0; i--) {      double tempval = 0.0;      for (int j = 0; j < m_numAttribs; j++) {        tempval += (m_eigenvectors[j][m_sortedEigens[i]] *                     tempInst.value(j));       }      newVals[m_numAttribs - i - 1] = tempval;      cumulative+=m_eigenvalues[m_sortedEigens[i]];      if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {        break;      }    }        if (!m_transBackToOriginal) {      if (instance instanceof SparseInstance) {      return new SparseInstance(instance.weight(), newVals);      } else {        return new Instance(instance.weight(), newVals);      }          } else {      if (instance instanceof SparseInstance) {        return convertInstanceToOriginal(new SparseInstance(instance.weight(),                                                             newVals));      } else {        return convertInstanceToOriginal(new Instance(instance.weight(),                                                      newVals));      }    }  }  /**   * Set up the header for the PC->original space dataset   *    * @return            the output format   * @throws Exception  if something goes wrong   */  private Instances setOutputFormatOriginal() throws Exception {    FastVector attributes = new FastVector();        for (int i = 0; i < m_numAttribs; i++) {      String att = m_trainInstances.attribute(i).name();      attributes.addElement(new Attribute(att));    }        if (m_hasClass) {      attributes.addElement(m_trainCopy.classAttribute().copy());    }    Instances outputFormat =       new Instances(m_trainCopy.relationName()+"->PC->original space",                    attributes, 0);        // set the class to be the last attribute if necessary    if (m_hasClass) {      outputFormat.setClassIndex(outputFormat.numAttributes()-1);    }    return outputFormat;  }  /**   * Set the format for the transformed data   * @return a set of empty Instances (header only) in the new format   * @throws Exception if the output format can't be set   */  private Instances setOutputFormat() throws Exception {    if (m_eigenvalues == null) {      return null;    }    double cumulative = 0.0;    FastVector attributes = new FastVector();     for (int i = m_numAttribs - 1; i >= 0; i--) {       StringBuffer attName = new StringBuffer();       // build array of coefficients       double[] coeff_mags = new double[m_numAttribs];       for (int j = 0; j < m_numAttribs; j++)         coeff_mags[j] = -Math.abs(m_eigenvectors[j][m_sortedEigens[i]]);       int num_attrs = (m_maxAttrsInName > 0) ? Math.min(m_numAttribs, m_maxAttrsInName) : m_numAttribs;       // this array contains the sorted indices of the coefficients       int[] coeff_inds;       if (m_numAttribs > 0) {          // if m_maxAttrsInName > 0, sort coefficients by decreasing magnitude          coeff_inds = Utils.sort(coeff_mags);       } else {          // if  m_maxAttrsInName <= 0, use all coeffs in original order          coeff_inds = new int[m_numAttribs];          for (int j=0; j<m_numAttribs; j++)            coeff_inds[j] = j;       }       // build final attName string       for (int j = 0; j < num_attrs; j++) {         double coeff_value = m_eigenvectors[coeff_inds[j]][m_sortedEigens[i]];         if (j > 0 && coeff_value >= 0)           attName.append("+");         attName.append(Utils.doubleToString(coeff_value,5,3)                        +m_trainInstances.attribute(coeff_inds[j]).name());       }       if (num_attrs < m_numAttribs)         attName.append("...");                attributes.addElement(new Attribute(attName.toString()));       cumulative+=m_eigenvalues[m_sortedEigens[i]];       if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {         break;       }     }          if (m_hasClass) {       attributes.addElement(m_trainCopy.classAttribute().copy());     }     Instances outputFormat =        new Instances(m_trainInstances.relationName()+"_principal components",                     attributes, 0);     // set the class to be the last attribute if necessary     if (m_hasClass) {       outputFormat.setClassIndex(outputFormat.numAttributes()-1);     }          m_outputNumAtts = outputFormat.numAttributes();     return outputFormat;  }  /**   * Main method for testing this class   * @param argv should contain the command line arguments to the   * evaluator/transformer (see AttributeSelection)   */  public static void main(String [] argv) {    runEvaluator(new PrincipalComponents(), argv);  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -