⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 principalcomponents.java

📁 一个数据挖掘系统的源码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
	: m_transformedFormat.numAttributes() - 1;

      double [][] orderedVectors =
	new double [m_eigenvectors.length][numVectors + 1];

      // try converting back to the original space
      for (int i = m_numAttribs - 1; i > (m_numAttribs - numVectors - 1); i--) {
	for (int j = 0; j < m_numAttribs; j++) {
	  orderedVectors[j][m_numAttribs - i] =
	    m_eigenvectors[j][m_sortedEigens[i]];
	}
      }

      // transpose the matrix
      int nr = orderedVectors.length;
      int nc = orderedVectors[0].length;
      m_eTranspose =
	new double [nc][nr];
      for (int i = 0; i < nc; i++) {
	for (int j = 0; j < nr; j++) {
	  m_eTranspose[i][j] = orderedVectors[j][i];
	}
      }
    }
  }

  /**
   * Returns only the header of the transformed data, i.e. an empty set
   * of instances. This lets AttributeSelection determine the structure
   * of the transformed data without having to fetch all of the
   * transformed data through getTransformedData().
   * @return the header of the transformed data: the original-space
   * format when transforming back to the original space, otherwise the
   * principal-components format.
   * @exception Exception if the header of the transformed data can't
   * be determined (the analysis has not been built yet).
   */
  public Instances transformedHeader() throws Exception {
    if (m_eigenvalues == null) {
      throw new Exception("Principal components hasn't been built yet");
    }
    // which header applies depends on the back-transformation setting
    return m_transBackToOriginal ? m_originalSpaceFormat : m_transformedFormat;
  }

  /**
   * Gets the transformed training data. Every instance of the stored
   * training copy is passed through convertInstance() and collected in a
   * new set of instances built from the applicable output format.
   * @return the transformed training data
   * @exception Exception if transformed data can't be returned (for
   * example because the analysis has not been built yet)
   */
  public Instances transformedData() throws Exception {
    if (m_eigenvalues == null) {
      throw new Exception("Principal components hasn't been built yet");
    }

    // start from a copy of whichever output format is in effect
    Instances format = m_transBackToOriginal
      ? m_originalSpaceFormat
      : m_transformedFormat;
    Instances transformed = new Instances(format);

    int total = m_trainCopy.numInstances();
    for (int idx = 0; idx < total; idx++) {
      transformed.add(convertInstance(m_trainCopy.instance(idx)));
    }

    return transformed;
  }

  /**
   * Evaluates the merit of a transformed attribute. Merit is defined as
   * 1 minus the cumulative variance explained by this attribute and all
   * attributes ranked before it. Merit can't be meaningfully evaluated
   * if the data is to be transformed back to the original space; in that
   * case a constant 1.0 is returned.
   * @param att the (0-based) index of the transformed attribute to be
   * evaluated
   * @return the merit of a transformed attribute
   * @exception Exception if attribute can't be evaluated (the analysis
   * has not been built yet)
   */
  public double evaluateAttribute(int att) throws Exception {
    if (m_eigenvalues == null) {
      throw new Exception("Principal components hasn't been built yet!");
    }

    if (m_transBackToOriginal) {
      // no meaningful merit exists once we are back in the original space
      return 1.0;
    }

    // accumulate the eigenvalues of the first (att + 1) ranked components,
    // walking m_sortedEigens from its last entry downwards
    double explained = 0.0;
    int lowest = m_numAttribs - att - 1;
    int idx = m_numAttribs - 1;
    while (idx >= lowest) {
      explained += m_eigenvalues[m_sortedEigens[idx]];
      idx--;
    }

    return 1.0 - explained / m_sumOfEigenValues;
  }

  /**
   * Fill the correlation matrix.
   *
   * Allocates m_correlation as an m_numAttribs x m_numAttribs matrix,
   * puts 1.0 on the diagonal and the pairwise correlation of the
   * attribute columns of m_trainInstances everywhere else. Each
   * unordered pair of attributes is evaluated once and the value is
   * written to both [i][j] and [j][i].
   */
  private void fillCorrelation() {
    m_correlation = new double[m_numAttribs][m_numAttribs];
    double [] att1 = new double [m_numInstances];
    double [] att2 = new double [m_numInstances];

    for (int i = 0; i < m_numAttribs; i++) {
      m_correlation[i][i] = 1.0;

      // the column for attribute i does not change while j varies
      for (int k = 0; k < m_numInstances; k++) {
        att1[k] = m_trainInstances.instance(k).value(i);
      }

      for (int j = i + 1; j < m_numAttribs; j++) {
        for (int k = 0; k < m_numInstances; k++) {
          att2[k] = m_trainInstances.instance(k).value(j);
        }
        double corr = Utils.correlation(att1, att2, m_numInstances);
        // correlation does not depend on argument order, so mirror the
        // value instead of recomputing it for (j, i)
        m_correlation[i][j] = corr;
        m_correlation[j][i] = corr;
      }
    }
  }

  /**
   * Return a summary of the analysis: the correlation matrix, one line
   * per retained component (eigenvalue, proportion of variance,
   * cumulative proportion and the component's attribute name) and the
   * table of retained eigenvectors.
   * @return a summary of the analysis.
   */
  private String principalComponentsSummary() {
    StringBuffer summary = new StringBuffer();
    Instances format = null;
    int numVectors = 0;

    try {
      format = setOutputFormat();
      // number of retained components = attributes minus the class, if any
      boolean noClass = format.classIndex() < 0;
      int count = format.numAttributes();
      numVectors = noClass ? count : count - 1;
    } catch (Exception ignored) {
      // best effort: numVectors stays 0, so the per-component sections
      // below are simply left empty
    }

    summary.append("Correlation matrix\n");
    summary.append(matrixToString(m_correlation));
    summary.append("\n\n");

    summary.append("eigenvalue\tproportion\tcumulative\n");
    double runningTotal = 0.0;
    for (int rank = 0; rank < numVectors; rank++) {
      // components are listed from the last entry of m_sortedEigens down
      double eigenvalue = m_eigenvalues[m_sortedEigens[m_numAttribs - 1 - rank]];
      runningTotal += eigenvalue;

      summary.append(Utils.doubleToString(eigenvalue, 9, 5));
      summary.append("\t");
      summary.append(Utils.doubleToString(eigenvalue / m_sumOfEigenValues, 9, 5));
      summary.append("\t");
      summary.append(Utils.doubleToString(runningTotal / m_sumOfEigenValues, 9, 5));
      summary.append("\t");
      summary.append(format.attribute(rank).name());
      summary.append("\n");
    }

    summary.append("\nEigenvectors\n");
    for (int v = 1; v <= numVectors; v++) {
      summary.append(" V").append(v).append('\t');
    }
    summary.append("\n");

    // one row per original attribute, one column per retained component
    for (int row = 0; row < m_numAttribs; row++) {
      for (int rank = 0; rank < numVectors; rank++) {
        int column = m_sortedEigens[m_numAttribs - 1 - rank];
        summary.append(Utils.doubleToString(m_eigenvectors[row][column], 7, 4));
        summary.append("\t");
      }
      summary.append(m_trainInstances.attribute(row).name());
      summary.append('\n');
    }

    if (m_transBackToOriginal) {
      summary.append("\nPC space transformed back to original space.\n"
                     + "(Note: can't evaluate attributes in the original "
                     + "space)\n");
    }
    return summary.toString();
  }

  /**
   * Returns a description of this attribute transformer. Once the
   * analysis has been built this is a title followed by the principal
   * components summary; before that it is a short "not built" notice.
   * @return a String describing this attribute transformer
   */
  public String toString() {
    if (m_eigenvalues != null) {
      String title = "\tPrincipal Components Attribute Transformer\n\n";
      return title + principalComponentsSummary();
    }
    return "Principal components hasn't been built yet!";
  }

  /**
   * Return a matrix as a String. Each row of the matrix becomes one
   * line; every value is formatted with Utils.doubleToString(value, 6, 2)
   * and followed by a single space.
   *
   * The column count is taken from each row itself, so rectangular
   * matrices are rendered completely; for square matrices the output is
   * the same as before.
   * @param matrix the matrix that is described as a string
   * @return a String describing a matrix
   */
  private String matrixToString(double [][] matrix) {
    StringBuffer result = new StringBuffer();

    for (int i = 0; i < matrix.length; i++) {
      double [] row = matrix[i];
      for (int j = 0; j < row.length; j++) {
        result.append(Utils.doubleToString(row[j], 6, 2)).append(' ');
      }
      // terminate the line once the whole row has been written
      result.append('\n');
    }
    return result.toString();
  }

  /**
   * Convert a pc transformed instance back to the original space.
   *
   * The incoming instance is expected to hold the component scores at
   * indices 0 .. (number of components - 1), followed by the class value
   * as its last attribute when m_hasClass is set. This is the layout
   * produced by convertInstance().
   * @param inst an instance in principal-components space
   * @return an instance with m_numAttribs reconstructed attribute values
   * (plus the class value as last attribute when m_hasClass is set)
   * @exception Exception if the instance can't be converted
   */
  private Instance convertInstanceToOriginal(Instance inst)
    throws Exception {
    double[] newVals = new double[m_hasClass ? m_numAttribs + 1 : m_numAttribs];

    if (m_hasClass) {
      // class is always appended as the last attribute
      newVals[m_numAttribs] = inst.value(inst.numAttributes() - 1);
    }

    // Row 0 of m_eTranspose is an unused padding row; row j (j >= 1) holds
    // the j-th ranked eigenvector, whose score sits at index j - 1 of inst.
    // Column i corresponds to original attribute i.
    for (int i = 0; i < m_numAttribs; i++) {
      double tempval = 0.0;
      for (int j = 1; j < m_eTranspose.length; j++) {
        tempval += (m_eTranspose[j][i] * inst.value(j - 1));
      }
      newVals[i] = tempval;
    }

    if (inst instanceof SparseInstance) {
      return new SparseInstance(inst.weight(), newVals);
    } else {
      return new Instance(inst.weight(), newVals);
    }
  }

  /**
   * Transform an instance in original (unnormalized) format. Convert back
   * to the original space if requested.
   *
   * The instance is run through the same filters, in the same order, as
   * used here for every call: replace-missing, optional normalization,
   * nominal-to-binary and the optional attribute filter. It is then
   * projected onto the ranked eigenvectors until the requested
   * proportion of variance (m_coverVariance) is covered. Component
   * scores are stored 0-based, matching the header built by
   * setOutputFormat(); the class value, if any, is stored last.
   * @param instance an instance in the original (unnormalized) format
   * @return a transformed instance
   * @exception Exception if instance can't be transformed
   */
  public Instance convertInstance(Instance instance) throws Exception {

    if (m_eigenvalues == null) {
      throw new Exception("convertInstance: Principal components not "
                          +"built yet");
    }

    if (!instance.equalHeaders(m_trainCopy.instance(0))) {
      throw new Exception("Can't convert instance: header's don't match: "
                          +"PrincipalComponents");
    }

    double[] newVals = new double[m_outputNumAtts];
    Instance tempInst = (Instance)instance.copy();

    m_replaceMissingFilter.input(tempInst);
    m_replaceMissingFilter.batchFinished();
    tempInst = m_replaceMissingFilter.output();

    if (m_normalize) {
      m_normalizeFilter.input(tempInst);
      m_normalizeFilter.batchFinished();
      tempInst = m_normalizeFilter.output();
    }

    m_nominalToBinFilter.input(tempInst);
    m_nominalToBinFilter.batchFinished();
    tempInst = m_nominalToBinFilter.output();

    if (m_attributeFilter != null) {
      m_attributeFilter.input(tempInst);
      m_attributeFilter.batchFinished();
      tempInst = m_attributeFilter.output();
    }

    if (m_hasClass) {
      // the class value is carried over unchanged as the last attribute
      newVals[m_outputNumAtts - 1] = instance.value(instance.classIndex());
    }

    double cumulative = 0;
    for (int i = m_numAttribs - 1; i >= 0; i--) {
      double tempval = 0.0;
      for (int j = 0; j < m_numAttribs; j++) {
        tempval += (m_eigenvectors[j][m_sortedEigens[i]] *
                    tempInst.value(j));
      }
      // first ranked component goes to slot 0, the next to slot 1, ...
      newVals[m_numAttribs - i - 1] = tempval;
      cumulative += m_eigenvalues[m_sortedEigens[i]];
      // same stopping rule as setOutputFormat(), so the number of scores
      // matches the number of component attributes in the header
      if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
        break;
      }
    }

    // keep the sparse/dense representation of the incoming instance
    Instance transformed;
    if (instance instanceof SparseInstance) {
      transformed = new SparseInstance(instance.weight(), newVals);
    } else {
      transformed = new Instance(instance.weight(), newVals);
    }

    if (m_transBackToOriginal) {
      return convertInstanceToOriginal(transformed);
    }
    return transformed;
  }

  /**
   * Set up the header for the PC->original space dataset. The header
   * has one attribute per attribute of m_trainInstances (same names),
   * followed by a copy of the class attribute when m_hasClass is set.
   * @return a set of empty Instances (header only) in the original
   * space format
   * @exception Exception if the header can't be built
   */
  private Instances setOutputFormatOriginal() throws Exception {
    FastVector atts = new FastVector();

    int idx = 0;
    while (idx < m_numAttribs) {
      atts.addElement(new Attribute(m_trainInstances.attribute(idx).name()));
      idx++;
    }

    if (m_hasClass) {
      atts.addElement(m_trainCopy.classAttribute().copy());
    }

    String relation = m_trainCopy.relationName() + "->PC->original space";
    Instances header = new Instances(relation, atts, 0);

    if (m_hasClass) {
      // the class was appended last, so it is the final attribute
      header.setClassIndex(header.numAttributes() - 1);
    }

    return header;
  }

  /**
   * Set the format for the transformed data. One attribute is created
   * per retained component, named after the linear combination of the
   * training attributes that defines it; components are added until the
   * cumulative proportion of variance reaches m_coverVariance. A copy of
   * the class attribute is appended last when m_hasClass is set. As a
   * side effect m_outputNumAtts is set to the number of attributes in
   * the new format.
   * @return a set of empty Instances (header only) in the new format,
   * or null if the analysis has not been built yet
   * @exception Exception if the output format can't be set
   */
  private Instances setOutputFormat() throws Exception {
    if (m_eigenvalues == null) {
      return null;
    }

    FastVector atts = new FastVector();
    double explained = 0.0;
    int lastRow = m_numAttribs - 1;

    for (int i = lastRow; i >= 0; i--) {
      int column = m_sortedEigens[i];

      // build a name such as "0.5a+0.3b-0.2c" from the eigenvector entries
      StringBuffer label = new StringBuffer();
      for (int j = 0; j <= lastRow; j++) {
        label.append(Utils.doubleToString(m_eigenvectors[j][column], 5, 3));
        label.append(m_trainInstances.attribute(j).name());
        // negative coefficients bring their own sign; others need a "+"
        if (j < lastRow && m_eigenvectors[j + 1][column] >= 0) {
          label.append("+");
        }
      }
      atts.addElement(new Attribute(label.toString()));

      explained += m_eigenvalues[column];
      if ((explained / m_sumOfEigenValues) >= m_coverVariance) {
        break;
      }
    }

    if (m_hasClass) {
      atts.addElement(m_trainCopy.classAttribute().copy());
    }

    String relation = m_trainInstances.relationName() + "_principal components";
    Instances header = new Instances(relation, atts, 0);

    if (m_hasClass) {
      // the class was appended last, so it is the final attribute
      header.setClassIndex(header.numAttributes() - 1);
    }

    m_outputNumAtts = header.numAttributes();
    return header;
  }


  /**
   * Main method for testing this class. Runs attribute selection with a
   * new PrincipalComponents instance and prints the result; any failure
   * is logged with its full stack trace followed by its message.
   * @param argv should contain the command line arguments to the
   * evaluator/transformer (see AttributeSelection)
   */
  public static void main(String [] argv) {
    try {
      System.out.println(AttributeSelection.
                         SelectAttributes(new PrincipalComponents(), argv));
    }
    catch (Exception e) {
      // Calling toString() on the StackTraceElement[] only yields the
      // array's identity string, so render the real trace into a String.
      java.io.StringWriter trace = new java.io.StringWriter();
      java.io.PrintWriter writer = new java.io.PrintWriter(trace);
      e.printStackTrace(writer);
      writer.flush();
      log.error(trace.toString());
      log.error(e.getMessage());
    }
  }

}


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -