principalcomponents.java

来自「Weka」· Java 代码 · 共 803 行 · 第 1/2 页

JAVA
803
字号
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enable(Capability.NOMINAL_CLASS);
    result.enable(Capability.NUMERIC_CLASS);
    result.enable(Capability.DATE_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);
    result.enable(Capability.NO_CLASS);

    return result;
  }

  /**
   * Determines the output format based on the input format and returns
   * this. In case the output format cannot be returned immediately, i.e.,
   * immediateOutputFormat() returns false, then this method will be called
   * from batchFinished().
   * <p>
   * One output attribute is created per retained principal component; its
   * name lists the eigenvector coefficients together with the names of the
   * input attributes they apply to. If no eigenvalues have been computed
   * yet, the input format is returned unchanged.
   *
   * @param inputFormat     the input format to base the output format on
   * @return                the output format
   * @throws Exception      in case the determination goes wrong
   * @see   #hasImmediateOutputFormat()
   * @see   #batchFinished()
   */
  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
    double        cumulative;
    FastVector    attributes;
    int           i;
    int           j;
    StringBuilder attName;
    double[]      coeff_mags;
    int           num_attrs;
    int[]         coeff_inds;
    double        coeff_value;
    int           numAttsLowerBound;

    // nothing has been computed yet: no transformation to describe
    if (m_Eigenvalues == null)
      return inputFormat;

    // lowest index into m_SortedEigens that may still be used; limits the
    // number of components to m_MaxAttributes when that option is positive
    if (m_MaxAttributes > 0)
      numAttsLowerBound = Math.max(m_NumAttribs - m_MaxAttributes, 0);
    else
      numAttsLowerBound = 0;

    cumulative = 0.0;
    attributes = new FastVector();
    // walk m_SortedEigens from its last entry downwards
    // (presumably largest eigenvalue first, i.e. Utils.sort is ascending -- confirm)
    for (i = m_NumAttribs - 1; i >= numAttsLowerBound; i--) {
      attName = new StringBuilder();

      // build array of coefficients; magnitudes are negated so that sorting
      // them yields decreasing magnitude (per the comment below)
      coeff_mags = new double[m_NumAttribs];
      for (j = 0; j < m_NumAttribs; j++)
        coeff_mags[j] = -Math.abs(m_Eigenvectors[j][m_SortedEigens[i]]);

      num_attrs = (m_MaxAttrsInName > 0)
        ? Math.min(m_NumAttribs, m_MaxAttrsInName)
        : m_NumAttribs;

      // this array contains the sorted indices of the coefficients
      // (the guard used to test m_NumAttribs, which made the else-branch
      // unreachable for any non-empty dataset and contradicted the comments)
      if (m_MaxAttrsInName > 0) {
        // if m_MaxAttrsInName > 0, sort coefficients by decreasing magnitude
        coeff_inds = Utils.sort(coeff_mags);
      }
      else {
        // if m_MaxAttrsInName <= 0, use all coeffs in original order
        coeff_inds = new int[m_NumAttribs];
        for (j = 0; j < m_NumAttribs; j++)
          coeff_inds[j] = j;
      }

      // build final attName string
      for (j = 0; j < num_attrs; j++) {
        coeff_value = m_Eigenvectors[coeff_inds[j]][m_SortedEigens[i]];
        // explicit "+" between terms; negative values carry their own sign
        if (j > 0 && coeff_value >= 0)
          attName.append("+");
        attName.append(
            Utils.doubleToString(coeff_value, 5, 3)
            + inputFormat.attribute(coeff_inds[j]).name());
      }
      // mark names that were truncated to num_attrs terms
      if (num_attrs < m_NumAttribs)
        attName.append("...");

      attributes.addElement(new Attribute(attName.toString()));

      // stop once the requested proportion of variance is covered
      cumulative += m_Eigenvalues[m_SortedEigens[i]];
      if ((cumulative / m_SumOfEigenValues) >= m_CoverVariance)
        break;
    }

    if (m_HasClass)
      attributes.addElement(m_TrainCopy.classAttribute().copy());

    Instances outputFormat =
      new Instances(
          m_TrainCopy.relationName() + "_principal components", attributes, 0);

    // set the class to be the last attribute if necessary
    if (m_HasClass)
      outputFormat.setClassIndex(outputFormat.numAttributes() - 1);

    m_OutputNumAtts = outputFormat.numAttributes();

    return outputFormat;
  }

  /**
   * Fill the correlation matrix.
   * <p>
   * Allocates m_Correlation as an m_NumAttribs x m_NumAttribs matrix, sets
   * the diagonal to 1.0 and stores the result of Utils.correlation for every
   * pair of attributes of m_TrainInstances in both mirrored cells.
   */
  protected void fillCorrelation() {
    int       i;
    int       j;
    int       k;
    double[]  att1;
    double[]  att2;
    double    corr;

    m_Correlation = new double[m_NumAttribs][m_NumAttribs];
    att1          = new double[m_NumInstances];
    att2          = new double[m_NumInstances];

    for (i = 0; i < m_NumAttribs; i++) {
      m_Correlation[i][i] = 1.0;

      // each unordered pair is evaluated exactly once (j < i); both mirrored
      // cells receive the value, so visiting (j, i) again would be redundant
      for (j = 0; j < i; j++) {
        for (k = 0; k < m_NumInstances; k++) {
          att1[k] = m_TrainInstances.instance(k).value(i);
          att2[k] = m_TrainInstances.instance(k).value(j);
        }
        corr = Utils.correlation(att1, att2, m_NumInstances);
        m_Correlation[i][j] = corr;
        m_Correlation[j][i] = corr;
      }
    }
  }

  /**
   * Transform an instance in original (unnormalized) format.
   * <p>
   * A copy of the instance is passed through the same chain of filters that
   * setup() configured (missing value replacement, optional normalization,
   * nominal-to-binary conversion, optional attribute removal) and is then
   * projected onto the retained eigenvectors.
   *
   * @param instance    an instance in the original (unnormalized) format
   * @return            a transformed instance
   * @throws Exception  if instance can't be transformed
   */
  protected Instance convertInstance(Instance instance) throws Exception {
    Instance  result;
    double[]  newVals;
    Instance  tempInst;
    double    cumulative;
    int       i;
    int       j;
    double    tempval;
    int       numAttsLowerBound;

    newVals  = new double[m_OutputNumAtts];
    tempInst = (Instance) instance.copy();

    // each filter is fed the single instance and its output collected
    m_ReplaceMissingFilter.input(tempInst);
    m_ReplaceMissingFilter.batchFinished();
    tempInst = m_ReplaceMissingFilter.output();

    if (m_Normalize) {
      m_NormalizeFilter.input(tempInst);
      m_NormalizeFilter.batchFinished();
      tempInst = m_NormalizeFilter.output();
    }

    m_NominalToBinaryFilter.input(tempInst);
    m_NominalToBinaryFilter.batchFinished();
    tempInst = m_NominalToBinaryFilter.output();

    if (m_AttributeFilter != null) {
      m_AttributeFilter.input(tempInst);
      m_AttributeFilter.batchFinished();
      tempInst = m_AttributeFilter.output();
    }

    // the class value is taken from the untouched input instance and stored
    // in the last slot, matching the layout built by determineOutputFormat()
    if (m_HasClass)
      newVals[m_OutputNumAtts - 1] = instance.value(instance.classIndex());

    // same bound as in determineOutputFormat()
    if (m_MaxAttributes > 0)
      numAttsLowerBound = Math.max(m_NumAttribs - m_MaxAttributes, 0);
    else
      numAttsLowerBound = 0;

    cumulative = 0;
    for (i = m_NumAttribs - 1; i >= numAttsLowerBound; i--) {
      // dot product of the filtered instance with eigenvector m_SortedEigens[i]
      tempval = 0.0;
      for (j = 0; j < m_NumAttribs; j++)
        tempval += m_Eigenvectors[j][m_SortedEigens[i]] * tempInst.value(j);

      // first processed component goes to slot 0, the next to slot 1, ...
      newVals[m_NumAttribs - i - 1] = tempval;

      // same stopping rule as in determineOutputFormat()
      cumulative += m_Eigenvalues[m_SortedEigens[i]];
      if ((cumulative / m_SumOfEigenValues) >= m_CoverVariance)
        break;
    }

    // create instance
    if (instance instanceof SparseInstance)
      result = new SparseInstance(instance.weight(), newVals);
    else
      result = new Instance(instance.weight(), newVals);

    return result;
  }

  /**
   * Initializes the filter with the given input data.
   * <p>
   * Preprocesses a copy of the data, removes the class column and all
   * attributes with at most one distinct value, computes the correlation
   * matrix and its eigen decomposition, and finally sets the output format.
   *
   * @param instances   the data to process
   * @throws Exception  in case the processing goes wrong
   * @see               #batchFinished()
   */
  protected void setup(Instances instances) throws Exception {
    int                       i;
    int                       j;
    Vector<Integer>           deleteCols;
    int[]                     todelete;
    double[][]                v;
    Matrix                    corr;
    EigenvalueDecomposition   eig;
    Matrix                    V;

    m_TrainInstances = new Instances(instances);

    // make a copy of the training data so that we can get the class
    // column to append to the transformed data (if necessary)
    m_TrainCopy = new Instances(m_TrainInstances);

    m_ReplaceMissingFilter = new ReplaceMissingValues();
    m_ReplaceMissingFilter.setInputFormat(m_TrainInstances);
    m_TrainInstances = Filter.useFilter(m_TrainInstances, m_ReplaceMissingFilter);

    if (m_Normalize) {
      m_NormalizeFilter = new Normalize();
      m_NormalizeFilter.setInputFormat(m_TrainInstances);
      m_TrainInstances = Filter.useFilter(m_TrainInstances, m_NormalizeFilter);
    }

    m_NominalToBinaryFilter = new NominalToBinary();
    m_NominalToBinaryFilter.setInputFormat(m_TrainInstances);
    m_TrainInstances = Filter.useFilter(m_TrainInstances, m_NominalToBinaryFilter);

    // delete any attributes with only one distinct value or are all missing
    deleteCols = new Vector<Integer>();
    for (i = 0; i < m_TrainInstances.numAttributes(); i++) {
      if (m_TrainInstances.numDistinctValues(i) <= 1)
        deleteCols.addElement(i);
    }

    if (m_TrainInstances.classIndex() >= 0) {
      // get rid of the class column
      m_HasClass   = true;
      m_ClassIndex = m_TrainInstances.classIndex();
      deleteCols.addElement(m_ClassIndex);
    }

    // remove columns from the data if necessary
    if (deleteCols.size() > 0) {
      m_AttributeFilter = new Remove();
      todelete = new int[deleteCols.size()];
      for (i = 0; i < deleteCols.size(); i++)
        todelete[i] = deleteCols.elementAt(i);
      m_AttributeFilter.setAttributeIndicesArray(todelete);
      m_AttributeFilter.setInvertSelection(false);
      m_AttributeFilter.setInputFormat(m_TrainInstances);
      m_TrainInstances = Filter.useFilter(m_TrainInstances, m_AttributeFilter);
    }

    // can evaluator handle the processed data ? e.g., enough attributes?
    getCapabilities().testWithFail(m_TrainInstances);

    m_NumInstances = m_TrainInstances.numInstances();
    m_NumAttribs   = m_TrainInstances.numAttributes();

    fillCorrelation();

    // get eigen vectors/values
    corr = new Matrix(m_Correlation);
    eig  = corr.eig();
    V    = eig.getV();
    v    = new double[m_NumAttribs][m_NumAttribs];
    for (i = 0; i < v.length; i++) {
      for (j = 0; j < v[0].length; j++)
        v[i][j] = V.get(i, j);
    }
    m_Eigenvectors = v.clone();
    m_Eigenvalues  = eig.getRealEigenvalues().clone();

    // any eigenvalues less than 0 are not worth anything --- change to 0
    for (i = 0; i < m_Eigenvalues.length; i++) {
      if (m_Eigenvalues[i] < 0)
        m_Eigenvalues[i] = 0.0;
    }
    m_SortedEigens     = Utils.sort(m_Eigenvalues);
    m_SumOfEigenValues = Utils.sum(m_Eigenvalues);

    m_TransformedFormat = determineOutputFormat(m_TrainInstances);
    setOutputFormat(m_TransformedFormat);
  }

  /**
   * Sets the format of the input instances.
   * <p>
   * Also clears the state computed by a previous setup() (eigenvalues,
   * number of output attributes, attribute and nominal-to-binary filters,
   * sum of eigenvalues).
   *
   * @param instanceInfo    an Instances object containing the input
   *                        instance structure (any instances contained
   *                        in the object are ignored - only the structure
   *                        is required).
   * @return                true if the outputFormat may be collected
   *                        immediately; this implementation always
   *                        returns false
   * @throws Exception      if the input format can't be set successfully
   */
  public boolean setInputFormat(Instances instanceInfo) throws Exception {
    super.setInputFormat(instanceInfo);

    m_Eigenvalues           = null;
    m_OutputNumAtts         = -1;
    m_AttributeFilter       = null;
    m_NominalToBinaryFilter = null;
    m_SumOfEigenValues      = 0.0;

    return false;
  }

  /**
   * Input an instance for filtering. Filter requires all
   * training instances be read before producing output.
   * <p>
   * Until the first batch is done, instances are only buffered; afterwards
   * each instance is converted and pushed to the output queue right away.
   *
   * @param instance                the input instance
   * @return                        true if the filtered instance may now be
   *                                collected with output().
   * @throws IllegalStateException  if no input format has been set
   * @throws Exception              if conversion fails
   */
  public boolean input(Instance instance) throws Exception {
    Instance  inst;

    if (getInputFormat() == null)
      throw new IllegalStateException("No input instance format defined");

    if (isNewBatch()) {
      resetQueue();
      m_NewBatch = false;
    }

    if (isFirstBatchDone()) {
      inst = convertInstance(instance);
      inst.setDataset(getOutputFormat());
      push(inst);
      return true;
    }
    else {
      bufferInput(instance);
      return false;
    }
  }

  /**
   * Signify that this batch of input to the filter is finished.
   * <p>
   * On the first batch the filter is initialized via setup(); then every
   * instance held by the input format is converted and pushed to the
   * output queue.
   *
   * @return                       true if there are instances pending output
   * @throws NullPointerException  if no input structure has been defined,
   * @throws Exception             if there was a problem finishing the batch.
   */
  public boolean batchFinished() throws Exception {
    int        i;
    Instances  insts;
    Instance   inst;

    if (getInputFormat() == null)
      throw new NullPointerException("No input instance format defined");

    insts = getInputFormat();

    if (!isFirstBatchDone())
      setup(insts);

    for (i = 0; i < insts.numInstances(); i++) {
      inst = convertInstance(insts.instance(i));
      inst.setDataset(getOutputFormat());
      push(inst);
    }

    flushInput();
    m_NewBatch       = true;
    m_FirstBatchDone = true;

    return (numPendingOutput() != 0);
  }

  /**
   * Main method for running this filter.
   *
   * @param args    should contain arguments to the filter: use -h for help
   */
  public static void main(String[] args) {
    runFilter(new PrincipalComponents(), args);
  }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?