principalcomponents.java
来自「Weka」· Java 代码 · 共 803 行 · 第 1/2 页
JAVA
803 行
result.enable(Capability.DATE_ATTRIBUTES); result.enable(Capability.MISSING_VALUES); // class result.enable(Capability.NOMINAL_CLASS); result.enable(Capability.NUMERIC_CLASS); result.enable(Capability.DATE_CLASS); result.enable(Capability.MISSING_CLASS_VALUES); result.enable(Capability.NO_CLASS); return result; } /** * Determines the output format based on the input format and returns * this. In case the output format cannot be returned immediately, i.e., * immediateOutputFormat() returns false, then this method will be called * from batchFinished(). * * @param inputFormat the input format to base the output format on * @return the output format * @throws Exception in case the determination goes wrong * @see #hasImmediateOutputFormat() * @see #batchFinished() */ protected Instances determineOutputFormat(Instances inputFormat) throws Exception { double cumulative; FastVector attributes; int i; int j; StringBuffer attName; double[] coeff_mags; int num_attrs; int[] coeff_inds; double coeff_value; int numAttsLowerBound; if (m_Eigenvalues == null) return inputFormat; if (m_MaxAttributes > 0) numAttsLowerBound = m_NumAttribs - m_MaxAttributes; else numAttsLowerBound = 0; if (numAttsLowerBound < 0) numAttsLowerBound = 0; cumulative = 0.0; attributes = new FastVector(); for (i = m_NumAttribs - 1; i >= numAttsLowerBound; i--) { attName = new StringBuffer(); // build array of coefficients coeff_mags = new double[m_NumAttribs]; for (j = 0; j < m_NumAttribs; j++) coeff_mags[j] = -Math.abs(m_Eigenvectors[j][m_SortedEigens[i]]); num_attrs = (m_MaxAttrsInName > 0) ? Math.min(m_NumAttribs, m_MaxAttrsInName) : m_NumAttribs; // this array contains the sorted indices of the coefficients if (m_NumAttribs > 0) { // if m_maxAttrsInName > 0, sort coefficients by decreasing magnitude coeff_inds = Utils.sort(coeff_mags); } else { // if m_maxAttrsInName <= 0, use all coeffs in original order coeff_inds = new int[m_NumAttribs]; for (j = 0; j < m_NumAttribs; j++) coeff_inds[j] = j; } // build final attName string for (j = 0; j < num_attrs; j++) { coeff_value = m_Eigenvectors[coeff_inds[j]][m_SortedEigens[i]]; if (j > 0 && coeff_value >= 0) attName.append("+"); attName.append( Utils.doubleToString(coeff_value,5,3) + inputFormat.attribute(coeff_inds[j]).name()); } if (num_attrs < m_NumAttribs) attName.append("..."); attributes.addElement(new Attribute(attName.toString())); cumulative += m_Eigenvalues[m_SortedEigens[i]]; if ((cumulative / m_SumOfEigenValues) >= m_CoverVariance) break; } if (m_HasClass) attributes.addElement(m_TrainCopy.classAttribute().copy()); Instances outputFormat = new Instances( m_TrainCopy.relationName() + "_principal components", attributes, 0); // set the class to be the last attribute if necessary if (m_HasClass) outputFormat.setClassIndex(outputFormat.numAttributes() - 1); m_OutputNumAtts = outputFormat.numAttributes(); return outputFormat; } /** * Fill the correlation matrix. */ protected void fillCorrelation() { int i; int j; int k; double[] att1; double[] att2; double corr; m_Correlation = new double[m_NumAttribs][m_NumAttribs]; att1 = new double [m_NumInstances]; att2 = new double [m_NumInstances]; for (i = 0; i < m_NumAttribs; i++) { for (j = 0; j < m_NumAttribs; j++) { if (i == j) { m_Correlation[i][j] = 1.0; } else { for (k = 0; k < m_NumInstances; k++) { att1[k] = m_TrainInstances.instance(k).value(i); att2[k] = m_TrainInstances.instance(k).value(j); } corr = Utils.correlation(att1,att2,m_NumInstances); m_Correlation[i][j] = corr; m_Correlation[j][i] = corr; } } } } /** * Transform an instance in original (unormalized) format. * * @param instance an instance in the original (unormalized) format * @return a transformed instance * @throws Exception if instance can't be transformed */ protected Instance convertInstance(Instance instance) throws Exception { Instance result; double[] newVals; Instance tempInst; double cumulative; int i; int j; double tempval; int numAttsLowerBound; newVals = new double[m_OutputNumAtts]; tempInst = (Instance) instance.copy(); m_ReplaceMissingFilter.input(tempInst); m_ReplaceMissingFilter.batchFinished(); tempInst = m_ReplaceMissingFilter.output(); if (m_Normalize) { m_NormalizeFilter.input(tempInst); m_NormalizeFilter.batchFinished(); tempInst = m_NormalizeFilter.output(); } m_NominalToBinaryFilter.input(tempInst); m_NominalToBinaryFilter.batchFinished(); tempInst = m_NominalToBinaryFilter.output(); if (m_AttributeFilter != null) { m_AttributeFilter.input(tempInst); m_AttributeFilter.batchFinished(); tempInst = m_AttributeFilter.output(); } if (m_HasClass) newVals[m_OutputNumAtts - 1] = instance.value(instance.classIndex()); if (m_MaxAttributes > 0) numAttsLowerBound = m_NumAttribs - m_MaxAttributes; else numAttsLowerBound = 0; if (numAttsLowerBound < 0) numAttsLowerBound = 0; cumulative = 0; for (i = m_NumAttribs - 1; i >= numAttsLowerBound; i--) { tempval = 0.0; for (j = 0; j < m_NumAttribs; j++) tempval += m_Eigenvectors[j][m_SortedEigens[i]] * tempInst.value(j); newVals[m_NumAttribs - i - 1] = tempval; cumulative += m_Eigenvalues[m_SortedEigens[i]]; if ((cumulative / m_SumOfEigenValues) >= m_CoverVariance) break; } // create instance if (instance instanceof SparseInstance) result = new SparseInstance(instance.weight(), newVals); else result = new Instance(instance.weight(), newVals); return result; } /** * Initializes the filter with the given input data. * * @param instances the data to process * @throws Exception in case the processing goes wrong * @see #batchFinished() */ protected void setup(Instances instances) throws Exception { int i; int j; Vector<Integer> deleteCols; int[] todelete; double[][] v; Matrix corr; EigenvalueDecomposition eig; Matrix V; m_TrainInstances = new Instances(instances); // make a copy of the training data so that we can get the class // column to append to the transformed data (if necessary) m_TrainCopy = new Instances(m_TrainInstances); m_ReplaceMissingFilter = new ReplaceMissingValues(); m_ReplaceMissingFilter.setInputFormat(m_TrainInstances); m_TrainInstances = Filter.useFilter(m_TrainInstances, m_ReplaceMissingFilter); if (m_Normalize) { m_NormalizeFilter = new Normalize(); m_NormalizeFilter.setInputFormat(m_TrainInstances); m_TrainInstances = Filter.useFilter(m_TrainInstances, m_NormalizeFilter); } m_NominalToBinaryFilter = new NominalToBinary(); m_NominalToBinaryFilter.setInputFormat(m_TrainInstances); m_TrainInstances = Filter.useFilter(m_TrainInstances, m_NominalToBinaryFilter); // delete any attributes with only one distinct value or are all missing deleteCols = new Vector<Integer>(); for (i = 0; i < m_TrainInstances.numAttributes(); i++) { if (m_TrainInstances.numDistinctValues(i) <= 1) deleteCols.addElement(i); } if (m_TrainInstances.classIndex() >=0) { // get rid of the class column m_HasClass = true; m_ClassIndex = m_TrainInstances.classIndex(); deleteCols.addElement(new Integer(m_ClassIndex)); } // remove columns from the data if necessary if (deleteCols.size() > 0) { m_AttributeFilter = new Remove(); todelete = new int [deleteCols.size()]; for (i = 0; i < deleteCols.size(); i++) todelete[i] = ((Integer)(deleteCols.elementAt(i))).intValue(); m_AttributeFilter.setAttributeIndicesArray(todelete); m_AttributeFilter.setInvertSelection(false); m_AttributeFilter.setInputFormat(m_TrainInstances); m_TrainInstances = Filter.useFilter(m_TrainInstances, m_AttributeFilter); } // can evaluator handle the processed data ? e.g., enough attributes? getCapabilities().testWithFail(m_TrainInstances); m_NumInstances = m_TrainInstances.numInstances(); m_NumAttribs = m_TrainInstances.numAttributes(); fillCorrelation(); // get eigen vectors/values corr = new Matrix(m_Correlation); eig = corr.eig(); V = eig.getV(); v = new double[m_NumAttribs][m_NumAttribs]; for (i = 0; i < v.length; i++) { for (j = 0; j < v[0].length; j++) v[i][j] = V.get(i, j); } m_Eigenvectors = (double[][]) v.clone(); m_Eigenvalues = (double[]) eig.getRealEigenvalues().clone(); // any eigenvalues less than 0 are not worth anything --- change to 0 for (i = 0; i < m_Eigenvalues.length; i++) { if (m_Eigenvalues[i] < 0) m_Eigenvalues[i] = 0.0; } m_SortedEigens = Utils.sort(m_Eigenvalues); m_SumOfEigenValues = Utils.sum(m_Eigenvalues); m_TransformedFormat = determineOutputFormat(m_TrainInstances); setOutputFormat(m_TransformedFormat); } /** * Sets the format of the input instances. * * @param instanceInfo an Instances object containing the input * instance structure (any instances contained * in the object are ignored - only the structure * is required). * @return true if the outputFormat may be collected * immediately * @throws Exception if the input format can't be set successfully */ public boolean setInputFormat(Instances instanceInfo) throws Exception { super.setInputFormat(instanceInfo); m_Eigenvalues = null; m_OutputNumAtts = -1; m_AttributeFilter = null; m_NominalToBinaryFilter = null; m_SumOfEigenValues = 0.0; return false; } /** * Input an instance for filtering. Filter requires all * training instances be read before producing output. * * @param instance the input instance * @return true if the filtered instance may now be * collected with output(). * @throws IllegalStateException if no input format has been set * @throws Exception if conversion fails */ public boolean input(Instance instance) throws Exception { Instance inst; if (getInputFormat() == null) throw new IllegalStateException("No input instance format defined"); if (isNewBatch()) { resetQueue(); m_NewBatch = false; } if (isFirstBatchDone()) { inst = convertInstance(instance); inst.setDataset(getOutputFormat()); push(inst); return true; } else { bufferInput(instance); return false; } } /** * Signify that this batch of input to the filter is finished. * * @return true if there are instances pending output * @throws NullPointerException if no input structure has been defined, * @throws Exception if there was a problem finishing the batch. */ public boolean batchFinished() throws Exception { int i; Instances insts; Instance inst; if (getInputFormat() == null) throw new NullPointerException("No input instance format defined"); insts = getInputFormat(); if (!isFirstBatchDone()) setup(insts); for (i = 0; i < insts.numInstances(); i++) { inst = convertInstance(insts.instance(i)); inst.setDataset(getOutputFormat()); push(inst); } flushInput(); m_NewBatch = true; m_FirstBatchDone = true; return (numPendingOutput() != 0); } /** * Main method for running this filter. * * @param args should contain arguments to the filter: use -h for help */ public static void main(String[] args) { runFilter(new PrincipalComponents(), args); }}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?