📄 principalcomponents.java
字号:
Instance converted = convertInstance(m_trainCopy.instance(i));
      output.add(converted);
    }

    return output;
  }

  /**
   * Evaluates the merit of a transformed attribute. This is defined
   * to be 1 minus the cumulative variance explained. Merit can't
   * be meaningfully evaluated if the data is to be transformed back
   * to the original space (1.0 is returned in that case).
   * @param att the attribute to be evaluated
   * @return the merit of a transformed attribute
   * @exception Exception if attribute can't be evaluated
   */
  public double evaluateAttribute(int att) throws Exception {
    if (m_eigenvalues == null) {
      throw new Exception("Principal components hasn't been built yet!");
    }

    if (m_transBackToOriginal) {
      return 1.0; // can't evaluate back in the original space!
    }

    // return 1-cumulative variance explained for this transformed att
    // (sums the att+1 entries at the top end of m_sortedEigens)
    double cumulative = 0.0;
    for (int i=m_numAttribs;i>=m_numAttribs-att;i--) {
      cumulative += m_eigenvalues[m_sortedEigens[i]];
    }

    return 1.0-cumulative/m_sumOfEigenValues;
  }

  /**
   * Fill the correlation matrix. The matrix is indexed from 1..m_numAttribs
   * (row and column 0 are unused), matching the 1-based arrays expected
   * by the jacobi routine. Each pair of attributes is correlated once and
   * the result is mirrored, so the matrix is exactly symmetric.
   */
  private void fillCorrelation() {
    m_correlation = new double[m_numAttribs+1][m_numAttribs+1];
    double [] att1 = new double [m_numInstances];
    double [] att2 = new double [m_numInstances];
    double corr;

    for (int i=1;i<=m_numAttribs;i++) {
      m_correlation[i][i] = 1.0;

      // the values of attribute i are the same for every partner j
      for (int k=0;k<m_numInstances;k++) {
        att1[k] = m_trainInstances.instance(k).value(i-1);
      }

      // only the upper triangle is computed; the lower one is mirrored
      for (int j=i+1;j<=m_numAttribs;j++) {
        for (int k=0;k<m_numInstances;k++) {
          att2[k] = m_trainInstances.instance(k).value(j-1);
        }
        corr = Utils.correlation(att1,att2,m_numInstances);
        m_correlation[i][j] = corr;
        m_correlation[j][i] = corr;
      }
    }
  }

  /**
   * Return a summary of the analysis: the correlation matrix, a table of
   * eigenvalue / proportion / cumulative proportion per retained
   * component, and the eigenvectors of the retained components.
   * @return a summary of the analysis.
   */
  private String principalComponentsSummary() {
    StringBuffer result = new StringBuffer();
    double cumulative = 0.0;
    Instances output = null;
    int numVectors=0;

    try {
      output = setOutputFormat();
      numVectors = (output.classIndex() < 0)
        ? output.numAttributes()
        : output.numAttributes()-1;
    } catch (Exception ex) {
      // Deliberately best effort: if the output format can't be built,
      // numVectors stays 0, so the loops below that are bounded by it
      // (and the only use of output) are skipped.
    }

    result.append("Correlation matrix\n"+matrixToString(m_correlation)
                  +"\n\n");
    result.append("eigenvalue\tproportion\tcumulative\n");
    for (int i=m_numAttribs;i>(m_numAttribs-numVectors);i--) {
      cumulative+=m_eigenvalues[m_sortedEigens[i]];
      result.append(Utils.doubleToString(m_eigenvalues[m_sortedEigens[i]],9,5)
                    +"\t"+Utils.
                    doubleToString((m_eigenvalues[m_sortedEigens[i]] /
                                    m_sumOfEigenValues),
                                   9,5)
                    +"\t"+Utils.doubleToString((cumulative /
                                                m_sumOfEigenValues),9,5)
                    +"\t"+output.attribute(m_numAttribs-i).name()+"\n");
    }

    result.append("\nEigenvectors\n");
    for (int j=1;j<=numVectors;j++) {
      result.append(" V"+j+'\t');
    }
    result.append("\n");

    // one row per original attribute, one column per retained component
    for (int j=1;j<=m_numAttribs;j++) {
      for (int i=m_numAttribs;i>(m_numAttribs-numVectors);i--) {
        result.append(Utils.
                      doubleToString(m_eigenvectors[j][m_sortedEigens[i]],7,4)
                      +"\t");
      }
      result.append(m_trainInstances.attribute(j-1).name()+'\n');
    }

    if (m_transBackToOriginal) {
      result.append("\nPC space transformed back to original space.\n"
                    +"(Note: can't evaluate attributes in the original "
                    +"space)\n");
    }
    return result.toString();
  }

  /**
   * Returns a description of this attribute transformer
   * @return a String describing this attribute transformer
   */
  public String toString() {
    if (m_eigenvalues == null) {
      return "Principal components hasn't been built yet!";
    } else {
      return "\tPrincipal Components Attribute Transformer\n\n"
        +principalComponentsSummary();
    }
  }

  /**
   * Return a matrix as a String. Only indices 1..matrix.length-1 are
   * printed (row and column 0 are skipped), one matrix row per line.
   * @param matrix the square, 1-based matrix to format
   * @return a String describing a matrix
   */
  private String matrixToString(double [][] matrix) {
    StringBuffer result = new StringBuffer();
    int size = matrix.length-1;

    for (int i=1;i<=size;i++) {
      for (int j=1;j<=size;j++) {
        result.append(Utils.doubleToString(matrix[i][j],6,2)+" ");
        if (j == size) {
          result.append('\n');
        }
      }
    }
    return result.toString();
  }

  /**
   * Convert a pc transformed instance back to the original space by
   * multiplying its values with m_eTranspose. If there is a class, its
   * value is copied unchanged into the last position.
   * @param inst the instance in principal component space
   * @return a new instance (sparse if inst is sparse) with the same weight
   * @exception Exception if the instance can't be converted
   */
  private Instance convertInstanceToOriginal(Instance inst)
    throws Exception {
    double[] newVals;

    if (m_hasClass) {
      newVals = new double[m_numAttribs+1];
    } else {
      newVals = new double[m_numAttribs];
    }

    if (m_hasClass) {
      // class is always appended as the last attribute
      newVals[m_numAttribs] = inst.value(inst.numAttributes()-1);
    }

    // m_eTranspose is read from index 1 in both dimensions
    for (int i=1;i<m_eTranspose[0].length;i++) {
      double tempval = 0.0;
      for (int j=1;j<m_eTranspose.length;j++) {
        tempval += (m_eTranspose[j][i] *
                    inst.value(j - 1));
      }
      newVals[i - 1] = tempval;
    }

    if (inst instanceof SparseInstance) {
      return new SparseInstance(inst.weight(), newVals);
    } else {
      return new Instance(inst.weight(), newVals);
    }
  }

  /**
   * Transform an instance in original (unnormalized) format. Convert back
   * to the original space if requested.
   * @param instance an instance in the original (unnormalized) format
   * @return a transformed instance
   * @exception Exception if instance can't be transformed
   */
  public Instance convertInstance(Instance instance) throws Exception {

    if (m_eigenvalues == null) {
      throw new Exception("convertInstance: Principal components not "
                          +"built yet");
    }

    double[] newVals = new double[m_outputNumAtts];
    Instance tempInst = (Instance)instance.copy();
    if (!instance.equalHeaders(m_trainCopy.instance(0))) {
      throw new Exception("Can't convert instance: header's don't match: "
                          +"PrincipalComponents");
    }

    // push the copy through the same filter chain, in the same order
    m_replaceMissingFilter.input(tempInst);
    m_replaceMissingFilter.batchFinished();
    tempInst = m_replaceMissingFilter.output();

    if (m_normalize) {
      m_normalizeFilter.input(tempInst);
      m_normalizeFilter.batchFinished();
      tempInst = m_normalizeFilter.output();
    }

    m_nominalToBinFilter.input(tempInst);
    m_nominalToBinFilter.batchFinished();
    tempInst = m_nominalToBinFilter.output();

    if (m_attributeFilter != null) {
      m_attributeFilter.input(tempInst);
      m_attributeFilter.batchFinished();
      tempInst = m_attributeFilter.output();
    }

    if (m_hasClass) {
      // the class value is taken from the unfiltered instance
      newVals[m_outputNumAtts - 1] = instance.value(instance.classIndex());
    }

    // project onto the components, stopping once the retained components
    // cover the requested proportion of the variance
    double cumulative = 0;
    for (int i = m_numAttribs; i >= 1; i--) {
      double tempval = 0.0;
      for (int j = 1; j <= m_numAttribs; j++) {
        tempval += (m_eigenvectors[j][m_sortedEigens[i]] *
                    tempInst.value(j - 1));
      }
      newVals[m_numAttribs - i] = tempval;
      cumulative+=m_eigenvalues[m_sortedEigens[i]];
      if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
        break;
      }
    }

    if (!m_transBackToOriginal) {
      if (instance instanceof SparseInstance) {
        return new SparseInstance(instance.weight(), newVals);
      } else {
        return new Instance(instance.weight(), newVals);
      }
    } else {
      if (instance instanceof SparseInstance) {
        return convertInstanceToOriginal(new SparseInstance(instance.weight(),
                                                            newVals));
      } else {
        return convertInstanceToOriginal(new Instance(instance.weight(),
                                                      newVals));
      }
    }
  }

  /**
   * Set up the header for the PC->original space dataset. One attribute
   * is created per attribute of m_trainInstances (same names), followed
   * by a copy of the class attribute if there is a class.
   * @return a set of empty Instances (header only)
   * @exception Exception if the header can't be set up
   */
  private Instances setOutputFormatOriginal() throws Exception {
    FastVector attributes = new FastVector();

    for (int i=0;i<m_numAttribs;i++) {
      String att = m_trainInstances.attribute(i).name();
      attributes.addElement(new Attribute(att));
    }

    if (m_hasClass) {
      attributes.addElement(m_trainCopy.classAttribute().copy());
    }

    Instances outputFormat =
      new Instances(m_trainCopy.relationName()+"->PC->original space",
                    attributes, 0);

    // set the class to be the last attribute if necessary
    if (m_hasClass) {
      outputFormat.setClassIndex(outputFormat.numAttributes()-1);
    }

    return outputFormat;
  }

  /**
   * Set the format for the transformed data. One attribute is created per
   * retained component, named after the linear combination of original
   * attributes that defines it. Also records the number of output
   * attributes in m_outputNumAtts.
   * @return a set of empty Instances (header only) in the new format,
   * or null if the principal components haven't been built yet
   * @exception Exception if the output format can't be set
   */
  private Instances setOutputFormat() throws Exception {
    if (m_eigenvalues == null) {
      return null;
    }

    double cumulative = 0.0;
    FastVector attributes = new FastVector();
    for (int i=m_numAttribs;i>=1;i--) {
      StringBuffer attName = new StringBuffer();
      for (int j=1;j<=m_numAttribs;j++) {
        attName.append(Utils.
                       doubleToString(m_eigenvectors[j][m_sortedEigens[i]],
                                      5,3)
                       +m_trainInstances.attribute(j-1).name());
        if (j != m_numAttribs) {
          // negative coefficients carry their own sign, so only add an
          // explicit "+" in front of a non-negative next term
          if (m_eigenvectors[j+1][m_sortedEigens[i]] >= 0) {
            attName.append("+");
          }
        }
      }
      attributes.addElement(new Attribute(attName.toString()));
      cumulative+=m_eigenvalues[m_sortedEigens[i]];

      if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
        break;
      }
    }

    if (m_hasClass) {
      attributes.addElement(m_trainCopy.classAttribute().copy());
    }

    Instances outputFormat =
      new Instances(m_trainInstances.relationName()+"_principal components",
                    attributes, 0);

    // set the class to be the last attribute if necessary
    if (m_hasClass) {
      outputFormat.setClassIndex(outputFormat.numAttributes()-1);
    }

    m_outputNumAtts = outputFormat.numAttributes();
    return outputFormat;
  }

  /**
   * Jacobi routine adapted from Numerical Recipes. Note that all arrays
   * are indexed from 1..n inclusive (index 0 is unused).
   *
   * Only the diagonal and the upper triangle of a are read; the elements
   * above the diagonal are modified by the rotations. At most 50 sweeps
   * are made; if the off-diagonal sum has not reached zero by then a
   * message is printed to System.err and the routine returns with
   * whatever d and v hold at that point.
   *
   * @param a the input matrix (presumably symmetric, e.g. the correlation
   * matrix - only its upper triangle is used); modified in place
   * @param n the dimension of the matrix
   * @param d output: receives the diagonal values (the eigenvalues in the
   * Numerical Recipes routine)
   * @param v output: initialised to the identity and then rotated (its
   * columns are the eigenvectors in the Numerical Recipes routine)
   */
  void jacobi(double [][] a, int n, double [] d, double [][] v) {
    int j,iq,ip,i;
    double tresh,theta,tau,t,sm,s,h,g,c;
    double [] b;
    double [] z;

    b = new double [n+1];
    z = new double [n+1];

    // start from v = identity
    for (ip=1;ip<=n;ip++) {
      for (iq=1;iq<=n;iq++) v[ip][iq]=0.0;
      v[ip][ip]=1.0;
    }

    // b and d start as the diagonal of a; z accumulates the corrections
    for (ip=1;ip<=n;ip++) {
      b[ip]=d[ip]=a[ip][ip];
      z[ip]=0.0;
    }

    // *nrot=0;
    for (i=1;i<=50;i++) {
      // sum of the magnitudes of the elements above the diagonal
      sm=0.0;
      for (ip=1;ip<=n-1;ip++) {
        for (iq=ip+1;iq<=n;iq++)
          sm += Math.abs(a[ip][iq]);
      }

      if (sm == 0.0) {
        // free_vector(z,1,n);
        // free_vector(b,1,n);
        return;
      }

      // a non-zero threshold is only used on the first three sweeps
      if (i < 4)
        tresh=0.2*sm/(n*n);
      else
        tresh=0.0;

      for (ip=1;ip<=n-1;ip++) {
        for (iq=ip+1;iq<=n;iq++) {
          g=100.0*Math.abs(a[ip][iq]);
          // after four sweeps, zero the element outright if adding g
          // changes neither diagonal value at double precision
          if (i > 4 && (double)(Math.abs(d[ip])+g) == (double)Math.abs(d[ip])
              && (double)(Math.abs(d[iq])+g) == (double)Math.abs(d[iq]))
            a[ip][iq]=0.0;
          else if (Math.abs(a[ip][iq]) > tresh) {
            h=d[iq]-d[ip];
            if ((double)(Math.abs(h)+g) == (double)Math.abs(h))
              t=(a[ip][iq])/h;
            else {
              theta=0.5*h/(a[ip][iq]);
              t=1.0/(Math.abs(theta)+Math.sqrt(1.0+theta*theta));
              if (theta < 0.0) t = -t;
            }
            c=1.0/Math.sqrt(1+t*t);
            s=t*c;
            tau=s/(1.0+c);
            h=t*a[ip][iq];
            z[ip] -= h;
            z[iq] += h;
            d[ip] -= h;
            d[iq] += h;
            a[ip][iq]=0.0;
            for (j=1;j<=ip-1;j++) {
              // rotate(a,j,ip,j,iq)
              g=a[j][ip];
              h=a[j][iq];
              a[j][ip]=g-s*(h+g*tau);
              a[j][iq]=h+s*(g-h*tau);
            }
            for (j=ip+1;j<=iq-1;j++) {
              // rotate(a,ip,j,j,iq)
              g=a[ip][j];
              h=a[j][iq];
              a[ip][j]=g-s*(h+g*tau);
              a[j][iq]=h+s*(g-h*tau);
            }
            for (j=iq+1;j<=n;j++) {
              // rotate(a,ip,j,iq,j)
              g=a[ip][j];
              h=a[iq][j];
              a[ip][j]=g-s*(h+g*tau);
              a[iq][j]=h+s*(g-h*tau);
            }
            for (j=1;j<=n;j++) {
              // rotate(v,j,ip,j,iq)
              g=v[j][ip];
              h=v[j][iq];
              v[j][ip]=g-s*(h+g*tau);
              v[j][iq]=h+s*(g-h*tau);
            }
            // ++(*nrot);
          }
        }
      }

      // fold this sweep's corrections into b, refresh d, reset z
      for (ip=1;ip<=n;ip++) {
        b[ip] += z[ip];
        d[ip]=b[ip];
        z[ip]=0.0;
      }
    }
    System.err.println("Too many iterations in routine jacobi");
  }

  /**
   * Main method for testing this class
   * @param argv should contain the command line arguments to the
   * evaluator/transformer (see AttributeSelection)
   */
  public static void main(String [] argv) {
    try {
      System.out.println(AttributeSelection.
                         SelectAttributes(new PrincipalComponents(), argv));
    }
    catch (Exception e) {
      e.printStackTrace();
      System.out.println(e.getMessage());
    }
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -