⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 principalcomponents.java

📁 :<<数据挖掘--实用机器学习技术及java实现>>一书的配套源程序
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
      Instance converted = convertInstance(m_trainCopy.instance(i));      output.add(converted);    }    return output;  }  /**   * Evaluates the merit of a transformed attribute. This is defined   * to be 1 minus the cumulative variance explained. Merit can't   * be meaningfully evaluated if the data is to be transformed back   * to the original space.   * @param att the attribute to be evaluated   * @return the merit of a transformed attribute   * @exception Exception if attribute can't be evaluated   */  public double evaluateAttribute(int att) throws Exception {    if (m_eigenvalues == null) {      throw new Exception("Principal components hasn't been built yet!");    }    if (m_transBackToOriginal) {      return 1.0; // can't evaluate back in the original space!    }    // return 1-cumulative variance explained for this transformed att    double cumulative = 0.0;    for (int i=m_numAttribs;i>=m_numAttribs-att;i--) {      cumulative += m_eigenvalues[m_sortedEigens[i]];    }    return 1.0-cumulative/m_sumOfEigenValues;  }  /**   * Fill the correlation matrix   */  private void fillCorrelation() {    m_correlation = new double[m_numAttribs+1][m_numAttribs+1];    double [] att1 = new double [m_numInstances];    double [] att2 = new double [m_numInstances];    double corr;    for (int i=1;i<=m_numAttribs;i++) {      for (int j=1;j<=m_numAttribs;j++) {	if (i == j) {	  m_correlation[i][j] = 1.0;	} else {	  for (int k=0;k<m_numInstances;k++) {	    att1[k] = m_trainInstances.instance(k).value(i-1);	    att2[k] = m_trainInstances.instance(k).value(j-1);	  }	  corr = Utils.correlation(att1,att2,m_numInstances);	  m_correlation[i][j] = corr;	  m_correlation[i][j] = corr;	}      }    }  }  /**   * Return a summary of the analysis   * @return a summary of the analysis.   */  private String principalComponentsSummary() {    StringBuffer result = new StringBuffer();    double cumulative = 0.0;    Instances output = null;    int numVectors=0;    try {      output = setOutputFormat();      numVectors = (output.classIndex() < 0) 	? output.numAttributes()	: output.numAttributes()-1;    } catch (Exception ex) {    }    result.append("Correlation matrix\n"+matrixToString(m_correlation)		  +"\n\n");    result.append("eigenvalue\tproportion\tcumulative\n");    for (int i=m_numAttribs;i>(m_numAttribs-numVectors);i--) {      cumulative+=m_eigenvalues[m_sortedEigens[i]];      result.append(Utils.doubleToString(m_eigenvalues[m_sortedEigens[i]],9,5)		    +"\t"+Utils.		    doubleToString((m_eigenvalues[m_sortedEigens[i]] / 				    m_sumOfEigenValues),				     9,5)		    +"\t"+Utils.doubleToString((cumulative / 						m_sumOfEigenValues),9,5)		    +"\t"+output.attribute(m_numAttribs-i).name()+"\n");    }    result.append("\nEigenvectors\n");    for (int j=1;j<=numVectors;j++) {      result.append(" V"+j+'\t');    }    result.append("\n");    for (int j=1;j<=m_numAttribs;j++) {      for (int i=m_numAttribs;i>(m_numAttribs-numVectors);i--) {	result.append(Utils.		      doubleToString(m_eigenvectors[j][m_sortedEigens[i]],7,4)		      +"\t");      }      result.append(m_trainInstances.attribute(j-1).name()+'\n');    }    if (m_transBackToOriginal) {      result.append("\nPC space transformed back to original space.\n"		    +"(Note: can't evaluate attributes in the original "		    +"space)\n");    }    return result.toString();  }  /**   * Returns a description of this attribute transformer   * @return a String describing this attribute transformer   */  public String toString() {    if (m_eigenvalues == null) {      return "Principal components hasn't been built yet!";    } else {      return "\tPrincipal Components Attribute Transformer\n\n"	+principalComponentsSummary();    }  }  /**   * Return a matrix as a String   * @return a String describing a matrix   */  private String matrixToString(double [][] matrix) {    StringBuffer result = new StringBuffer();    int size = matrix.length-1;    for (int i=1;i<=size;i++) {      for (int j=1;j<=size;j++) {	result.append(Utils.doubleToString(matrix[i][j],6,2)+" ");	if (j == size) {	  result.append('\n');	}      }    }    return result.toString();  }  /**   * Convert a pc transformed instance back to the original space   */  private Instance convertInstanceToOriginal(Instance inst)    throws Exception {    double[] newVals;    if (m_hasClass) {      newVals = new double[m_numAttribs+1];    } else {      newVals = new double[m_numAttribs];    }    if (m_hasClass) {      // class is always appended as the last attribute      newVals[m_numAttribs] = inst.value(inst.numAttributes()-1);    }    for (int i=1;i<m_eTranspose[0].length;i++) {      double tempval = 0.0;      for (int j=1;j<m_eTranspose.length;j++) {	tempval += (m_eTranspose[j][i] * 		    inst.value(j - 1));       }      newVals[i - 1] = tempval;    }        if (inst instanceof SparseInstance) {      return new SparseInstance(inst.weight(), newVals);    } else {      return new Instance(inst.weight(), newVals);    }        }  /**   * Transform an instance in original (unormalized) format. Convert back   * to the original space if requested.   * @param instance an instance in the original (unormalized) format   * @return a transformed instance   * @exception Exception if instance cant be transformed   */  public Instance convertInstance(Instance instance) throws Exception {    if (m_eigenvalues == null) {      throw new Exception("convertInstance: Principal components not "			  +"built yet");    }    double[] newVals = new double[m_outputNumAtts];    Instance tempInst = (Instance)instance.copy();    if (!instance.equalHeaders(m_trainCopy.instance(0))) {      throw new Exception("Can't convert instance: header's don't match: "			  +"PrincipalComponents");    }    m_replaceMissingFilter.input(tempInst);    m_replaceMissingFilter.batchFinished();    tempInst = m_replaceMissingFilter.output();    if (m_normalize) {      m_normalizeFilter.input(tempInst);      m_normalizeFilter.batchFinished();      tempInst = m_normalizeFilter.output();    }    m_nominalToBinFilter.input(tempInst);    m_nominalToBinFilter.batchFinished();    tempInst = m_nominalToBinFilter.output();    if (m_attributeFilter != null) {      m_attributeFilter.input(tempInst);      m_attributeFilter.batchFinished();      tempInst = m_attributeFilter.output();    }    if (m_hasClass) {       newVals[m_outputNumAtts - 1] = instance.value(instance.classIndex());    }    double cumulative = 0;    for (int i = m_numAttribs; i >= 1; i--) {      double tempval = 0.0;      for (int j = 1; j <= m_numAttribs; j++) {	tempval += (m_eigenvectors[j][m_sortedEigens[i]] * 		    tempInst.value(j - 1));       }      newVals[m_numAttribs - i] = tempval;      cumulative+=m_eigenvalues[m_sortedEigens[i]];      if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {	break;      }    }        if (!m_transBackToOriginal) {      if (instance instanceof SparseInstance) {      return new SparseInstance(instance.weight(), newVals);      } else {	return new Instance(instance.weight(), newVals);      }          } else {      if (instance instanceof SparseInstance) {	return convertInstanceToOriginal(new SparseInstance(instance.weight(), 							    newVals));      } else {	return convertInstanceToOriginal(new Instance(instance.weight(),						      newVals));      }    }  }  /**   * Set up the header for the PC->original space dataset   */  private Instances setOutputFormatOriginal() throws Exception {    FastVector attributes = new FastVector();        for (int i=0;i<m_numAttribs;i++) {      String att = m_trainInstances.attribute(i).name();      attributes.addElement(new Attribute(att));    }        if (m_hasClass) {      attributes.addElement(m_trainCopy.classAttribute().copy());    }    Instances outputFormat =       new Instances(m_trainCopy.relationName()+"->PC->original space",		    attributes, 0);        // set the class to be the last attribute if necessary    if (m_hasClass) {      outputFormat.setClassIndex(outputFormat.numAttributes()-1);    }    return outputFormat;  }  /**   * Set the format for the transformed data   * @return a set of empty Instances (header only) in the new format   * @exception Exception if the output format can't be set   */  private Instances setOutputFormat() throws Exception {    if (m_eigenvalues == null) {      return null;    }    double cumulative = 0.0;    FastVector attributes = new FastVector();     for (int i=m_numAttribs;i>=1;i--) {       StringBuffer attName = new StringBuffer();       for (int j=1;j<=m_numAttribs;j++) {	 attName.append(Utils.			doubleToString(m_eigenvectors[j][m_sortedEigens[i]],				       5,3)			+m_trainInstances.attribute(j-1).name());	 if (j != m_numAttribs) {	   if (m_eigenvectors[j+1][m_sortedEigens[i]] >= 0) {	     attName.append("+");	   }	 }       }       attributes.addElement(new Attribute(attName.toString()));       cumulative+=m_eigenvalues[m_sortedEigens[i]];       if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {	 break;       }     }          if (m_hasClass) {       attributes.addElement(m_trainCopy.classAttribute().copy());     }     Instances outputFormat =        new Instances(m_trainInstances.relationName()+"_principal components",		     attributes, 0);     // set the class to be the last attribute if necessary     if (m_hasClass) {       outputFormat.setClassIndex(outputFormat.numAttributes()-1);     }          m_outputNumAtts = outputFormat.numAttributes();     return outputFormat;  }  // jacobi routine adapted from numerical recipies  // note arrays are from 1..n inclusive  void jacobi(double [][] a, int n, double [] d, double [][] v) {    int j,iq,ip,i;    double tresh,theta,tau,t,sm,s,h,g,c;    double [] b;    double [] z;    b = new double [n+1];    z = new double [n+1];    for (ip=1;ip<=n;ip++) {      for (iq=1;iq<=n;iq++) v[ip][iq]=0.0;      v[ip][ip]=1.0;    }    for (ip=1;ip<=n;ip++) {      b[ip]=d[ip]=a[ip][ip];      z[ip]=0.0;    }    //    *nrot=0;    for (i=1;i<=50;i++) {      sm=0.0;      for (ip=1;ip<=n-1;ip++) {	for (iq=ip+1;iq<=n;iq++)	  sm += Math.abs(a[ip][iq]);      }      if (sm == 0.0) {	//	free_vector(z,1,n);	//	free_vector(b,1,n);	return;      }      if (i < 4)	tresh=0.2*sm/(n*n);      else	tresh=0.0;      for (ip=1;ip<=n-1;ip++) {	for (iq=ip+1;iq<=n;iq++) {	  g=100.0*Math.abs(a[ip][iq]);	  if (i > 4 && (double)(Math.abs(d[ip])+g) == (double)Math.abs(d[ip])	      && (double)(Math.abs(d[iq])+g) == (double)Math.abs(d[iq]))	    a[ip][iq]=0.0;	  else if (Math.abs(a[ip][iq]) > tresh) {	    h=d[iq]-d[ip];	    if ((double)(Math.abs(h)+g) == (double)Math.abs(h))	      t=(a[ip][iq])/h;	    else {	      theta=0.5*h/(a[ip][iq]);	      t=1.0/(Math.abs(theta)+Math.sqrt(1.0+theta*theta));	      if (theta < 0.0) t = -t;	    }	    c=1.0/Math.sqrt(1+t*t);	    s=t*c;	    tau=s/(1.0+c);	    h=t*a[ip][iq];	    z[ip] -= h;	    z[iq] += h;	    d[ip] -= h;	    d[iq] += h;	    a[ip][iq]=0.0;	    for (j=1;j<=ip-1;j++) {	      //	      rotate(a,j,ip,j,iq)	      g=a[j][ip];	      h=a[j][iq];	      a[j][ip]=g-s*(h+g*tau);	      a[j][iq]=h+s*(g-h*tau);	    }	    for (j=ip+1;j<=iq-1;j++) {	      //	      rotate(a,ip,j,j,iq)	      g=a[ip][j];	      h=a[j][iq];	      a[ip][j]=g-s*(h+g*tau);	      a[j][iq]=h+s*(g-h*tau);	    }	    for (j=iq+1;j<=n;j++) {	      //	      rotate(a,ip,j,iq,j)	      g=a[ip][j];	      h=a[iq][j];	      a[ip][j]=g-s*(h+g*tau);	      a[iq][j]=h+s*(g-h*tau);	    }	    for (j=1;j<=n;j++) {	      //	      rotate(v,j,ip,j,iq)	      g=v[j][ip];	      h=v[j][iq];	      v[j][ip]=g-s*(h+g*tau);	      v[j][iq]=h+s*(g-h*tau);	    }	    //	    ++(*nrot);	  }	}      }      for (ip=1;ip<=n;ip++) {	b[ip] += z[ip];	d[ip]=b[ip];	z[ip]=0.0;      }    }    System.err.println("Too many iterations in routine jacobi");  }  /**   * Main method for testing this class   * @param argv should contain the command line arguments to the   * evaluator/transformer (see AttributeSelection)   */  public static void main(String [] argv) {    try {      System.out.println(AttributeSelection.			 SelectAttributes(new PrincipalComponents(), argv));    }    catch (Exception e) {      e.printStackTrace();      System.out.println(e.getMessage());    }  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -