
📄 PaceRegression.java

📁 Weka
💻 Java
📖 Page 1 of 2
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    PaceRegression.java
 *    Copyright (C) 2002 University of Waikato, Hamilton, New Zealand
 */

package weka.classifiers.functions;

import weka.classifiers.Classifier;
import weka.classifiers.functions.pace.ChisqMixture;
import weka.classifiers.functions.pace.MixtureDistribution;
import weka.classifiers.functions.pace.NormalMixture;
import weka.classifiers.functions.pace.PaceMatrix;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.NoSupportForMissingValuesException;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.core.WekaException;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.matrix.DoubleVector;
import weka.core.matrix.IntVector;

import java.util.Enumeration;
import java.util.Vector;

/**
 <!-- globalinfo-start -->
 * Class for building pace regression linear models and using them for prediction. <br/>
 * <br/>
 * Under regularity conditions, pace regression is provably optimal when the number of coefficients tends to infinity. It consists of a group of estimators that are either overall optimal or optimal under certain conditions.<br/>
 * <br/>
 * The current work on the pace regression theory, and therefore also this implementation, does not handle: <br/>
 * <br/>
 * - missing values <br/>
 * - non-binary nominal attributes <br/>
 * - the case that n - k is small, where n is the number of instances and k is the number of coefficients (the threshold used in this implementation is 20)<br/>
 * <br/>
 * For more information see:<br/>
 * <br/>
 * Wang, Y (2000). A new approach to fitting linear models in high dimensional spaces. Hamilton, New Zealand.<br/>
 * <br/>
 * Wang, Y., Witten, I. H.: Modeling for optimal probability prediction. In: Proceedings of the Nineteenth International Conference in Machine Learning, Sydney, Australia, 650-657, 2002.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * &#64;phdthesis{Wang2000,
 *    address = {Hamilton, New Zealand},
 *    author = {Wang, Y},
 *    school = {Department of Computer Science, University of Waikato},
 *    title = {A new approach to fitting linear models in high dimensional spaces},
 *    year = {2000}
 * }
 *
 * &#64;inproceedings{Wang2002,
 *    address = {Sydney, Australia},
 *    author = {Wang, Y. and Witten, I. H.},
 *    booktitle = {Proceedings of the Nineteenth International Conference in Machine Learning},
 *    pages = {650-657},
 *    title = {Modeling for optimal probability prediction},
 *    year = {2002}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -D
 *  Produce debugging output.
 *  (default no debugging output)</pre>
 *
 * <pre> -E &lt;estimator&gt;
 *  The estimator can be one of the following:
 *   eb -- Empirical Bayes estimator for normal mixture (default)
 *   nested -- Optimal nested model selector for normal mixture
 *   subset -- Optimal subset selector for normal mixture
 *   pace2 -- PACE2 for Chi-square mixture
 *   pace4 -- PACE4 for Chi-square mixture
 *   pace6 -- PACE6 for Chi-square mixture
 *
 *   ols -- Ordinary least squares estimator
 *   aic -- AIC estimator
 *   bic -- BIC estimator
 *   ric -- RIC estimator
 *   olsc -- Ordinary least squares subset selector with a threshold</pre>
 *
 * <pre> -S &lt;threshold value&gt;
 *  Threshold value for the OLSC estimator</pre>
 *
 <!-- options-end -->
 *
 * @author Yong Wang (yongwang@cs.waikato.ac.nz)
 * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz)
 * @version $Revision: 1.8 $
 */
public class PaceRegression
  extends Classifier
  implements OptionHandler, WeightedInstancesHandler, TechnicalInformationHandler {

  /** for serialization */
  static final long serialVersionUID = 7230266976059115435L;

  /** The model used */
  Instances m_Model = null;

  /** Array for storing coefficients of linear regression. */
  private double[] m_Coefficients;

  /** The index of the class attribute */
  private int m_ClassIndex;

  /** True if debug output will be printed */
  private boolean m_Debug;

  /** estimator type: Ordinary least squares */
  private static final int olsEstimator = 0;
  /** estimator type: Empirical Bayes */
  private static final int ebEstimator = 1;
  /** estimator type: Nested model selector */
  private static final int nestedEstimator = 2;
  /** estimator type: Subset selector */
  private static final int subsetEstimator = 3;
  /** estimator type: PACE2 */
  private static final int pace2Estimator = 4;
  /** estimator type: PACE4 */
  private static final int pace4Estimator = 5;
  /** estimator type: PACE6 */
  private static final int pace6Estimator = 6;
  /** estimator type: Ordinary least squares selection */
  private static final int olscEstimator = 7;
  /** estimator type: AIC */
  private static final int aicEstimator = 8;
  /** estimator type: BIC */
  private static final int bicEstimator = 9;
  /** estimator type: RIC */
  private static final int ricEstimator = 10;

  /** estimator types */
  public static final Tag[] TAGS_ESTIMATOR = {
    new Tag(olsEstimator, "Ordinary least squares"),
    new Tag(ebEstimator, "Empirical Bayes"),
    new Tag(nestedEstimator, "Nested model selector"),
    new Tag(subsetEstimator, "Subset selector"),
    new Tag(pace2Estimator, "PACE2"),
    new Tag(pace4Estimator, "PACE4"),
    new Tag(pace6Estimator, "PACE6"),
    new Tag(olscEstimator, "Ordinary least squares selection"),
    new Tag(aicEstimator, "AIC"),
    new Tag(bicEstimator, "BIC"),
    new Tag(ricEstimator, "RIC")
  };

  /** the estimator */
  private int paceEstimator = ebEstimator;

  /** threshold for the OLSC estimator */
  private double olscThreshold = 2;  // AIC

  /**
   * Returns a string describing this classifier
   * @return a description of the classifier suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "Class for building pace regression linear models and using them for "
      + "prediction. \n\n"
      + "Under regularity conditions, pace regression is provably optimal when "
      + "the number of coefficients tends to infinity. It consists of a group of "
      + "estimators that are either overall optimal or optimal under certain "
      + "conditions.\n\n"
      + "The current work on the pace regression theory, and therefore also this "
      + "implementation, does not handle: \n\n"
      + "- missing values \n"
      + "- non-binary nominal attributes \n"
      + "- the case that n - k is small, where n is the number of instances and k is "
      + "the number of coefficients (the threshold used in this implementation is 20)\n\n"
      + "For more information see:\n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
   *
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result;
    TechnicalInformation additional;

    result = new TechnicalInformation(Type.PHDTHESIS);
    result.setValue(Field.AUTHOR, "Wang, Y");
    result.setValue(Field.YEAR, "2000");
    result.setValue(Field.TITLE, "A new approach to fitting linear models in high dimensional spaces");
    result.setValue(Field.SCHOOL, "Department of Computer Science, University of Waikato");
    result.setValue(Field.ADDRESS, "Hamilton, New Zealand");

    additional = result.add(Type.INPROCEEDINGS);
    additional.setValue(Field.AUTHOR, "Wang, Y. and Witten, I. H.");
    additional.setValue(Field.YEAR, "2002");
    additional.setValue(Field.TITLE, "Modeling for optimal probability prediction");
    additional.setValue(Field.BOOKTITLE, "Proceedings of the Nineteenth International Conference in Machine Learning");
    additional.setValue(Field.PAGES, "650-657");
    additional.setValue(Field.ADDRESS, "Sydney, Australia");

    return result;
  }

  /**
   * Returns default capabilities of the classifier.
   *
   * @return      the capabilities of this classifier
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();

    // attributes
    result.enable(Capability.BINARY_ATTRIBUTES);
    result.enable(Capability.NUMERIC_ATTRIBUTES);

    // class
    result.enable(Capability.NUMERIC_CLASS);
    result.enable(Capability.DATE_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);

    return result;
  }

  /**
   * Builds a pace regression model for the given data.
   *
   * @param data the training data to be used for generating the
   * linear regression function
   * @throws Exception if the classifier could not be built successfully
   */
  public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    Capabilities cap = getCapabilities();
    cap.setMinimumNumberInstances(20 + data.numAttributes());
    cap.testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    /*
     * initialize the following
     */
    m_Model = new Instances(data, 0);
    m_ClassIndex = data.classIndex();
    double[][] transformedDataMatrix =
      getTransformedDataMatrix(data, m_ClassIndex);
    double[] classValueVector = data.attributeToDoubleArray(m_ClassIndex);

    m_Coefficients = null;

    /*
     * Perform pace regression
     */
    m_Coefficients = pace(transformedDataMatrix, classValueVector);
  }

  /**
   * pace regression
   *
   * @param matrix_X matrix with observations
   * @param vector_Y vector with class values
   * @return vector with coefficients
   */
  private double[] pace(double[][] matrix_X, double[] vector_Y) {

    PaceMatrix X = new PaceMatrix(matrix_X);
    PaceMatrix Y = new PaceMatrix(vector_Y, vector_Y.length);
    IntVector pvt = IntVector.seq(0, X.getColumnDimension() - 1);
    int n = X.getRowDimension();
    int kr = X.getColumnDimension();

    // triangularize the least-squares problem; pvt records the column pivoting
    X.lsqrSelection(Y, pvt, 1);
    X.positiveDiagonal(Y, pvt);

    PaceMatrix sol = (PaceMatrix) Y.clone();
    X.rsolve(sol, pvt, pvt.size());

    // residual part of the transformed response gives the noise estimate
    DoubleVector r = Y.getColumn(pvt.size(), n - 1, 0);
    double sde = Math.sqrt(r.sum2() / r.size());

    // standardized dimensional effects
    DoubleVector aHat = Y.getColumn(0, pvt.size() - 1, 0).times(1. / sde);

    DoubleVector aTilde = null;
    switch (paceEstimator) {
    case ebEstimator:
    case nestedEstimator:
    case subsetEstimator:
      // fit a normal mixture to the effects, then shrink/select them
      NormalMixture d = new NormalMixture();
      d.fit(aHat, MixtureDistribution.NNMMethod);
      if (paceEstimator == ebEstimator)
        aTilde = d.empiricalBayesEstimate(aHat);
      else if (paceEstimator == subsetEstimator)
        aTilde = d.subsetEstimate(aHat);
      else aTilde = d.nestedEstimate(aHat);
      break;
    case pace2Estimator:
    case pace4Estimator:
    case pace6Estimator:
      // PACE estimators operate on the squared effects via a chi-square mixture
      DoubleVector AHat = aHat.square();
      ChisqMixture dc = new ChisqMixture();
      dc.fit(AHat, MixtureDistribution.NNMMethod);
      DoubleVector ATilde;
      if (paceEstimator == pace6Estimator)
        ATilde = dc.pace6(AHat);
      else if (paceEstimator == pace2Estimator)
        ATilde = dc.pace2(AHat);
      else ATilde = dc.pace4(AHat);
      aTilde = ATilde.sqrt().times(aHat.sign());
      break;
    case olsEstimator:
      aTilde = aHat.copy();
      break;
    case aicEstimator:
    case bicEstimator:
    case ricEstimator:
    case olscEstimator:
      // hard-threshold the effects; AIC uses 2, BIC uses log n, RIC uses 2 log k
      if (paceEstimator == aicEstimator) olscThreshold = 2;
      else if (paceEstimator == bicEstimator) olscThreshold = Math.log(n);
      else if (paceEstimator == ricEstimator) olscThreshold = 2 * Math.log(kr);
      aTilde = aHat.copy();
      for (int i = 0; i < aTilde.size(); i++)
        if (Math.abs(aTilde.get(i)) < Math.sqrt(olscThreshold))
          aTilde.set(i, 0);
    }

    // transform the estimated effects back into regression coefficients
    PaceMatrix YTilde = new PaceMatrix((new PaceMatrix(aTilde)).times(sde));
    X.rsolve(YTilde, pvt, pvt.size());
    DoubleVector betaTilde = YTilde.getColumn(0).unpivoting(pvt, kr);

    return betaTilde.getArrayCopy();
  }

  /**
   * Checks if an instance has a missing value.
   *
   * @param instance the instance
   * @param model the data
   * @return true if missing value is present
   */
  public boolean checkForMissing(Instance instance, Instances model) {
    for (int j = 0; j < instance.numAttributes(); j++) {
      if (j != model.classIndex()) {
        if (instance.isMissing(j)) {
          return true;
        }
      }
    }
    return false;
  }

  /**
   * Transforms dataset into a two-dimensional array.
   *
   * @param data dataset
   * @param classIndex index of the class attribute
   * @return the transformed data
   */
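The listing continues on page 2. As a separate, minimal usage sketch (not part of the file above), the following shows how the class might be trained and queried through the standard Weka Classifier/OptionHandler methods it implements. The ARFF path "cpu.arff" is a placeholder, and the -E value is one of the estimator keys documented in the class javadoc; everything else uses calls visible in the listing or inherited from weka.classifiers.Classifier and weka.core.Instances.

import java.io.BufferedReader;
import java.io.FileReader;

import weka.classifiers.functions.PaceRegression;
import weka.core.Instances;

public class PaceRegressionDemo {
  public static void main(String[] args) throws Exception {
    // Load a dataset with a numeric class; "cpu.arff" is a placeholder path.
    Instances data = new Instances(new BufferedReader(new FileReader("cpu.arff")));
    data.setClassIndex(data.numAttributes() - 1);

    PaceRegression pace = new PaceRegression();
    // Choose the estimator via the -E option described in the javadoc
    // (eb, nested, subset, pace2, pace4, pace6, ols, aic, bic, ric, olsc).
    pace.setOptions(new String[] {"-E", "eb"});

    pace.buildClassifier(data);                       // fits the linear model
    double prediction = pace.classifyInstance(data.instance(0));
    System.out.println("Prediction for first instance: " + prediction);
  }
}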
