📄 LeastMedSq.java
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * LeastMedSq.java
 *
 * Copyright (C) 2001 Tony Voyle
 */

package weka.classifiers.functions;

import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.filters.Filter;
import weka.filters.supervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
import weka.filters.unsupervised.instance.RemoveRange;

import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

/**
 <!-- globalinfo-start -->
 * Implements a least median squared linear regression utilising the existing
 * weka LinearRegression class to form predictions. <br/>
 * Least squared regression functions are generated from random subsamples of
 * the data. The least squared regression with the lowest median squared error
 * is chosen as the final model.<br/>
 * <br/>
 * The basis of the algorithm is <br/>
 * <br/>
 * Peter J. Rousseeuw, Annick M. Leroy (1987). Robust regression and outlier detection.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * @book{Rousseeuw1987,
 *    author = {Peter J. Rousseeuw and Annick M. Leroy},
 *    title = {Robust regression and outlier detection},
 *    year = {1987}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -S <sample size>
 *  Set sample size
 *  (default: 4)
 * </pre>
 *
 * <pre> -G <seed>
 *  Set the seed used to generate samples
 *  (default: 0)
 * </pre>
 *
 * <pre> -D
 *  Produce debugging output
 *  (default no debugging output)
 * </pre>
 *
 <!-- options-end -->
 *
 * @author Tony Voyle (tv6@waikato.ac.nz)
 * @version $Revision: 1.13 $
 */
public class LeastMedSq
  extends Classifier
  implements OptionHandler, TechnicalInformationHandler {

  /** for serialization */
  static final long serialVersionUID = 4288954049987652970L;

  private double[] m_Residuals;
  private double[] m_weight;
  private double m_SSR;
  private double m_scalefactor;
  private double m_bestMedian = Double.POSITIVE_INFINITY;

  private LinearRegression m_currentRegression;
  private LinearRegression m_bestRegression;
  private LinearRegression m_ls;

  private Instances m_Data;
  private Instances m_RLSData;
  private Instances m_SubSample;

  private ReplaceMissingValues m_MissingFilter;
  private NominalToBinary m_TransformFilter;
  private RemoveRange m_SplitFilter;

  private int m_samplesize = 4;
  private int m_samples;
  private boolean m_israndom = false;
  private boolean m_debug = false;
  private Random m_random;
  private long m_randomseed = 0;

  /**
   * Returns a string describing this classifier
   * @return a description of the classifier suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "Implements a least median squared linear regression utilising the "
      + "existing weka LinearRegression class to form predictions. \n"
      + "Least squared regression functions are generated from random subsamples of "
      + "the data. The least squared regression with the lowest median squared error "
      + "is chosen as the final model.\n\n"
      + "The basis of the algorithm is \n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
   *
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result;

    result = new TechnicalInformation(Type.BOOK);
    result.setValue(Field.AUTHOR, "Peter J. Rousseeuw and Annick M. Leroy");
    result.setValue(Field.YEAR, "1987");
    result.setValue(Field.TITLE, "Robust regression and outlier detection");

    return result;
  }

  /**
   * Returns default capabilities of the classifier.
   *
   * @return the capabilities of this classifier
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enable(Capability.NUMERIC_CLASS);
    result.enable(Capability.DATE_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);

    return result;
  }

  /**
   * Build lms regression
   *
   * @param data training data
   * @throws Exception if an error occurs
   */
  public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    cleanUpData(data);
    getSamples();
    findBestRegression();
    buildRLSRegression();
  } // buildClassifier

  /**
   * Classify a given instance using the best generated
   * LinearRegression Classifier.
   *
   * @param instance instance to be classified
   * @return class value
   * @throws Exception if an error occurs
   */
  public double classifyInstance(Instance instance) throws Exception {

    Instance transformedInstance = instance;
    m_TransformFilter.input(transformedInstance);
    transformedInstance = m_TransformFilter.output();
    m_MissingFilter.input(transformedInstance);
    transformedInstance = m_MissingFilter.output();

    return m_ls.classifyInstance(transformedInstance);
  } // classifyInstance

  /**
   * Cleans up data
   *
   * @param data data to be cleaned up
   * @throws Exception if an error occurs
   */
  private void cleanUpData(Instances data) throws Exception {

    m_Data = data;
    m_TransformFilter = new NominalToBinary();
    m_TransformFilter.setInputFormat(m_Data);
    m_Data = Filter.useFilter(m_Data, m_TransformFilter);
    m_MissingFilter = new ReplaceMissingValues();
    m_MissingFilter.setInputFormat(m_Data);
    m_Data = Filter.useFilter(m_Data, m_MissingFilter);
    m_Data.deleteWithMissingClass();
  }

  /**
   * Gets the number of samples to use.
   *
   * @throws Exception if an error occurs
   */
  private void getSamples() throws Exception {

    int stuf[] = new int[] {500, 50, 22, 17, 15, 14};
    if (m_samplesize < 7) {
      if (m_Data.numInstances() < stuf[m_samplesize - 1])
        m_samples = combinations(m_Data.numInstances(), m_samplesize);
      else
        m_samples = m_samplesize * 500;
    } else
      m_samples = 3000;
    if (m_debug) {
      System.out.println("m_samplesize: " + m_samplesize);
      System.out.println("m_samples: " + m_samples);
      System.out.println("m_randomseed: " + m_randomseed);
    }
  }

  /**
   * Set up the random number generator
   */
  private void setRandom() {
    m_random = new Random(getRandomSeed());
  }

  /**
   * Finds the best regression generated from m_samples
   * random samples from the training data
   *
   * @throws Exception if an error occurs
   */
  private void findBestRegression() throws Exception {

    setRandom();
    m_bestMedian = Double.POSITIVE_INFINITY;
    if (m_debug) {
      System.out.println("Starting:");
    }
    for (int s = 0, r = 0; s < m_samples; s++, r++) {
      if (m_debug) {
        if (s % (m_samples / 100) == 0)
          System.out.print("*");
      }
      genRegression();
      getMedian();
    }
    if (m_debug) {
      System.out.println("");
    }
    m_currentRegression = m_bestRegression;
  }

  /**
   * Generates a LinearRegression classifier from
   * the current m_SubSample
   *
   * @throws Exception if an error occurs
   */
  private void genRegression() throws Exception {

    m_currentRegression = new LinearRegression();
    m_currentRegression.setOptions(new String[]{"-S", "1"});
    selectSubSample(m_Data);
    m_currentRegression.buildClassifier(m_SubSample);
  }

  /**
   * Finds residuals (squared) for the current
   * regression.
   *
   * @throws Exception if an error occurs
   */
  private void findResiduals() throws Exception {

    m_SSR = 0;
    m_Residuals = new double[m_Data.numInstances()];
    for (int i = 0; i < m_Data.numInstances(); i++) {
      m_Residuals[i] = m_currentRegression.classifyInstance(m_Data.instance(i));
      m_Residuals[i] -= m_Data.instance(i).value(m_Data.classAttribute());
      m_Residuals[i] *= m_Residuals[i];
      m_SSR += m_Residuals[i];
    }
  }

  /**
   * finds the median residual squared for the
   * current regression
   *
   * @throws Exception if an error occurs
   */
  private void getMedian() throws Exception {

    findResiduals();
    int p = m_Residuals.length;
    select(m_Residuals, 0, p - 1, p / 2);
    if (m_Residuals[p / 2] < m_bestMedian) {
      m_bestMedian = m_Residuals[p / 2];
      m_bestRegression = m_currentRegression;
    }
  }

  /**
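The listing is cut off after getMedian(); the remainder of the original source (buildRLSRegression(), selectSubSample(), select(), the option handlers and toString()) is not shown here. As a rough illustration of how the class is meant to be used, the sketch below trains LeastMedSq on a numeric-class dataset through the standard Weka API and prints a prediction for the first instance. The ARFF file name is a placeholder, the last attribute is assumed to be the class, and the snippet assumes a Weka version that still ships this classifier in weka.classifiers.functions.

import java.io.BufferedReader;
import java.io.FileReader;

import weka.classifiers.functions.LeastMedSq;
import weka.core.Instances;

public class LeastMedSqDemo {

  public static void main(String[] args) throws Exception {
    // Load a dataset with a numeric class attribute; "housing.arff" is a placeholder path.
    Instances data = new Instances(new BufferedReader(new FileReader("housing.arff")));
    // buildClassifier() needs the class attribute set; assume it is the last attribute.
    data.setClassIndex(data.numAttributes() - 1);

    // Train with the documented defaults: -S 4 (sample size), -G 0 (random seed).
    LeastMedSq lmsq = new LeastMedSq();
    lmsq.setOptions(new String[] {"-S", "4", "-G", "0"});
    lmsq.buildClassifier(data);

    // Predict the class value of the first training instance using the final model.
    System.out.println("Predicted: " + lmsq.classifyInstance(data.instance(0)));
  }
}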