/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* SMOreg.java
* Copyright (C) 2002 Sylvain Roy
*
*/
package weka.classifiers.functions;
import weka.classifiers.functions.supportVector.*;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.filters.unsupervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
import weka.filters.unsupervised.attribute.Normalize;
import weka.filters.unsupervised.attribute.Standardize;
import weka.filters.Filter;
import java.util.*;
import java.io.*;
import weka.core.*;
/**
* Implements Alex J. Smola and Bernhard Scholkopf's sequential minimal optimization
* algorithm for training a support vector regression model using polynomial
* or RBF kernels.
*
* This implementation globally replaces all missing values and
* transforms nominal attributes into binary ones. It also
* normalizes all attributes by default. (Note that the coefficients
* in the output are based on the normalized/standardized data, not the
* original data.)
*
*
* For more information on the SMO algorithm, see<p>
*
* Alex J. Smola, Bernhard Scholkopf (1998). <i>A Tutorial on Support Vector Regression</i>.
* NeuroCOLT2 Technical Report Series - NC2-TR-1998-030. <p>
*
* S.K. Shevade, S.S. Keerthi, C. Bhattacharyya, K.R.K. Murthy,
* <i>Improvements to SMO Algorithm for SVM Regression</i>.
* Technical Report CD-99-16, Control Division, Dept. of Mechanical and Production Engineering,
* National University of Singapore.<p>
*
*
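* A minimal usage sketch (the file name below is illustrative; the calls
* are the standard weka.core/weka.classifiers API):
* <pre>
* Instances data = new Instances(
* new java.io.BufferedReader(new java.io.FileReader("train.arff")));
* data.setClassIndex(data.numAttributes() - 1);
* SMOreg reg = new SMOreg();
* reg.buildClassifier(data);
* double prediction = reg.classifyInstance(data.instance(0));
* </pre>
*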
* @author Sylvain Roy (sro33@student.canterbury.ac.nz)
* @version $Revision: 1.1 $
*/
public class SMOreg extends Classifier implements OptionHandler,
WeightedInstancesHandler {
/** Feature-space normalization? */
protected boolean m_featureSpaceNormalization = false;
/** Use RBF kernel? (default: poly) */
protected boolean m_useRBF = false;
/** The size of the cache (a prime number) */
protected int m_cacheSize = 250007;
/** Kernel to use **/
protected Kernel m_kernel;
/** Use lower-order terms? */
protected boolean m_lowerOrder = false;
/** The exponent for the polynomial kernel. */
protected double m_exponent = 1.0;
/** Gamma for the RBF kernel. */
protected double m_gamma = 0.01;
/** The class index from the training data */
protected int m_classIndex = -1;
/** Only numeric attributes in the dataset? */
protected boolean m_onlyNumeric;
/** The filter used to make attributes numeric. */
protected NominalToBinary m_NominalToBinary;
/** The filter to apply to the training data */
public static final int FILTER_NORMALIZE = 0;
public static final int FILTER_STANDARDIZE = 1;
public static final int FILTER_NONE = 2;
public static final Tag [] TAGS_FILTER = {
new Tag(FILTER_NORMALIZE, "Normalize training data"),
new Tag(FILTER_STANDARDIZE, "Standardize training data"),
new Tag(FILTER_NONE, "No normalization/standardization"),
};
/** The filter used to standardize/normalize all values. */
protected Filter m_Filter = null;
/** Whether to normalize/standardize/neither */
protected int m_filterType = FILTER_NORMALIZE;
/** The filter used to get rid of missing values. */
protected ReplaceMissingValues m_Missing;
/** Turn off all checks and conversions? Turning them off assumes
that data is purely numeric, doesn't contain any missing values,
and has a numeric class. Turning them off also means that
no header information will be stored if the machine is linear.
Finally, it also assumes that no instance has a weight equal to 0.*/
protected boolean m_checksTurnedOff = false;
/** The training data. */
protected Instances m_data;
/** The complexity parameter */
protected double m_C = 1.0;
/** The Lagrange multipliers */
protected double[] m_alpha;
protected double[] m_alpha_;
/** The thresholds. */
protected double m_b, m_bLow, m_bUp;
/** The indices for m_bLow and m_bUp */
protected int m_iLow, m_iUp;
/** Weight vector for linear machine. */
protected double[] m_weights;
/** The current set of errors for all non-bound examples. */
protected double[] m_fcache;
/** The four index sets used by the algorithm (m_I0 merges the paper's
I_0a and I_0b). */
protected SMOset m_I0; // {i: 0 < m_alpha[i] < C || 0 < m_alpha_[i] < C}
protected SMOset m_I1; // {i: m_alpha[i] = 0, m_alpha_[i] = 0}
protected SMOset m_I2; // {i: m_alpha[i] = 0, m_alpha_[i] = C}
protected SMOset m_I3; // {i: m_alpha[i] = C, m_alpha_[i] = 0}
/** The parameter epsilon: width of the epsilon-insensitive loss tube. */
protected double m_epsilon = 1e-3;
/** The parameter tol: tolerance used when checking the optimality conditions. */
protected double m_tol = 1.0e-3;
/** The parameter eps: round-off threshold used in the joint optimization step. */
protected double m_eps = 1.0e-12;
/** Precision constant for updating sets */
protected static double m_Del = 1e-10;
/** The parameters of the linear transformation applied
* by the filter to the class attribute */
protected double m_Alin;
protected double m_Blin;
/** Variables to hold weight vector in sparse form.
(To reduce storage requirements.) */
protected double[] m_sparseWeights;
protected int[] m_sparseIndices;
/**
* Returns a string describing the classifier
* @return a description suitable for
* displaying in the explorer/experimenter gui
*/
public String globalInfo() {
return "Implements Alex Smola and Bernhard Scholkopf's sequential minimal "
+ "optimization algorithm for training a support vector regression model. "
+ "This implementation globally replaces all missing values and "
+ "transforms nominal attributes into binary ones. It also "
+ "normalizes all attributes by default. (Note that the coefficients "
+ "in the output are based on the normalized/standardized data, not the "
+ "original data.) "
+ "For more information on the SMO algorithm, see\n\n"
+ "Alex J. Smola, Bernhard Scholkopf (1998). \"A Tutorial on Support Vector "
+ "Regression\". NeuroCOLT2 Technical Report Series - NC2-TR-1998-030.\n\n"
+ "S.K. Shevade, S.S. Keerthi, C. Bhattacharyya, K.R.K. Murthy, "
+ "\"Improvements to SMO Algorithm for SVM Regression\". "
+ "Technical Report CD-99-16, Control Division, Dept. of Mechanical and "
+ "Production Engineering, National University of Singapore. ";
}
/**
* Method for building the classifier.
*
* @param insts the set of training instances
* @exception Exception if the classifier can't be built successfully
*/
public void buildClassifier(Instances insts) throws Exception {
/* check the set of training instances */
if (!m_checksTurnedOff) {
if (insts.checkForStringAttributes()) {
throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
}
if (insts.classAttribute().isNominal()) {
throw new UnsupportedClassTypeException("SMOreg can't handle a nominal class! "
+ "Use SMO for performing "
+ "classification.");
}
insts = new Instances(insts);
insts.deleteWithMissingClass();
if (insts.numInstances() == 0) {
throw new Exception("No training instances without a missing class!");
}
/* Removes all the instances with weight equal to 0.
MUST be done since condition (6) of Shevade's paper
assumes Ci > 0 (see equation (1a)). */
Instances data = new Instances(insts, insts.numInstances());
for(int i = 0; i < insts.numInstances(); i++){
if(insts.instance(i).weight() > 0)
data.add(insts.instance(i));
}
if (data.numInstances() == 0) {
throw new Exception("No training instances left after removing " +
"instances with either zero weight or a missing class!");
}
insts = data;
}
m_onlyNumeric = true;
if (!m_checksTurnedOff) {
for (int i = 0; i < insts.numAttributes(); i++) {
if (i != insts.classIndex()) {
if (!insts.attribute(i).isNumeric()) {
m_onlyNumeric = false;
break;
}
}
}
}
if (!m_checksTurnedOff) {
m_Missing = new ReplaceMissingValues();
m_Missing.setInputFormat(insts);
insts = Filter.useFilter(insts, m_Missing);
} else {
m_Missing = null;
}
if (!m_onlyNumeric) {
m_NominalToBinary = new NominalToBinary();
m_NominalToBinary.setInputFormat(insts);
insts = Filter.useFilter(insts, m_NominalToBinary);
} else {
m_NominalToBinary = null;
}
m_classIndex = insts.classIndex();
if (m_filterType == FILTER_STANDARDIZE) {
m_Filter = new Standardize();
((Standardize)m_Filter).setIgnoreClass(true);
m_Filter.setInputFormat(insts);
insts = Filter.useFilter(insts, m_Filter);
} else if (m_filterType == FILTER_NORMALIZE) {
m_Filter = new Normalize();
((Normalize)m_Filter).setIgnoreClass(true);
m_Filter.setInputFormat(insts);
insts = Filter.useFilter(insts, m_Filter);
} else {
m_Filter = null;
}
m_data = insts;
// determine which linear transformation has been
// applied to the class by the filter
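// (The filters above transform the class linearly, y' = m_Alin*y + m_Blin;
// probing with class values 0 and 1 recovers m_Blin as the image of 0 and
// m_Alin as the slope, so predictions made on the filtered scale can be
// mapped back to the original scale.)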
if (m_Filter != null) {
Instance witness = (Instance)insts.instance(0).copy();
witness.setValue(m_classIndex, 0);
m_Filter.input(witness);
m_Filter.batchFinished();
Instance res = m_Filter.output();
m_Blin = res.value(m_classIndex);
witness.setValue(m_classIndex, 1);
m_Filter.input(witness);
m_Filter.batchFinished();
res = m_Filter.output();
m_Alin = res.value(m_classIndex) - m_Blin;
} else {
m_Alin = 1.0;
m_Blin = 0.0;
}
// Initialize kernel
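// (poly: K(x,y) = (x.y)^p, or (x.y + 1)^p with lower-order terms;
// RBF: K(x,y) = exp(-m_gamma * ||x-y||^2); the normalized poly kernel
// rescales by 1/sqrt(K(x,x)*K(y,y)))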
if(m_useRBF) {
m_kernel = new RBFKernel(m_data, m_cacheSize, m_gamma);
} else {
if (m_featureSpaceNormalization) {
m_kernel = new NormalizedPolyKernel(m_data, m_cacheSize, m_exponent, m_lowerOrder);
} else {
m_kernel = new PolyKernel(m_data, m_cacheSize, m_exponent, m_lowerOrder);
}
}
// If machine is linear, reserve space for weights
if (!m_useRBF && m_exponent == 1.0) {
m_weights = new double[m_data.numAttributes()];
} else {
m_weights = null;
}
// Initialize fcache
m_fcache = new double[m_data.numInstances()];
// Initialize sets
m_I0 = new SMOset(m_data.numInstances());
m_I1 = new SMOset(m_data.numInstances());
m_I2 = new SMOset(m_data.numInstances());
m_I3 = new SMOset(m_data.numInstances());
/* MAIN ROUTINE FOR MODIFICATION 1 */
// Follows the specification of the first modification of Shevade's paper
// Initialize alpha array to zero
m_alpha = new double[m_data.numInstances()];
m_alpha_ = new double[m_data.numInstances()];
// set I1 to contain all the examples
for(int i = 0; i < m_data.numInstances(); i++){
m_I1.insert(i);
}
// choose any example i from the training set (here i = 0)
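// with all alphas zero, every example is in I_1 and F_i = y_i, so the
// threshold bounds are seeded from example 0 as b_up = y_0 + epsilon
// and b_low = y_0 - epsilon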
m_bUp = m_data.instance(0).classValue() + m_epsilon;
m_bLow = m_data.instance(0).classValue() - m_epsilon;
m_iUp = m_iLow = 0;
int numChanged = 0;
boolean examineAll = true;
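// alternate one full pass over all examples with repeated passes over the
// non-bound set I_0, until a full pass changes no pair of multipliers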
while(numChanged > 0 || examineAll){
numChanged = 0;
if(examineAll){
// loop over all the examples
for(int i = 0; i < m_alpha.length; i++){
numChanged += examineExample(i);
}
} else {
// loop over I_0
for (int i = m_I0.getNext(-1); i != -1; i = m_I0.getNext(i)) {
numChanged += examineExample(i);
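// stop the inner loop once the bounds have (nearly) met on I_0
// (b_low - b_up < 2*tol); the outer loop then re-checks all examples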
if(m_bUp > m_bLow - 2 * m_tol){
numChanged = 0;
break;
}
}
}
if (examineAll)
examineAll = false;
else if (numChanged == 0)
examineAll = true;
}
/* END OF MAIN ROUTINE FOR MODIFICATION 1 */
// debugging checks (disabled)
// checkOptimality();
// checkAlphas();
// displayB();
// Set threshold
m_b = (m_bLow + m_bUp) / 2.0;
// Save memory
m_kernel.clean(); m_fcache = null;
m_I0 = m_I1 = m_I2 = m_I3 = null;
// If machine is linear, delete training data
// and store weight vector in sparse format
if (!m_useRBF && m_exponent == 1.0) {
// compute weight vector
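// for a degree-1 polynomial kernel the SVM expansion collapses into a
// single vector, w = sum_k (m_alpha[k] - m_alpha_[k]) * x_k, so neither
// the alphas nor the training data are needed at prediction time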
for(int j = 0; j < m_weights.length; j++){
m_weights[j] = 0;
}
for(int k = 0; k < m_alpha.length; k++){
for(int j = 0; j < m_weights.length; j++){
if(j == m_data.classIndex())
continue;
m_weights[j] += (m_alpha[k] - m_alpha_[k]) * m_data.instance(k).value(j);
}
}
// Convert weight vector
double[] sparseWeights = new double[m_weights.length];
int[] sparseIndices = new int[m_weights.length];
int counter = 0;
for (int ii = 0; ii < m_weights.length; ii++) {
if (m_weights[ii] != 0.0) {
sparseWeights[counter] = m_weights[ii];
sparseIndices[counter] = ii;
counter++;
}
}
m_sparseWeights = new double[counter];
m_sparseIndices = new int[counter];
System.arraycopy(sparseWeights, 0, m_sparseWeights, 0, counter);
System.arraycopy(sparseIndices, 0, m_sparseIndices, 0, counter);
// Clean out training data
if (!m_checksTurnedOff) {
m_data = new Instances(m_data, 0);
} else {
m_data = null;
}
// Clean out weight vector
m_weights = null;
// We don't need the alphas in the linear case
m_alpha = null;
m_alpha_ = null;
}
}
/**
* Examines an instance. (As defined in Shevade et al.'s paper.)
*
* @param i2 index of the instance to examine
* @return 1 if the examination led to a change in the multipliers, 0 otherwise
* @exception Exception if something goes wrong
*/
protected int examineExample(int i2) throws Exception{
// Lagrange multipliers for i2
double alpha2 = m_alpha[i2];
double alpha2_ = m_alpha_[i2];
double F2 = 0;
if(m_I0.contains(i2)){
F2 = m_fcache[i2];
} else {
// compute F2 = F_i2 and set f-cache[i2] = F2
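// F_i = y_i - sum_j (m_alpha[j] - m_alpha_[j]) * K(x_i, x_j), i.e. the
// prediction error with the threshold b left out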
F2 = m_data.instance(i2).classValue();
for(int j = 0; j < m_alpha.length; j++){
F2 -= (m_alpha[j] - m_alpha_[j]) * m_kernel.eval(i2, j, m_data.instance(i2));
}
m_fcache[i2] = F2;
// Update (b_low, i_low) or (b_up, i_up) using (F2, i2)...
if(m_I1.contains(i2)){
if(F2 + m_epsilon < m_bUp){
m_bUp = F2 + m_epsilon;
m_iUp = i2;
} else if (F2 - m_epsilon > m_bLow){
m_bLow = F2 - m_epsilon;
m_iLow = i2;
}
} else if(m_I2.contains(i2) && (F2+m_epsilon > m_bLow)){
m_bLow = F2 + m_epsilon;
m_iLow = i2;
} else if(m_I3.contains(i2) && (F2-m_epsilon < m_bUp)){
m_bUp = F2 - m_epsilon;
m_iUp = i2;
}
}
// check optimality using current b_low and b_up and, if
// violated, find an index i1 to do joint optimization with i2...
boolean optimality = true;
int i1 = -1;
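// optimality holds when b_low <= b_up + 2*tol over the sets that i2
// belongs to; a violation yields a partner index i1 for a joint
// optimization step on the pair (i1, i2)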
// case 1 : i2 is in I_0a
if(m_I0.contains(i2) && 0 < alpha2 && alpha2 < m_C * m_data.instance(i2).weight()){
if(m_bLow-(F2-m_epsilon) > 2 * m_tol){
optimality = false;
i1 = m_iLow;
// for i2 in I_0a choose the better i1
if((F2 - m_epsilon) - m_bUp > m_bLow - (F2 - m_epsilon)){
i1 = m_iUp;
}
}else if((F2 - m_epsilon)-m_bUp > 2 * m_tol){
optimality = false;
i1 = m_iUp;
// for i2 in I_0a choose the better i1...
if(m_bLow-(F2-m_epsilon) > (F2-m_epsilon)-m_bUp){
i1 = m_iLow;
}