/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* SMOreg.java
* Copyright (C) 2002 Sylvain Roy
*
*/
package weka.classifiers.functions;
import weka.classifiers.functions.supportVector.*;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.filters.unsupervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
import weka.filters.unsupervised.attribute.Normalize;
import weka.filters.unsupervised.attribute.Standardize;
import weka.filters.Filter;
import java.util.*;
import java.io.*;
import weka.core.*;
/**
* Implements Alex J. Smola and Bernhard Scholkopf's sequential minimal optimization
* algorithm for training a support vector regression model using polynomial
* or RBF kernels.
*
* This implementation globally replaces all missing values and
* transforms nominal attributes into binary ones. It also
* normalizes all attributes by default. (Note that the coefficients
* in the output are based on the normalized/standardized data, not the
* original data.)
*
*
* For more information on the SMO algorithm, see<p>
*
* Alex J. Smola, Bernhard Scholkopf (1998). <i>A Tutorial on Support Vector Regression</i>.
* NeuroCOLT2 Technical Report Series - NC2-TR-1998-030. <p>
*
* S.K. Shevade, S.S. Keerthi, C. Bhattacharyya, K.R.K. Murthy,
* <i>Improvements to SMO Algorithm for SVM Regression</i>.
* Technical Report CD-99-16, Control Division, Dept. of Mechanical and Production Engineering,
* National University of Singapore.<p>
*
*
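* A minimal usage sketch (the file name below is illustrative; the calls
* are the standard weka.core/weka.classifiers API):
* <pre>
* Instances data = new Instances(
* new java.io.BufferedReader(new java.io.FileReader("train.arff")));
* data.setClassIndex(data.numAttributes() - 1);
* SMOreg reg = new SMOreg();
* reg.buildClassifier(data);
* double prediction = reg.classifyInstance(data.instance(0));
* </pre>
*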
* @author Sylvain Roy (sro33@student.canterbury.ac.nz)
* @version $Revision: 1.1 $
*/
public class SMOreg extends Classifier implements OptionHandler,
WeightedInstancesHandler {
/** Feature-space normalization? */
protected boolean m_featureSpaceNormalization = false;
/** Use RBF kernel? (default: poly) */
protected boolean m_useRBF = false;
/** The size of the cache (a prime number) */
protected int m_cacheSize = 250007;
/** Kernel to use **/
protected Kernel m_kernel;
/** Use lower-order terms? */
protected boolean m_lowerOrder = false;
/** The exponent for the polynomial kernel. */
protected double m_exponent = 1.0;
/** Gamma for the RBF kernel. */
protected double m_gamma = 0.01;
/** The class index from the training data */
protected int m_classIndex = -1;
/** Only numeric attributes in the dataset? */
protected boolean m_onlyNumeric;
/** The filter used to make attributes numeric. */
protected NominalToBinary m_NominalToBinary;
/** The filter to apply to the training data */
public static final int FILTER_NORMALIZE = 0;
public static final int FILTER_STANDARDIZE = 1;
public static final int FILTER_NONE = 2;
public static final Tag [] TAGS_FILTER = {
new Tag(FILTER_NORMALIZE, "Normalize training data"),
new Tag(FILTER_STANDARDIZE, "Standardize training data"),
new Tag(FILTER_NONE, "No normalization/standardization"),
};
/** The filter used to standardize/normalize all values. */
protected Filter m_Filter = null;
/** Whether to normalize/standardize/neither */
protected int m_filterType = FILTER_NORMALIZE;
/** The filter used to get rid of missing values. */
protected ReplaceMissingValues m_Missing;
/** Turn off all checks and conversions? Turning them off assumes
that data is purely numeric, doesn't contain any missing values,
and has a numeric class. Turning them off also means that
no header information will be stored if the machine is linear.
Finally, it also assumes that no instance has a weight equal to 0.*/
protected boolean m_checksTurnedOff = false;
/** The training data. */
protected Instances m_data;
/** The complexity parameter */
protected double m_C = 1.0;
/** The Lagrange multipliers */
protected double[] m_alpha;
protected double[] m_alpha_;
/** The thresholds. */
protected double m_b, m_bLow, m_bUp;
/** The indices for m_bLow and m_bUp */
protected int m_iLow, m_iUp;
/** Weight vector for linear machine. */
protected double[] m_weights;
/** The current set of errors for all non-bound examples. */
protected double[] m_fcache;
/** The four index sets used by the algorithm (m_I0 merges the paper's
I_0a and I_0b). */
protected SMOset m_I0; // {i: 0 < m_alpha[i] < C || 0 < m_alpha_[i] < C}
protected SMOset m_I1; // {i: m_alpha[i] = 0, m_alpha_[i] = 0}
protected SMOset m_I2; // {i: m_alpha[i] = 0, m_alpha_[i] = C}
protected SMOset m_I3; // {i: m_alpha[i] = C, m_alpha_[i] = 0}
/** The parameter epsilon: width of the epsilon-insensitive loss tube. */
protected double m_epsilon = 1e-3;
/** The parameter tol: tolerance used when checking the optimality conditions. */
protected double m_tol = 1.0e-3;
/** The parameter eps: round-off threshold used in the joint optimization step. */
protected double m_eps = 1.0e-12;
/** Precision constant for updating sets */
protected static double m_Del = 1e-10;
/** The parameters of the linear transformation applied
* by the filter to the class attribute */
protected double m_Alin;
protected double m_Blin;
/** Variables to hold weight vector in sparse form.
(To reduce storage requirements.) */
protected double[] m_sparseWeights;
protected int[] m_sparseIndices;
/**
* Returns a string describing the classifier
* @return a description suitable for
* displaying in the explorer/experimenter gui
*/
public String globalInfo() {
return "Implements Alex Smola and Bernhard Scholkopf's sequential minimal "
+ "optimization algorithm for training a support vector regression model. "
+ "This implementation globally replaces all missing values and "
+ "transforms nominal attributes into binary ones. It also "
+ "normalizes all attributes by default. (Note that the coefficients "
+ "in the output are based on the normalized/standardized data, not the "
+ "original data.) "
+ "For more information on the SMO algorithm, see\n\n"
+ "Alex J. Smola, Bernhard Scholkopf (1998). \"A Tutorial on Support Vector "
+ "Regression\". NeuroCOLT2 Technical Report Series - NC2-TR-1998-030.\n\n"
+ "S.K. Shevade, S.S. Keerthi, C. Bhattacharyya, K.R.K. Murthy, "
+ "\"Improvements to SMO Algorithm for SVM Regression\". "
+ "Technical Report CD-99-16, Control Division, Dept. of Mechanical and "
+ "Production Engineering, National University of Singapore. ";
}
/**
* Method for building the classifier.
*
* @param insts the set of training instances
* @exception Exception if the classifier can't be built successfully
*/
public void buildClassifier(Instances insts) throws Exception {
/* check the set of training instances */
if (!m_checksTurnedOff) {
if (insts.checkForStringAttributes()) {
throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
}
if (insts.classAttribute().isNominal()) {
throw new UnsupportedClassTypeException("SMOreg can't handle a nominal class! "
+ "Use SMO for performing "
+ "classification.");
}
insts = new Instances(insts);
insts.deleteWithMissingClass();
if (insts.numInstances() == 0) {
throw new Exception("No training instances without a missing class!");
}
/* Removes all the instances with weight equal to 0.
MUST be done since condition (6) of Shevade's paper
assumes Ci > 0 (see equation (1a)). */
Instances data = new Instances(insts, insts.numInstances());
for(int i = 0; i < insts.numInstances(); i++){
if(insts.instance(i).weight() > 0)
data.add(insts.instance(i));
}
if (data.numInstances() == 0) {
throw new Exception("No training instances left after removing " +
"instances with either zero weight or a missing class!");
}
insts = data;
}
m_onlyNumeric = true;
if (!m_checksTurnedOff) {
for (int i = 0; i < insts.numAttributes(); i++) {
if (i != insts.classIndex()) {
if (!insts.attribute(i).isNumeric()) {
m_onlyNumeric = false;
break;
}
}
}
}
if (!m_checksTurnedOff) {
m_Missing = new ReplaceMissingValues();
m_Missing.setInputFormat(insts);
insts = Filter.useFilter(insts, m_Missing);
} else {
m_Missing = null;
}
if (!m_onlyNumeric) {
m_NominalToBinary = new NominalToBinary();
m_NominalToBinary.setInputFormat(insts);
insts = Filter.useFilter(insts, m_NominalToBinary);
} else {
m_NominalToBinary = null;
}
m_classIndex = insts.classIndex();
if (m_filterType == FILTER_STANDARDIZE) {
m_Filter = new Standardize();
((Standardize)m_Filter).setIgnoreClass(true);
m_Filter.setInputFormat(insts);
insts = Filter.useFilter(insts, m_Filter);
} else if (m_filterType == FILTER_NORMALIZE) {
m_Filter = new Normalize();
((Normalize)m_Filter).setIgnoreClass(true);
m_Filter.setInputFormat(insts);
insts = Filter.useFilter(insts, m_Filter);
} else {
m_Filter = null;
}
m_data = insts;
// determine which linear transformation has been
// applied to the class by the filter
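// (The filters above transform the class linearly, y' = m_Alin*y + m_Blin;
// probing with class values 0 and 1 recovers m_Blin as the image of 0 and
// m_Alin as the slope, so predictions made on the filtered scale can be
// mapped back to the original scale.)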
if (m_Filter != null) {
Instance witness = (Instance)insts.instance(0).copy();
witness.setValue(m_classIndex, 0);
m_Filter.input(witness);
m_Filter.batchFinished();
Instance res = m_Filter.output();
m_Blin = res.value(m_classIndex);
witness.setValue(m_classIndex, 1);
m_Filter.input(witness);
m_Filter.batchFinished();
res = m_Filter.output();
m_Alin = res.value(m_classIndex) - m_Blin;
} else {
m_Alin = 1.0;
m_Blin = 0.0;
}
// Initialize kernel
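// (poly: K(x,y) = (x.y)^p, or (x.y + 1)^p with lower-order terms;
// RBF: K(x,y) = exp(-m_gamma * ||x-y||^2); the normalized poly kernel
// rescales by 1/sqrt(K(x,x)*K(y,y)))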
if(m_useRBF) {
m_kernel = new RBFKernel(m_data, m_cacheSize, m_gamma);
} else {
if (m_featureSpaceNormalization) {
m_kernel = new NormalizedPolyKernel(m_data, m_cacheSize, m_exponent, m_lowerOrder);
} else {
m_kernel = new PolyKernel(m_data, m_cacheSize, m_exponent, m_lowerOrder);
}
}
// If machine is linear, reserve space for weights
if (!m_useRBF && m_exponent == 1.0) {
m_weights = new double[m_data.numAttributes()];
} else {
m_weights = null;
}
// Initialize fcache
m_fcache = new double[m_data.numInstances()];
// Initialize sets
m_I0 = new SMOset(m_data.numInstances());
m_I1 = new SMOset(m_data.numInstances());
m_I2 = new SMOset(m_data.numInstances());
m_I3 = new SMOset(m_data.numInstances());
/* MAIN ROUTINE FOR MODIFICATION 1 */
// Follows the specification of the first modification of Shevade's paper
// Initialize alpha array to zero
m_alpha = new double[m_data.numInstances()];
m_alpha_ = new double[m_data.numInstances()];
// set I1 to contain all the examples
for(int i = 0; i < m_data.numInstances(); i++){
m_I1.insert(i);
}
// choose any example i from the training set (here i = 0)
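// with all alphas zero, every example is in I_1 and F_i = y_i, so the
// threshold bounds are seeded from example 0 as b_up = y_0 + epsilon
// and b_low = y_0 - epsilon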
m_bUp = m_data.instance(0).classValue() + m_epsilon;
m_bLow = m_data.instance(0).classValue() - m_epsilon;
m_iUp = m_iLow = 0;
int numChanged = 0;
boolean examineAll = true;
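// alternate one full pass over all examples with repeated passes over the
// non-bound set I_0, until a full pass changes no pair of multipliers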
while(numChanged > 0 || examineAll){
numChanged = 0;
if(examineAll){
// loop over all the examples
for(int i = 0; i < m_alpha.length; i++){
numChanged += examineExample(i);
}
} else {
// loop over I_0
for (int i = m_I0.getNext(-1); i != -1; i = m_I0.getNext(i)) {
numChanged += examineExample(i);
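// stop the inner loop once the bounds have (nearly) met on I_0
// (b_low - b_up < 2*tol); the outer loop then re-checks all examples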
if(m_bUp > m_bLow - 2 * m_tol){
numChanged = 0;
break;
}
}
}
if (examineAll)
examineAll = false;
else if (numChanged == 0)
examineAll = true;
}
/* END OF MAIN ROUTINE FOR MODIFICATION 1 */
// debugging checks (disabled)
// checkOptimality();
// checkAlphas();
// displayB();
// Set threshold
m_b = (m_bLow + m_bUp) / 2.0;
// Save memory
m_kernel.clean(); m_fcache = null;
m_I0 = m_I1 = m_I2 = m_I3 = null;
// If machine is linear, delete training data
// and store weight vector in sparse format
if (!m_useRBF && m_exponent == 1.0) {
// compute weight vector
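// for a degree-1 polynomial kernel the SVM expansion collapses into a
// single vector, w = sum_k (m_alpha[k] - m_alpha_[k]) * x_k, so neither
// the alphas nor the training data are needed at prediction time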
for(int j = 0; j < m_weights.length; j++){
m_weights[j] = 0;
}
for(int k = 0; k < m_alpha.length; k++){
for(int j = 0; j < m_weights.length; j++){
if(j == m_data.classIndex())
continue;
m_weights[j] += (m_alpha[k] - m_alpha_[k]) * m_data.instance(k).value(j);
}
}
// Convert weight vector
double[] sparseWeights = new double[m_weights.length];
int[] sparseIndices = new int[m_weights.length];
int counter = 0;
for (int ii = 0; ii < m_weights.length; ii++) {
if (m_weights[ii] != 0.0) {
sparseWeights[counter] = m_weights[ii];
sparseIndices[counter] = ii;
counter++;
}
}
m_sparseWeights = new double[counter];
m_sparseIndices = new int[counter];
System.arraycopy(sparseWeights, 0, m_sparseWeights, 0, counter);
System.arraycopy(sparseIndices, 0, m_sparseIndices, 0, counter);
// Clean out training data
if (!m_checksTurnedOff) {
m_data = new Instances(m_data, 0);
} else {
m_data = null;
}
// Clean out weight vector
m_weights = null;
// We don't need the alphas in the linear case
m_alpha = null;
m_alpha_ = null;
}
}
/**
* Examines an instance. (As defined in Shevade et al.'s paper.)
*
* @param i2 index of the instance to examine
* @return 1 if the examination led to a change in the multipliers, 0 otherwise
* @exception Exception if something goes wrong
*/
protected int examineExample(int i2) throws Exception{
// Lagrange multipliers for i2
double alpha2 = m_alpha[i2];
double alpha2_ = m_alpha_[i2];
double F2 = 0;
if(m_I0.contains(i2)){
F2 = m_fcache[i2];
} else {
// compute F2 = F_i2 and set f-cache[i2] = F2
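// F_i = y_i - sum_j (m_alpha[j] - m_alpha_[j]) * K(x_i, x_j), i.e. the
// prediction error with the threshold b left out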
F2 = m_data.instance(i2).classValue();
for(int j = 0; j < m_alpha.length; j++){
F2 -= (m_alpha[j] - m_alpha_[j]) * m_kernel.eval(i2, j, m_data.instance(i2));
}
m_fcache[i2] = F2;
// Update (b_low, i_low) or (b_up, i_up) using (F2, i2)...
if(m_I1.contains(i2)){
if(F2 + m_epsilon < m_bUp){
m_bUp = F2 + m_epsilon;
m_iUp = i2;
} else if (F2 - m_epsilon > m_bLow){
m_bLow = F2 - m_epsilon;
m_iLow = i2;
}
} else if(m_I2.contains(i2) && (F2+m_epsilon > m_bLow)){
m_bLow = F2 + m_epsilon;
m_iLow = i2;
} else if(m_I3.contains(i2) && (F2-m_epsilon < m_bUp)){
m_bUp = F2 - m_epsilon;
m_iUp = i2;
}
}
// check optimality using current b_low and b_up and, if
// violated, find an index i1 to do joint optimization with i2...
boolean optimality = true;
int i1 = -1;
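// optimality holds when b_low <= b_up + 2*tol over the sets that i2
// belongs to; a violation yields a partner index i1 for a joint
// optimization step on the pair (i1, i2)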
// case 1 : i2 is in I_0a
if(m_I0.contains(i2) && 0 < alpha2 && alpha2 < m_C * m_data.instance(i2).weight()){
if(m_bLow-(F2-m_epsilon) > 2 * m_tol){
optimality = false;
i1 = m_iLow;
// for i2 in I_0a choose the better i1
if((F2 - m_epsilon) - m_bUp > m_bLow - (F2 - m_epsilon)){
i1 = m_iUp;
}
}else if((F2 - m_epsilon)-m_bUp > 2 * m_tol){
optimality = false;
i1 = m_iUp;
// for i2 in I_0a choose the better i1...
if(m_bLow-(F2-m_epsilon) > (F2-m_epsilon)-m_bUp){
i1 = m_iLow;
}