📄 mysvmlearner.java

📁 Java source code of an SVM learner (a wrapper around the external mySVM program), useful as a reference for anyone studying or improving SVM algorithms. Taken from the YALE data-mining toolkit.
💻 JAVA
📖 Page 1 of 3
/*
 *  YALE - Yet Another Learning Environment
 *  Copyright (C) 2001-2004
 *      Simon Fischer, Ralf Klinkenberg, Ingo Mierswa, 
 *          Katharina Morik, Oliver Ritthoff
 *      Artificial Intelligence Unit
 *      Computer Science Department
 *      University of Dortmund
 *      44221 Dortmund,  Germany
 *  email: yale-team@lists.sourceforge.net
 *  web:   http://yale.cs.uni-dortmund.de/
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as 
 *  published by the Free Software Foundation; either version 2 of the
 *  License, or (at your option) any later version. 
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 *  USA.
 */
package edu.udo.cs.yale.operator.learner.kernel;

import edu.udo.cs.yale.Yale;
import edu.udo.cs.yale.example.Attribute;
import edu.udo.cs.yale.example.Example;
import edu.udo.cs.yale.example.ExampleSet;
import edu.udo.cs.yale.example.BatchedExampleSet;
import edu.udo.cs.yale.example.ExampleReader;
import edu.udo.cs.yale.example.SkipNANExampleReader;
import edu.udo.cs.yale.operator.Operator;
import edu.udo.cs.yale.operator.UserError;
import edu.udo.cs.yale.operator.OperatorException;
import edu.udo.cs.yale.operator.parameter.*;
import edu.udo.cs.yale.operator.learner.AbstractLearner;
import edu.udo.cs.yale.operator.learner.Learner;
import edu.udo.cs.yale.operator.learner.Model;
import edu.udo.cs.yale.operator.performance.PerformanceVector;
import edu.udo.cs.yale.operator.performance.EstimatedPerformance;
import edu.udo.cs.yale.tools.ParameterService;
import edu.udo.cs.yale.tools.LogService;
import edu.udo.cs.yale.tools.TempFileService;
import edu.udo.cs.yale.tools.Tools;
import edu.udo.cs.yale.tools.OutputStreamMultiplier;
import edu.udo.cs.yale.tools.Ontology;

import java.util.List;
import java.util.LinkedList;
import java.util.StringTokenizer;
import java.util.ArrayList;
import java.io.*;


/** <p>
 *   <code>MySVMLearner</code> encapsulates the external 
 *   <a target="_top" href="http://www-ai.cs.uni-dortmund.de/SOFTWARE/MYSVM/index.eng.html"><code>mySVM</code></a>
 *   program by 
 *   <a target="_top" href="http://www-ai.cs.uni-dortmund.de/PERSONAL/rueping.eng.html">Stefan R&uuml;ping</a>
 *   {@yale.cite Rueping/2000a}, an implementation of Vladimir Vapnik's Support Vector Machine (SVM) 
 *   learning method from statistical learning theory {@yale.cite Vapnik/98a}.
 *  </p>
 *  <p>
 *    The <code>MySVMLearner</code> operator starts an operating system process running
 *    <code>mySVM</code>, passes the <code>mySVM</code> parameters and the training example set
 *    to it via <code>stdin</code>, and reads the learning result from <code>stdout</code>.
 *    This output constitutes the model.
 *  </p>
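 *  <p>
 *    The following is only a rough sketch of that <code>stdin</code>/<code>stdout</code> hand-off,
 *    not the code of this class; the command string, the parameter line and the example line are
 *    placeholders (see the <code>mySVM</code> documentation for the actual input format):
 *  </p>
 *  <pre>
 *    // hypothetical sketch; exception handling omitted
 *    Process svm = Runtime.getRuntime().exec(learnCommand);   // learnCommand: placeholder path to the executable
 *    PrintWriter toSvm = new PrintWriter(svm.getOutputStream());
 *    toSvm.println("C 1.0");                 // write the mySVM parameters ...
 *    toSvm.println("1.0 2.5 0.3 +1");        // ... followed by the training examples
 *    toSvm.close();
 *    BufferedReader fromSvm = new BufferedReader(new InputStreamReader(svm.getInputStream()));
 *    String line;
 *    while ((line = fromSvm.readLine()) != null) {
 *        // collect the learning result (support vectors, alphas, bias) line by line
 *    }
 *  </pre>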
 *  <p>
 *    In order to use the <code>mySVM</code> from within Yale, the binaries must be correctly installed
 *    and the properties <code>yale.mysvm.learncommand</code> and <code>yale.mysvm.applycommand</code>
 *    must be set to the fully qualified path of the <code>mySVM</code> executables.
 *    If the <code>mySVM</code> version  allowing individual example weighting is used, the properties
 *    <code>yale.mysvm.weighted.learncommand</code> and <code>yale.mysvm.weighted.applycommand</code>
 *    must be set. For properties and the yalerc configuration file, please see the tutorial chapter
 *    on installation notes.
 *  </p>
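 *  <p>
 *    For illustration, hypothetical <code>yalerc</code> entries could look as follows; the paths
 *    and binary names are placeholders and depend on where and how <code>mySVM</code> was installed:
 *  </p>
 *  <pre>
 *    yale.mysvm.learncommand          = /opt/mysvm/bin/mysvm_learn
 *    yale.mysvm.applycommand          = /opt/mysvm/bin/mysvm_apply
 *    yale.mysvm.weighted.learncommand = /opt/mysvm/bin/mysvm_weighted_learn
 *    yale.mysvm.weighted.applycommand = /opt/mysvm/bin/mysvm_weighted_apply
 *  </pre>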
 *  <p>
 *    The operator supports all of the <code>mySVM</code> parameters. A full description of the mySVM parameters
 *    can be found on the mySVM home page.
 *  </p>
 *  <p>
 *    The parameter <code>task_type</code> determines whether a classification or a regression function
 *    is learned. If set to <code>auto</code>, the task is determined by the type of the label (nominal or numerical).
 *  </p>
 *  <p>
 *    <code>mySVM</code> parameters are optional for this operator if and only if they are
 *    optional for <code>mySVM</code>.
 *  </p>
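 *  <p>
 *    A hypothetical operator entry in a Yale experiment file could look like the following;
 *    the element layout and the parameter values are examples only:
 *  </p>
 *  <pre>
 *    &lt;operator name="SVM" class="MySVMLearner"&gt;
 *        &lt;parameter key="type"  value="radial"/&gt;
 *        &lt;parameter key="gamma" value="1.0"/&gt;
 *        &lt;parameter key="C"     value="1.0"/&gt;
 *    &lt;/operator&gt;
 *  </pre>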
 *
 *  @yale.xmlclass MySVMLearner
 *  @yale.reference Rueping/2000a
 *  @yale.reference Vapnik/98a
 *  @yale.index SVM
 *  @author  Ingo Mierswa, Ralf Klinkenberg, Simon Fischer
 *  @version $Id: MySVMLearner.java,v 1.4 2004/08/27 11:57:40 ingomierswa Exp $
 *  @see     edu.udo.cs.yale.operator.learner.kernel.MySVMModel
 *  @see     edu.udo.cs.yale.example.ExampleSet
 *  @yale.todo rewrite this class (RK: why? in which respect?)
 */
public class MySVMLearner extends AbstractLearner {
    /* History:
     *   RK/2002/07/05: xi-alpha estimation and alternatively sum of alphas (greater one) estimation
     *                  (on all batches or on last batch only (if (ExampleSet instanceof BatchedExampleSet)));
     *   RK/2002/09/13: xi-alpha estimation, sum of alphas estimation, sum of alphas greater one estimation,
     *                  no. of alphas greater one estimation;
     *   RK/2003/03/18: merger of Yale 1.0 version of this file by RK and Yale 2.0 version by IM;
     *   RK/2003/03/26: merger of Yale 1.0 and Yale 2.0 versions completed;
     *   RK/2003/07/28: no longer needed temporary comments removed;
     *
     * To do:
     *   -> possibly: use separate executables for 'yale.mysvm.weighted.learncommand' and the additional
     *      'yale.mysvm.xialpha.learncommand';
     */

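    // Register the paths to the (weighted and unweighted) mySVM learner and applier executables
    // as Yale properties, so that they can be configured in the yalerc file (see the installation
    // notes in the tutorial).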
    static {
	Yale.registerYaleProperty(new ParameterTypeFile("yale.mysvm.weighted.learncommand", "Path to the weighted mySVM executable", true));
	Yale.registerYaleProperty(new ParameterTypeFile("yale.mysvm.learncommand", "Path to the mySVM executable", true));
	Yale.registerYaleProperty(new ParameterTypeFile("yale.mysvm.weighted.applycommand", "Path to the weighted mySVM applier executable", true));
	Yale.registerYaleProperty(new ParameterTypeFile("yale.mysvm.applycommand", "Path to the mySVM applier executable", true));
    }

    static final int  ESTIMATION_CRITERION_ERROR                    = 0;
    static final int  ESTIMATION_CRITERION_XI_ALPHA_ERROR           = 1;
    static final int  ESTIMATION_CRITERION_ALPHA_SUM                = 2;
    static final int  ESTIMATION_CRITERION_ALPHA_GREATER_ONE_SUM    = 3;
    static final int  ESTIMATION_CRITERION_NO_OF_ALPHAS_GREATER_ONE = 4;

    static final int  SVM_LEARNER = 0;
    static final int  SVM_APPLIER = 1;

    /** Kernel parameters of the mySVM. */
    private static final String[]  KERNEL_PARAMETER = { "type", "gamma", "degree", "a", "b" };
    /** The kernel types of the mySVM. */
    private static final String[]  KERNEL_TYPES = { "dot", "polynomial", "radial", "neural", "anova" };

    /** Other parameters of the mySVM. */
    private static final ParameterType[]  PARAMETER = {
    	// global
	new ParameterTypeDouble("nu", "Use nu-SVM with the given value of nu instead of normal SVM", 
				Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, true),
	new ParameterTypeInt("verbosity", "Verbositiy level", 1, 5, true),
	
	// loss
	new ParameterTypeDouble("C", "The SVM complexity constant", 0, Double.POSITIVE_INFINITY, 0.0),
	new ParameterTypeDouble("L+", "Penalize positive deviation (prediction too high) by this factor", 
				Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, true),
	new ParameterTypeDouble("L-", "Penalize negative deviation (prediction too high) by this factor", 
				Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, true),
	new ParameterTypeDouble("epsilon", "Insensitivity constant. No loss if prediction lies this close to true value", 
				Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, true),
	new ParameterTypeDouble("epsilon+", "Epsilon for positive deviation only", 
				Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, true),
	new ParameterTypeDouble("epsilon-", "Epsilon for negative deviation only", 
				Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, true),

	new ParameterTypeDouble("quadraticLoss", "Use quadratic loss for both positive and negative deviation", 
				Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, true),
	new ParameterTypeDouble("quadraticLoss+", "Use quadratic loss for positive deviation", 
				Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, true),
	new ParameterTypeDouble("quadraticLoss-", "Use quadratic loss for negative deviation", 
				Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, true),

	// optimizer
	new ParameterTypeInt("working_set_size", "Optimize this many examples in each iteration", 
			     0, Integer.MAX_VALUE, true),
	new ParameterTypeInt("max_iterations", "Stop after this many iterations", 
			     0, Integer.MAX_VALUE, true),
	new ParameterTypeInt("shrink_const", "Fix a variable to the bound if it is optimal for this much iterations",
			     0, Integer.MAX_VALUE, true),
	new ParameterTypeDouble("is_zero", "Numerical precision",
				0, Double.POSITIVE_INFINITY, true),
	new ParameterTypeDouble("descend", "Make this much descend on the target function in each iteration",
				Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, true),
	new ParameterTypeDouble("convergence_epsilon", "Precision on the KKT conditions",
				Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, true),
	new ParameterTypeInt("kernel_cache", "Size of the cache for kernel evaluations im MB ",
			     0, Integer.MAX_VALUE, true),

	// training algorithms
	new ParameterTypeInt("cross_validation", "Do cross validation on the training examples with the given number of chunks",
			     0, Integer.MAX_VALUE, true),
	new ParameterTypeBoolean("cv_inorder", "Do cross validation in the order the examples are given in", false),
	new ParameterTypeInt("cv_window", "Do cross validation by moving a window of the given number of chunks over the training data. (Implies cv_inorder",
			     0, Integer.MAX_VALUE, true),
	new ParameterTypeCategory("search_C", "Find an optimal C in the range of cmin to cmax by Adding or Multiplying the current C by cdelta",
				  new String[] { "off", "a", "m" }, 0),
	new ParameterTypeDouble("Cmin", "Lower bound for search_C", 0, Double.POSITIVE_INFINITY, true),
	new ParameterTypeDouble("Cmax", "Upper bound for search_C", 0, Double.POSITIVE_INFINITY, true),
	new ParameterTypeDouble("Cdelta", "Step size for search_C", 0, Double.POSITIVE_INFINITY, true)
    };

    

    /** xi-alpha performance estimation of mySVM (if parameter 'xi_alpha_estimation' is 'true' (default: 'false')) */
    private PerformanceVector  performanceEstimation;

    /** Index of the positive class among the values of the label attribute */
    private static int  positiveLabelIndex = Attribute.FIRST_CLASS_INDEX;
    /** Index of the unlabeled class among the values of the label attribute */
    private static int  unlabelledIndex    = -1;

    /** All svm parameters as key and value pair. */
    private String[] svmParameters;

    /** All kernel parameters as key and value pair. */
    private String[] kernelParameters;

    // static int  stdoutCounter = 0;    // RK/2002/07/02: TMP: counter to distinguish stored output of different mySVM calls;


    /** Writes the parameters and the examples to the external mySVM process, invokes it, and
     *  returns the learned model, or <code>null</code> if no model could be learned.
     */
    public Model learn(ExampleSet exampleSet) throws OperatorException {
	LogService.logMessage("MySVMLearner '"+getName()+"': "
			      +"mySVM starts learning ("+exampleSet.getSize()+" examples).", LogService.TASK);
   	LogService.logMessage("MySVMLearner '"+getName()+"': The example set is "
			      +((exampleSet instanceof BatchedExampleSet) ? "a" : "not a")+" 'BatchedExampleSet'.",
   			      LogService.MINIMUM);

	if (exampleSet.getSize() < 2) {
	    throw new UserError(this, 110, new Integer(2));
	}

	// ---- check if learning task is classification or regression ----
	boolean classificationTask = taskIsClassification(getParameterAsInt("task_type"), exampleSet);
	LogService.logMessage("MySVMLearner '"+getName()+"': Using "
			      +(classificationTask ? "classification" : "regression")+" mode.",
			      LogService.MINIMUM);

	// ---- create operating system process to call mySVM ----
	String   command  = null;
	boolean  weighted = getParameterAsBoolean("weighted_examples");
	if (weighted) {
	    command = ParameterService.getProperty("yale.mysvm.weighted.learncommand");
	} else { 
	    command = ParameterService.getProperty("yale.mysvm.learncommand");
	}
	boolean  sparse = getParameterAsBoolean("sparse");
	LogService.logMessage("MySVMLearner '"+getName()+"': Using "+
			      (sparse?"sparse" : "dense")+" data format.",
			      LogService.MINIMUM);

	LogService.logMessage("MySVMLearner '" + getName() + "': Executing '" + command + 
			      "' in directory '" + TempFileService.getTempDir() + "'", 
