⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 svmlightlearner.java

📁 一个很好的LIBSVM的JAVA源码。对于要研究和改进SVM算法的学者。可以参考。来自数据挖掘工具YALE工具包。
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 *  YALE - Yet Another Learning Environment
 *  Copyright (C) 2001-2004
 *      Simon Fischer, Ralf Klinkenberg, Ingo Mierswa, 
 *          Katharina Morik, Oliver Ritthoff
 *      Artificial Intelligence Unit
 *      Computer Science Department
 *      University of Dortmund
 *      44221 Dortmund,  Germany
 *  email: yale-team@lists.sourceforge.net
 *  web:   http://yale.cs.uni-dortmund.de/
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as 
 *  published by the Free Software Foundation; either version 2 of the
 *  License, or (at your option) any later version. 
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 *  USA.
 */
package edu.udo.cs.yale.operator.learner.kernel;

import edu.udo.cs.yale.Yale;
import edu.udo.cs.yale.example.Attribute;
import edu.udo.cs.yale.example.ExampleReader;
import edu.udo.cs.yale.example.ExampleSet;
import edu.udo.cs.yale.example.Example;
import edu.udo.cs.yale.example.BatchedExampleSet;  // RK/2002/09/13
import edu.udo.cs.yale.operator.Operator;
import edu.udo.cs.yale.operator.UserError;
import edu.udo.cs.yale.operator.MissingIOObjectException;
import edu.udo.cs.yale.operator.OperatorException;
import edu.udo.cs.yale.operator.parameter.*;
import edu.udo.cs.yale.operator.learner.AbstractLearner;
import edu.udo.cs.yale.operator.learner.Learner;
import edu.udo.cs.yale.operator.learner.Model;
import edu.udo.cs.yale.operator.performance.PerformanceVector;
import edu.udo.cs.yale.operator.performance.EstimatedPerformance;
import edu.udo.cs.yale.tools.LogService;
import edu.udo.cs.yale.tools.Ontology;
import edu.udo.cs.yale.tools.Tools;
import edu.udo.cs.yale.tools.TempFileService;
import edu.udo.cs.yale.tools.ParameterService;

import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import java.io.*;

/** <code>SVMLightLearner</code> encapsulates the external 
 *  <a TARGET="_top" href="http://svmlight.joachims.org/">SVM<sup>light</sup></a> 
 *  program by 
 *  <a TARGET="_top" href="http://www.joachims.org/">Thorsten Joachims</a> {@yale.cite Joachims/99a},
 *  an implementation of Vladimir Vapnik's Support Vector Machine (SVM) 
 *  learning method from statistical learning theory for the 
 *  problem of pattern recognition (classification) {@yale.cite Vapnik/98a}.
 *     The algorithm has scalable memory requirements and can handle problems
 *  with many thousands of support vectors efficiently.<br/>
 *  If there are two {@link ExampleSet}s in the input, the learner performs transduction.
 *  The first is assumed to be the labeled training data and the second may contain examples without
 *  labels, e.g. the test set.
 *  <p>
 *    This learner writes the training set to a file and calls the native 
 *    version of the SVM<sup>light</sup>. In order to do that, the binaries
 *    must be correctly installed and the properties <code>yale.svmlight.learncommand</code> and
 *    <code>yale.svmlight.applycommand</code> must be correctly set in any of the yalerc 
 *    configuration files.
 *  </p>
 *
 *  @yale.xmlclass SVMLightLearner
 *  @yale.reference Joachims/99a
 *  @yale.reference Vapnik/98a
 *  @yale.index SVM
 *  @author Ingo Mierswa, Simon Fischer, Ralf Klinkenberg
 *  @version $Id: SVMLightLearner.java,v 1.3 2004/08/27 11:57:40 ingomierswa Exp $
 */
public class SVMLightLearner extends AbstractLearner {
    /* To Do:
     *   Internal note: For transduction do the following:
     *   <ul>
     *     <li>Create a new attribute that will be used as label: label = new Attribute(...)</li>
     *     <li>Add the label to the ExampleTable</li>
     *     <li>Clone the original exampleSet and use it as temp for generating the label values
     *     <li>Append a new AttributeReference to the temp
     *     <li>Iterate over all examples and set the value of the new label attribute according
     *         to your needs (e.g. example.setValue(label, "missing"))</li>
     *     <li>Clone the original example set once again</li>
     *     <li>Call setLabel(label) on this new example set</li>
     *     <li>Call setUnlabelledLabelIndex(label.mapString("missing")) on the SVMLightLearner</li>
     *   </ul>
     *   IDEA: Maybe subclass FeatureGenerator to generate the new label?
     *
     * History:
     *   RK/2002/07/05: performance estimation on all batches only and only with xi-alpha estimators
     *                  using the estimator values provided in the SVM^light standard output;
     *   RK/2002/09/13: added alternative performance evaluation by reading alpha values from a
     *                  separate SVM^light output file and computing the sum of alphas estimation, 
     *                  sum of alphas greater one estimation, no. of alphas greater one estimation
     *                  (on all batches or on last batch only (if (ExampleSet instanceof BatchedExampleSet)));
     *   RK/2003/03/26: merger of RK's Yale 1.0 version of this file into Yale 2.0;
     */

    static {
	Yale.registerYaleProperty(new ParameterTypeFile("yale.svmlight.learncommand", "Path to the svmlight executable", true));
	Yale.registerYaleProperty(new ParameterTypeFile("yale.svmlight.applycommand", "Path to the svmlight applier executable", true));
    }

    /** Names of the supported kernel types.
     *  NOTE(review): presumably the position in this array corresponds to the integer value of the
     *  <code>kernel_type</code> parameter, which <code>learn</code> passes to the learn command
     *  as <code>-t</code> -- confirm against the parameter declaration. */
    private static final String[]  KERNEL_TYPES = {"linear","polynomial","rbf","sigmoid"};

    // Identifiers of the performance estimation criteria (cf. the history note RK/2002/09/13 above).
    // NOTE(review): the code that consumes these constants is not visible in this part of the file.
    /** Xi-alpha error estimation. */
    static final int  ESTIMATION_CRITERION_XI_ALPHA_ERROR           = 0;    // RK/2002/09/13
    /** Sum of alphas estimation. */
    static final int  ESTIMATION_CRITERION_ALPHA_SUM                = 1;    // RK/2002/09/13
    /** Sum of alphas greater one estimation. */
    static final int  ESTIMATION_CRITERION_ALPHA_GREATER_ONE_SUM    = 2;    // RK/2002/09/13
    /** Number of alphas greater one estimation. */
    static final int  ESTIMATION_CRITERION_NO_OF_ALPHAS_GREATER_ONE = 3;    // RK/2002/09/13

    /** Initialised to <code>Attribute.FIRST_CLASS_INDEX</code>.
     *  NOTE(review): presumably the index of the label value treated as the positive class -- confirm. */
    private int  positiveLabelIndex = Attribute.FIRST_CLASS_INDEX;
    /** Initialised to -1.
     *  NOTE(review): presumably the index of the label value marking unlabelled examples for
     *  transduction, with -1 meaning "not set" -- confirm against its setter. */
    private int  unlabelledIndex    = -1;

    /** Performance estimation of the most recent <code>learn</code> call; assigned there from
     *  the result of <code>scanXiAlpha</code>. */
    private PerformanceVector  performanceEstimation;

    /** Creates configuration files and starts the learning process on the given examples.
     *  Returns a model, or null if it cannot be created.
     */
    public Model learn(ExampleSet exampleSet) throws OperatorException {
	LogService.logMessage("SVMLightLearner '"+getName()+"':  SVMlight starts learning.",
			      LogService.TASK);

	ExampleSet transductionExampleSet = null;
	try {
	    transductionExampleSet = (ExampleSet)getInput(ExampleSet.class);
	    if (transductionExampleSet != null) {
		LogService.logMessage("SVMLightLearner '"+getName()+"':  Found "+
				      exampleSet.getSize()+" examples for transduction.", 
				      LogService.TASK);
	    }
	} catch (MissingIOObjectException e) {}
	
	Attribute  labelAtt = exampleSet.getLabel();
	if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(labelAtt.getValueType(), Ontology.NOMINAL)) {
	    throw new UserError(this, 101, new Object[] {"SVMlight", labelAtt.getName() });
	}
	// this is already checked by taskIsClassification()
//  	if (labelAtt.getNumberOfClasses() != 2) {
//  	    LogService.logMessage("SVMLightLearner '"+getName()+"':  Training set label "
//  				  + "should have exactly two classes, but has " +
//  				  labelAtt.getNumberOfClasses(), LogService.ERROR);
//  	}

	// error estimation on all batches (default) or on last batch only (concept drift experiments)?
	boolean  estimateOnAllBatches = !(exampleSet instanceof BatchedExampleSet);   // RK/2002/09/13

	// Prepare the output and input files
	boolean classificationTask = taskIsClassification(getParameterAsInt("task_type"), exampleSet);
	File trainingFile = writeExamples(exampleSet, transductionExampleSet, classificationTask);

	// Determine the parameters
	int kernelType = getParameterAsInt("kernel_type");
	String parameters = getParameterAsString("additional_parameters");
	if (parameters == null) parameters = "";
	String  command = ParameterService.getProperty("yale.svmlight.learncommand");

	File  modelFile = TempFileService.createTempFile(getName()+"_model_");
	File  alphaFile = TempFileService.createTempFile(getName()+"_alphas_");   // RK/2002/09/13
	LogService.logMessage(getName()+": Using "
			      +(classificationTask ? "classification" : "regression")+" mode.",
			      LogService.MINIMUM);

	// Start the process
	Process  process = null;
	String   completeCommand;
	String options = " -t " + kernelType + " -z "+(classificationTask ? "c" : "r");
	if (estimateOnAllBatches) {
	    completeCommand = command + options + " " + parameters + " " +
		trainingFile + " " + modelFile;
	} else {
	    completeCommand = command + options + " -a " + alphaFile + " " + parameters + " " +
		trainingFile + " " + modelFile;
	}

	LogService.logMessage("SVMLightLearner '"+getName()+"':  SVMlight called with command '" 
			      + completeCommand + "'", LogService.TASK);
	try {
	    process = Runtime.getRuntime().exec(completeCommand);
	} catch (IOException e) {
	    TempFileService.deleteTempFile(modelFile);
	    TempFileService.deleteTempFile(alphaFile);
	    TempFileService.deleteTempFile(trainingFile);
	    throw new UserError(this, e, 310, new Object[] {command, e});
 	}

	// ---- get output ----
        BufferedReader in = new BufferedReader(new InputStreamReader(process.getInputStream())); // output of the external SVM^light process

	String output = null;
	try {
	    output = Tools.readOutput(in);;
	} catch (IOException e) {
	    TempFileService.deleteTempFile(modelFile);
	    TempFileService.deleteTempFile(alphaFile);
	    TempFileService.deleteTempFile(trainingFile);
	    throw new UserError(this, e, 308, "SVMlight");
	}

	try {
	    Tools.waitForProcess(this, process, "svmlight");
	} catch (OperatorException e) {
	    TempFileService.deleteTempFile(modelFile);
	    TempFileService.deleteTempFile(alphaFile);
	    TempFileService.deleteTempFile(trainingFile);
	    LogService.logMessage("Output of svm_learn:\n"+output, LogService.ERROR);
	    throw e;
	}

	try {
	    in.close();
	} catch (IOException e) {
	    TempFileService.deleteTempFile(modelFile);
	    TempFileService.deleteTempFile(alphaFile);
	    TempFileService.deleteTempFile(trainingFile);
	    LogService.logException("SVMLightLearner '"+getName()+"':  "
				    +"Cannot close SVMlight input stream.", e);
	    return null;
	}

	LogService.logMessage(output, LogService.MINIMUM);
	LogService.logMessage("SVMlight has succesfully trained a model.", LogService.TASK);

	// scan the output for xi-alpha-values and create an EstimatedPerformance-object,
	// if they are there (estimation on all batches), or compute performance criterion
	// from file with SVM^light alpha values (estimation on last batch only):
	if (estimateOnAllBatches) {                                       // RK/2002/09/13
	    performanceEstimation = scanXiAlpha(output, exampleSet.getSize());  // RK/2002/09/13: old
	} else {                                                          // RK/2002/09/13
	    performanceEstimation = scanXiAlpha(exampleSet,alphaFile);    // RK/2002/09/13: new
	}                                                                 // RK/2002/09/13

	// return the learned model (encapsulating the corresponding model file)
	try {
	    return new SVMLightModel(exampleSet.getLabel(), modelFile, classificationTask);
	} catch (IOException e) {
	    throw new UserError(this, e, 302, new Object[] { modelFile, e.getMessage() });
	} finally {
	    TempFileService.deleteTempFile(modelFile);
	    TempFileService.deleteTempFile(alphaFile);
	    TempFileService.deleteTempFile(trainingFile);
	}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -