📄 svmlightlearner.java

📁 一个很好的LIBSVM的JAVA源码。对于要研究和改进SVM算法的学者。可以参考。来自数据挖掘工具YALE工具包。
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
    }


    /**
     * Reports whether this learner is able to estimate its own performance.
     * SVM<i><sup>light</sup></i> can derive xi-alpha-estimates of its generalization
     * performance from the training set right after training, so no separate
     * testing set is required and the answer is unconditionally <code>true</code>.
     *
     * @return always <code>true</code>
     */
    public boolean canEstimatePerformance() {
        // RK/2003/03/26: open question - always true, or only if xi-alpha-estimation is turned on?
        final boolean estimationSupported = true;
        return estimationSupported;
    }

    /**
     * Provides the performance estimation currently stored by this learner.
     *
     * @return an object of the class <tt>PerformanceVector</tt> holding the estimated
     *         performance if this SVM<i><sup>light</sup></i> learner produced
     *         xi-alpha-estimators; <code>null</code> otherwise
     */
    public PerformanceVector getEstimatedPerformance() {
        return this.performanceEstimation;
    }


    /**
     * Creates the file with examples for the external SVM<i><sup>light</sup></i> process.
     *
     * <p>Each example of <code>exampleSet</code> is written as one line: a target value
     * followed by the example's sparse attribute string. For classification tasks the
     * target is <code>+1</code> if the label equals the positive label index,
     * <code>0</code> if it equals the unlabelled index and <code>-1</code> otherwise;
     * for other tasks the label value itself is written. All examples of
     * <code>transductionExampleSet</code> (if given) are appended with target <code>0</code>.
     *
     * @param exampleSet              the examples to write
     * @param transductionExampleSet  examples to append as unlabelled; may be <code>null</code>
     * @param classificationTask      <code>true</code> to map labels to +1 / 0 / -1,
     *                                <code>false</code> to write the label value unchanged
     * @return the temporary file the examples were written to
     * @throws IllegalStateException  if the temporary file cannot be opened for writing
     */
    private File writeExamples(ExampleSet exampleSet, ExampleSet transductionExampleSet, boolean classificationTask) {
        File trainingFile = TempFileService.createTempFile(getName() + "_training_data_");
        PrintWriter out;
        try {
            out = new PrintWriter(new FileWriter(trainingFile));
        } catch (IOException e) {
            LogService.logException("Cannot write input file for SVMLight!", e);
            // Without a writer nothing can be written; fail with the real cause instead of
            // running into a NullPointerException on the first write further below.
            IllegalStateException failure =
                new IllegalStateException("Cannot write input file for SVMLight: " + trainingFile);
            failure.initCause(e);
            throw failure;
        }

        try {
            ExampleReader r = exampleSet.getExampleReader();
            while (r.hasNext()) {
                Example e = r.next();
                if (classificationTask) {
                    if (e.getLabel() == positiveLabelIndex) {
                        out.print("+1 ");
                    } else if (e.getLabel() == unlabelledIndex) {
                        out.print("0 ");
                    } else {
                        out.print("-1 ");
                    }
                } else {
                    out.print(e.getLabel() + " ");
                }
                out.println(e.getAttributesAsSparseString());
                //// previous line changed by Timm to:
                //out.println(toSparseString(e));  // avoids Strings from binary attributes
            }

            if (transductionExampleSet != null) {
                r = transductionExampleSet.getExampleReader();
                while (r.hasNext()) {
                    Example e = r.next();
                    out.println("0 " + e.getAttributesAsSparseString());
                }
            }

            // PrintWriter never throws on write errors; it only records them. Ask explicitly
            // so that a truncated training file does not go unnoticed.
            if (out.checkError()) {
                LogService.logMessage("Error while writing input file for SVMLight: " + trainingFile,
                                      LogService.ERROR);
            }
        } finally {
            // release the file handle even if reading the examples fails
            out.close();
        }
        return trainingFile;
    }



    /**
     * Scans the textual output of SVM<i><sup>light</sup></i> for xi-alpha and
     * leave-one-out estimates and collects every estimate found as a criterion
     * of a <code>PerformanceVector</code>.
     *
     * @param svmOutput         the output of the SVM<i><sup>light</sup></i> process
     * @param numberOfExamples  example count handed to each created <code>EstimatedPerformance</code>
     * @return the vector with all estimates found in the output, or <code>null</code>
     *         if the output could not be parsed
     */
    private PerformanceVector scanXiAlpha(String svmOutput, int numberOfExamples) {
        // Parallel tables: line prefix to look for, name of the criterion created for it,
        // and the boolean flag passed on to EstimatedPerformance for that criterion.
        final String[] linePrefixes = {
            "XiAlpha-estimate of the error:",
            "XiAlpha-estimate of the recall:",
            "XiAlpha-estimate of the precision:",
            "Leave-one-out estimate of the error:",
            "Leave-one-out estimate of the recall:",
            "Leave-one-out estimate of the precision:"
        };
        final String[] criterionNames = {
            "xialpha_error",
            "xialpha_recall",
            "xialpha_precision",
            "leave_one_out_error",
            "leave_one_out_recall",
            "leave_one_out_precision"
        };
        final boolean[] criterionFlags = { true, false, false, true, false, false };

        PerformanceVector result = null;
        try {
            PerformanceVector collected = new PerformanceVector();
            LineNumberReader outputReader = new LineNumberReader(new StringReader(svmOutput));

            for (String raw = outputReader.readLine(); raw != null; raw = outputReader.readLine()) {
                String current = raw.trim();
                // at most one prefix can match a line; stop at the first hit
                for (int i = 0; i < linePrefixes.length; i++) {
                    if (current.startsWith(linePrefixes[i])) {
                        collected.addCriterion(new EstimatedPerformance(criterionNames[i], extractValue(current),
                                                                        numberOfExamples, criterionFlags[i]));
                        break;
                    }
                }
            }
            result = collected;
        } catch (IOException ioe) {
            LogService.logMessage("IOException: SVM^light output could not be parsed!", LogService.ERROR);
        } catch (NumberFormatException nfe) {
            LogService.logMessage("NumberFormatException: SVM^light output could not be parsed!", LogService.ERROR);
        } catch (IndexOutOfBoundsException ioobe) {
            LogService.logMessage("IndexOutOfBoundsException: SVM^light output could not be parsed!", LogService.ERROR);
        }
        return result;
    } // end private PerformanceVector scanXiAlpha(svmOutput)



    /**
     * Scans the alpha values from the file <tt>alphaFile</tt> for all examples
     * in the last batch, computes the desired performance estimation criterion,
     * and creates an object of the class <code>PerformanceVector</code> with the
     * estimated performance value(s).
     *
     * <p>Line <i>i</i> of the alpha file is paired with example <i>i</i> of
     * <code>exampleSet</code>; only examples whose batch index equals the last batch
     * contribute. The first token of such a line is parsed as the alpha value and its
     * absolute value is used. Reading stops as soon as either the file or the example
     * set is exhausted.
     *
     * @param exampleSet  the examples belonging to the alpha values; is cast to
     *                    <code>BatchedExampleSet</code>
     * @param alphaFile   file with one alpha value per line
     * @return the performance vector; its first criterion is the one used for optimization
     * @throws OperatorException  (as <code>UserError</code>) if the alpha file cannot be read
     *                            or a line for an example of the last batch contains no value
     */
    private PerformanceVector scanXiAlpha(ExampleSet exampleSet, File alphaFile) throws OperatorException { // RK/2002/09/13
        int estimationCriterion = ESTIMATION_CRITERION_ALPHA_SUM;                                            // RK/2002/09/13

        LogService.logMessage("Alpha-value-based performance estimation on last batch by SVM^light "
                              + "(SVMLightLearner '" + getName() + "'):\n", LogService.MINIMUM);
        int      noOfExamples         = 0;
        double   noOfAlphasGreaterOne = 0.0;
        double   alphaSum             = 0.0;
        double   alphaGreaterOneSum   = 0.0;

        BufferedReader alphaFileReader = null;
        try {
            alphaFileReader = new BufferedReader(new FileReader(alphaFile));

            ExampleReader     exampleIterator    = exampleSet.getExampleReader();
            BatchedExampleSet batchedExampleSet  = (BatchedExampleSet) exampleSet;
            int               currentlyLastBatch = batchedExampleSet.getLastBatch(); // index of last batch of the current time window

            String  line           = null;
            Example currentExample = null;
            // hasNext() guards against an alpha file that has more lines than there are examples
            while (((line = alphaFileReader.readLine()) != null)
                   && exampleIterator.hasNext()
                   && ((currentExample = exampleIterator.next()) != null)) {
                line = line.trim();
                // batch index of the current example
                int currentBatch = (int) currentExample.getValue(batchedExampleSet.getBatchIndexAttribute());
                if (currentBatch == currentlyLastBatch) {
                    StringTokenizer tokenizer = new StringTokenizer(line);
                    if (tokenizer.countTokens() < 1) {
                        throw new UserError(this, 911, new Object[] {
                            alphaFile, "Each line must contain at least one value."});
                    }
                    double currentAlpha = Math.abs(Double.parseDouble(tokenizer.nextToken()));
                    alphaSum += currentAlpha;
                    // note: the sum includes alphas equal to one, the counter below does not
                    if (currentAlpha >= 1.0) { alphaGreaterOneSum += currentAlpha; }
                    if (currentAlpha > 1.0) { noOfAlphasGreaterOne++; }
                    noOfExamples++;
                }
            }
        } catch (IOException e) {
            throw new UserError(this, e, 302, alphaFile);
        } finally {
            // always release the file handle, also when a UserError is thrown above
            if (alphaFileReader != null) {
                try {
                    alphaFileReader.close();
                } catch (IOException closeFailure) {
                    // all data has been read (or the error is already being reported); only log
                    LogService.logException("Cannot close alpha file '" + alphaFile + "'.", closeFailure);
                }
            }
        }

        // the number of alphas greater than one serves as the estimated number of errors
        double error = noOfAlphasGreaterOne;
        LogService.logMessage("  Estimated number of errors:  "+(long)error+" of "+noOfExamples+" examples",
                              LogService.MINIMUM);
        if (noOfExamples > 0) {
            // turn the absolute values into averages over the examples of the last batch
            error                /= noOfExamples;
            noOfAlphasGreaterOne /= noOfExamples;
            alphaSum             /= noOfExamples;
            alphaGreaterOneSum   /= noOfExamples;
        } else {
            error = 1.0;
            LogService.logMessage("SVMLightLearner '"+this.getName()+"': setting error estimation to 1.0, " +
                                  "because number of examples for estimation equals zero", LogService.WARNING);
        }
        LogService.logMessage("  Estimated error:             "+error+" of "+noOfExamples, LogService.MINIMUM);

        // ---- provide <tt>PerformanceVector</tt>-object to be passed to operator output by ----
        // ---- the super class method <tt>apply()</tt>                                      ----
        PerformanceVector  pv = new PerformanceVector();    // note: the first criterion in the vector is the one used for optimization
        if (estimationCriterion == ESTIMATION_CRITERION_ALPHA_SUM) {
            pv.addCriterion (new EstimatedPerformance ("alpha_sum", alphaSum, exampleSet.getSize(), true));
        }
        if (estimationCriterion == ESTIMATION_CRITERION_ALPHA_GREATER_ONE_SUM) {
            // NOTE(review): this criterion is also named "alpha_sum"; looks like a copy-and-paste
            // slip, but the name is kept unchanged here - confirm before renaming.
            pv.addCriterion (new EstimatedPerformance ("alpha_sum", alphaGreaterOneSum,exampleSet.getSize(), true));
        }
        if (estimationCriterion == ESTIMATION_CRITERION_NO_OF_ALPHAS_GREATER_ONE) {
            pv.addCriterion (new EstimatedPerformance ("no_of_alphas_greater_one", noOfAlphasGreaterOne, exampleSet.getSize(), true));
        }
        pv.addCriterion (new EstimatedPerformance ("xialpha_error", error, exampleSet.getSize(), true));
        pv.addCriterion (new EstimatedPerformance ("xialpha_accuracy", (1.0-error), exampleSet.getSize(), false));

        LogService.logMessage("SVMLearner: ESTIMATOR error                     = " + error +
                              "  (at batch " + ((BatchedExampleSet) exampleSet).getLastBatch() + ")\n" +
                              "            ESTIMATOR no. of alphas greater one = " + noOfAlphasGreaterOne + "\n" +
                              "            ESTIMATOR alpha sum                 = " + alphaSum + "\n" +
                              "            ESTIMATOR alpha greater one sum     = " + alphaGreaterOneSum,
                              LogService.TASK);

        return pv;
    } // end private PerformanceVector scanXiAlpha(exampleSet,alphaFile)         // RK/2002/09/13



    /**
     * Extracts the percentage from one estimate line of the SVM<i><sup>light</sup></i>
     * output and returns it as a fraction (percentage divided by 100).
     *
     * <p>The value is the text between the first <code>=</code> of the line (or the
     * <code>=&gt;</code> starting there) and the following <code>%</code>, e.g.
     * <code>"error&lt;=50%"</code> yields 0.5 and <code>"precision=&gt;40%"</code> yields 0.4.
     * A value of <code>nan</code> in any letter case, optionally signed (<code>-nan</code>
     * as printed by some C libraries), yields <code>Double.NaN</code>.
     *
     * @param line  one line of the SVM<i><sup>light</sup></i> output
     * @return the extracted value divided by 100, or <code>Double.NaN</code>
     * @throws NumberFormatException      if the text before <code>%</code> is neither a number nor nan
     * @throws IndexOutOfBoundsException  if the line contains no <code>=</code> or no
     *                                    <code>%</code> after it
     */
    private double extractValue(String line) throws NumberFormatException, IndexOutOfBoundsException {
        // SVM^light output format: "error<=50%", "precision=>40%" etc.
        String pre = line.substring(line.indexOf("="));   // throws if there is no '='
        if (pre.startsWith("=>")) { pre = pre.substring(1);  }
        String value = pre.substring(1, pre.indexOf("%")).trim();   // throws if there is no '%'

        // C's printf may emit "nan", "NaN", "-nan" or "+nan"; none of them is accepted by parseDouble
        String unsigned = value;
        if (unsigned.startsWith("-") || unsigned.startsWith("+")) {
            unsigned = unsigned.substring(1);
        }
        if (unsigned.equalsIgnoreCase("nan")) {
            return Double.NaN;
        }
        return Double.parseDouble(value) / 100.0;
    } // end private double extractValue



//      /** very similar to <code>Example.toSparseString</code>, but writes doubles */
//      public static String toSparseString(Example e) {
//  	StringBuffer str = new StringBuffer();
//  	boolean first = true;
//  	for (int i = 0; i < e.getNumberOfAttributes(); i++) {
//  	    if (e.getValue(i) != 0.0) {
//  		if (!first) str.append(" ");
//  		str.append((i+1)+":"+e.getValue(i));
//  		first = false;
//  	    }
//  	}
//  	return str.toString();
//      } // end private String toSparseString



    /**
     * Selects the class that is used as "positive" (+1).
     * Example: <code>setPositiveLabelIndex(attribute.mapString("positive"))</code>.
     *
     * @param index  index of the class to use as positive class
     */
    public void setPositiveLabelIndex(int index) {
        positiveLabelIndex = index;
    }


    /**
     * Selects the class that is used as "unlabelled" (0) for transduction.
     * Example: <code>setUnlabeledLabelIndex(attribute.mapString("missing"))</code>.
     * If transduction should not be used, set the index to -1.
     *
     * @param index  index of the class to use as unlabelled class, or -1 for no transduction
     */
    public void setUnlabeledLabelIndex(int index) {
        unlabelledIndex = index;
    }



    /**
     * Returns the parameter types of the super class extended by the parameters of this
     * learner, appended in this order: <code>kernel_type</code> (marked as non-expert),
     * <code>additional_parameters</code> and <code>task_type</code>.
     *
     * @return the list obtained from the super class with the three types added
     */
    public List getParameterTypes() {
        List parameterTypes = super.getParameterTypes();

        ParameterType kernelType =
            new ParameterTypeCategory("kernel_type", "Kernel of the support vector machine.", KERNEL_TYPES, 0);
        kernelType.setExpert(false);
        parameterTypes.add(kernelType);

        ParameterTypeString additionalParameters =
            new ParameterTypeString("additional_parameters", "Additional parameters for the SVMlight.");
        parameterTypes.add(additionalParameters);

        ParameterTypeCategory taskType =
            new ParameterTypeCategory("task_type", "The type of the task, i.e. classification or regression.",
                                      TASK_TYPES, AUTO);
        parameterTypes.add(taskType);

        return parameterTypes;
    }
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -