📄 svmlightlearner.java

📁 著名的开源仿真软件yale
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
    /**
     * SVM<i><sup>light</sup></i> can estimate its generalization performance straight after training
     * using xi-alpha-estimates on the training set, without using a testing set.
     * Hence this method always returns <code>true</code>.
     *
     * @return always <code>true</code>
     */
    public boolean canEstimatePerformance() {
        return true; // RK/2003/03/26: always true or only if xi-alpha-estimation turned on?
    }

    /**
     * Returns the estimated performance in an object of the class <tt>PerformanceVector</tt>
     * if this SVM<i><sup>light</sup></i> learner produced xi-alpha-estimators;
     * otherwise it returns null.
     *
     * @return the stored performance estimation (the <code>performanceEstimation</code> field)
     */
    public PerformanceVector getEstimatedPerformance() {
        return performanceEstimation;
    }

    /**
     * Creates the file with examples for the external SVM<i><sup>light</sup></i> process.
     *
     * <p>Each example is written as one line: a label followed by the sparse attribute string.
     * For classification tasks the label is <code>+1</code> for the positive class,
     * <code>0</code> for the unlabelled class and <code>-1</code> for everything else;
     * otherwise the raw label value is written. All examples of the optional transduction
     * set are appended with label <code>0</code>.
     *
     * @param exampleSet             the training examples
     * @param transductionExampleSet additional examples written as unlabelled; may be null
     * @param classificationTask     whether labels are mapped to +1 / 0 / -1
     * @return the temporary file that was written
     */
    private File writeExamples(ExampleSet exampleSet, ExampleSet transductionExampleSet, boolean classificationTask) {
        File trainingFile = TempFileService.createTempFile(getName() + "_training_data_");
        PrintWriter out = null;
        try {
            out = new PrintWriter(new FileWriter(trainingFile));
        } catch (IOException e) {
            LogService.logException("Cannot write input file for SVMLight!", e);
            System.exit(1);
        }

        // Close the writer even if reading an example fails, so the file handle is not leaked.
        try {
            ExampleReader r = exampleSet.getExampleReader();
            while (r.hasNext()) {
                Example e = r.next();
                if (classificationTask) {
                    if (e.getLabel() == positiveLabelIndex) {
                        out.print("+1 ");
                    } else if (e.getLabel() == unlabelledIndex) {
                        out.print("0 ");
                    } else {
                        out.print("-1 ");
                    }
                } else {
                    out.print(e.getLabel() + " ");
                }
                out.println(e.getAttributesAsSparseString());
                //// previous line changed by Timm to:
                //out.println(toSparseString(e));  // avoids Strings from binary attributes
            }

            if (transductionExampleSet != null) {
                r = transductionExampleSet.getExampleReader();
                while (r.hasNext()) {
                    Example e = r.next();
                    out.println("0 " + e.getAttributesAsSparseString());
                }
            }
        } finally {
            out.close();
        }
        return trainingFile;
    }

    /**
     * Scans the output for xi-alpha-values and creates an object of the class
     * <code>PerformanceVector</code> with the estimated performance, if a
     * performance evaluation is available.
     *
     * <p>Recognized lines are the XiAlpha and leave-one-out estimates of error, recall and
     * precision; each one adds an <code>EstimatedPerformance</code> criterion.
     *
     * @param svmOutput        the captured textual output of SVM<i><sup>light</sup></i>
     * @param numberOfExamples passed through to every created <code>EstimatedPerformance</code>
     * @return the performance vector (possibly without criteria), or null if the output
     *         could not be parsed
     */
    private PerformanceVector scanXiAlpha(String svmOutput, int numberOfExamples) {
        try {
            PerformanceVector pv = new PerformanceVector();
            LineNumberReader lnr = new LineNumberReader(new StringReader(svmOutput));
            String line = null;
            while ((line = lnr.readLine()) != null) {
                line = line.trim();
                if (line.startsWith("XiAlpha-estimate of the error:")) {
                    pv.addCriterion(new EstimatedPerformance("xialpha_error", extractValue(line), numberOfExamples, true));
                } else if (line.startsWith("XiAlpha-estimate of the recall:")) {
                    pv.addCriterion(new EstimatedPerformance("xialpha_recall", extractValue(line), numberOfExamples, false));
                } else if (line.startsWith("XiAlpha-estimate of the precision:")) {
                    pv.addCriterion(new EstimatedPerformance("xialpha_precision", extractValue(line), numberOfExamples, false));
                } else if (line.startsWith("Leave-one-out estimate of the error:")) {
                    pv.addCriterion(new EstimatedPerformance("leave_one_out_error", extractValue(line), numberOfExamples, true));
                } else if (line.startsWith("Leave-one-out estimate of the recall:")) {
                    pv.addCriterion(new EstimatedPerformance("leave_one_out_recall", extractValue(line), numberOfExamples, false));
                } else if (line.startsWith("Leave-one-out estimate of the precision:")) {
                    pv.addCriterion(new EstimatedPerformance("leave_one_out_precision", extractValue(line), numberOfExamples, false));
                }
            }
            return pv;
        } catch (IOException ioe) {
            LogService.logMessage("IOException: SVM^light output could not be parsed!", LogService.ERROR);
        } catch (NumberFormatException nfe) {
            LogService.logMessage("NumberFormatException: SVM^light output could not be parsed!", LogService.ERROR);
        } catch (IndexOutOfBoundsException ioobe) {
            LogService.logMessage("IndexOutOfBoundsException: SVM^light output could not be parsed!", LogService.ERROR);
        }
        return null;
    } // end private PerformanceVector scanXiAlpha(svmOutput)

    /**
     * Scans the alpha values from the file <tt>alphaFile</tt> for all examples
     * in the last batch, computes the desired performance estimation criterion,
     * and creates an object of the class <code>PerformanceVector</code> with the
     * estimated performance value(s).
     *
     * <p>The alpha file is read in lock-step with the example set: line <i>i</i> belongs to
     * example <i>i</i>. Only lines whose example lies in the last batch are evaluated; the
     * first token of such a line is taken as the alpha value (its absolute value is used).
     *
     * @param exampleSet the examples; cast to <code>BatchedExampleSet</code> by this method
     * @param alphaFile  the alpha file written by SVM<i><sup>light</sup></i>
     * @return the performance vector; its first criterion is the one used for optimization
     * @throws OperatorException a <code>UserError</code> (code 302) if the alpha file cannot be
     *         read, or a <code>FatalException</code> if an evaluated line contains no value
     */
    private PerformanceVector scanXiAlpha(ExampleSet exampleSet, File alphaFile) throws OperatorException { // RK/2002/09/13
        int estimationCriterion = ESTIMATION_CRITERION_ALPHA_SUM;                                            // RK/2002/09/13

        LogService.logMessage("Alpha-value-based performance estimation on last batch by SVM^light "
                              + "(SVMLightLearner '" + getName() + "'):\n", LogService.MINIMUM);

        int    noOfExamples         = 0;
        double noOfAlphasGreaterOne = 0.0;
        double error                = 0.0;
        double alphaSum             = 0.0;
        double alphaGreaterOneSum   = 0.0;
        double currentAlpha         = 0.0;

        try {
            BufferedReader alphaFileReader = new BufferedReader(new FileReader(alphaFile));
            // The reader used to be left open; release the file handle on every exit path.
            try {
                String          line               = null;
                StringTokenizer tokenizer          = null;
                int             currentlyLastBatch = 0;     // index of last batch of the current time window
                int             currentBatch       = 0;     // batch index of the current example
                Example         currentExample     = null;
                ExampleReader   exampleIterator    = exampleSet.getExampleReader();

                currentlyLastBatch = ((BatchedExampleSet) exampleSet).getLastBatch();
                while (((line = alphaFileReader.readLine()) != null)
                       && ((currentExample = exampleIterator.next()) != null)) {
                    line = line.trim();
                    currentBatch = (int) currentExample.getValue(((BatchedExampleSet) exampleSet).getBatchIndexAttribute());
                    if (currentBatch == currentlyLastBatch) {
                        tokenizer = new StringTokenizer(line);
                        if (tokenizer.countTokens() < 1) {
                            throw new FatalException("SVMLightLearner '"+getName()+"':  "
                                                     + "each line in the SVM^light alpha file must "
                                                     + "contain at least one value.");
                        }
                        currentAlpha = Math.abs(Double.parseDouble(tokenizer.nextToken()));
                        alphaSum += currentAlpha;
                        // NOTE(review): the sum uses ">=" while the count uses ">" -- kept as in the
                        // original; confirm whether the asymmetry is intended.
                        if (currentAlpha >= 1.0) { alphaGreaterOneSum += currentAlpha; }
                        if (currentAlpha > 1.0) { noOfAlphasGreaterOne++; }
                        noOfExamples++;
                    }
                }
            } finally {
                try {
                    alphaFileReader.close();
                } catch (IOException closeFailure) {
                    // A failed close of a read-only stream must not mask the result or an
                    // exception that is already propagating; just report it.
                    LogService.logMessage("SVMLightLearner '" + getName() + "': could not close alpha file "
                                          + alphaFile, LogService.WARNING);
                }
            }
        } catch (IOException e) {
            throw new UserError(this, e, 302, alphaFile);
        }

        error = noOfAlphasGreaterOne;
        LogService.logMessage("  Estimated number of errors:  "+(long)error+" of "+noOfExamples+" examples",
                              LogService.MINIMUM);
        if (noOfExamples > 0) {
            // turn the absolute sums/counts into per-example averages
            error                /= noOfExamples;
            noOfAlphasGreaterOne /= noOfExamples;
            alphaSum             /= noOfExamples;
            alphaGreaterOneSum   /= noOfExamples;
        } else {
            error = 1.0;
            LogService.logMessage("SVMLightLearner '"+this.getName()+"': setting error estimation to 1.0, " +
                                  "because number of examples for estimation equals zero", LogService.WARNING);
        }
        LogService.logMessage("  Estimated error:             "+error+" of "+noOfExamples, LogService.MINIMUM);

        // ---- provide <tt>PerformanceVector</tt>-object to be passed to operator output by ----
        // ---- the super class method <tt>apply()</tt>                                      ----
        PerformanceVector pv = new PerformanceVector();    // note: the first criterion in the vector is the one used for optimization
        if (estimationCriterion == ESTIMATION_CRITERION_ALPHA_SUM) {
            pv.addCriterion(new EstimatedPerformance("alpha_sum", alphaSum, exampleSet.getSize(), true));
        }
        if (estimationCriterion == ESTIMATION_CRITERION_ALPHA_GREATER_ONE_SUM) {
            // NOTE(review): criterion name "alpha_sum" is reused here for the alpha-greater-one sum;
            // kept unchanged because callers may look the criterion up by name -- confirm.
            pv.addCriterion(new EstimatedPerformance("alpha_sum", alphaGreaterOneSum, exampleSet.getSize(), true));
        }
        if (estimationCriterion == ESTIMATION_CRITERION_NO_OF_ALPHAS_GREATER_ONE) {
            pv.addCriterion(new EstimatedPerformance("no_of_alphas_greater_one", noOfAlphasGreaterOne, exampleSet.getSize(), true));
        }
        pv.addCriterion(new EstimatedPerformance("xialpha_error", error, exampleSet.getSize(), true));
        pv.addCriterion(new EstimatedPerformance("xialpha_accuracy", (1.0-error), exampleSet.getSize(), false));

        LogService.logMessage("SVMLearner: ESTIMATOR error                     = " + error +
                              "  (at batch " + ((BatchedExampleSet) exampleSet).getLastBatch() + ")\n" +
                              "            ESTIMATOR no. of alphas greater one = " + noOfAlphasGreaterOne + "\n" +
                              "            ESTIMATOR alpha sum                 = " + alphaSum + "\n" +
                              "            ESTIMATOR alpha greater one sum     = " + alphaGreaterOneSum,
                              LogService.TASK);
        return pv;
    } // end private PerformanceVector scanXiAlpha(exampleSet,alphaFile)         // RK/2002/09/13

    /**
     * Extracts the percentage value from one estimate line of the SVM<i><sup>light</sup></i>
     * output and converts it to a fraction.
     *
     * @param line an output line containing e.g. "error<=50%" or "precision=>40%"
     * @return the value divided by 100, or <code>Double.NaN</code> if the value reads "nan"
     *         (in any letter case)
     * @throws NumberFormatException     if the text between the comparison sign and '%' is no number
     * @throws IndexOutOfBoundsException if the line contains no '=' or no '%' after it
     */
    private double extractValue(String line) throws NumberFormatException, IndexOutOfBoundsException {
        // SVM^light output format: "error<=50%", "precision=>40%" etc.
        String pre = line.substring(line.indexOf("="));
        if (pre.startsWith("=>")) {
            pre = pre.substring(1);   // drop the '=' so that exactly one sign character remains
        }
        // skip the remaining sign character ('=' or '>') and cut at the percent sign
        String value = pre.substring(1, pre.indexOf("%"));
        // equalsIgnoreCase avoids the default-locale dependence of toLowerCase()
        if ("nan".equalsIgnoreCase(value)) {
            return Double.NaN;
        }
        return Double.parseDouble(value) / 100.0;
    } // end private double extractValue

//      /** very similar to <code>Example.toSparseString</code>, but writes doubles */
//      public static String toSparseString(Example e) {
//  	StringBuffer str = new StringBuffer();
//  	boolean first = true;
//  	for (int i = 0; i < e.getNumberOfAttributes(); i++) {
//  	    if (e.getValue(i) != 0.0) {
//  		if (!first) str.append(" ");
//  		str.append((i+1)+":"+e.getValue(i));
//  		first = false;
//  	    }
//  	}
//  	return str.toString();
//      } // end private String toSparseString

    /**
     * Sets the index of the class to use as "positive" (+1), e.g.
     * <code>setPositiveLabelIndex(attribute.mapString("positive"))</code>
     *
     * @param index the label index written as <code>+1</code> for classification tasks
     */
    public void setPositiveLabelIndex(int index) {
        this.positiveLabelIndex = index;
    }

    /**
     * Sets the index of the class to use as "unlabelled" (0) for transduction, e.g.
     * <code>setUnlabeledLabelIndex(attribute.mapString("missing"))</code>.
     * If transduction should not be used, set index to -1.
     *
     * @param index the label index written as <code>0</code> for classification tasks
     */
    public void setUnlabeledLabelIndex(int index) {
        this.unlabelledIndex = index;
    }

    /**
     * Returns the parameter types of the super class extended by the parameters
     * <code>kernel_type</code>, <code>additional_parameters</code> and <code>task_type</code>.
     *
     * @return the list obtained from <code>super.getParameterTypes()</code> with the three
     *         parameter types appended
     */
    public List getParameterTypes() {
        List types = super.getParameterTypes();
        types.add(new ParameterTypeCategory("kernel_type", "Kernel of the support vector machine.", KERNEL_TYPES, 0));
        types.add(new ParameterTypeString("additional_parameters", "Additional parameters for the SVMlight."));
        types.add(new ParameterTypeCategory("task_type", "The type of the task, i.e. classification or regression.",
                                            TASK_TYPES, AUTO));
        return types;
    }
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -