📄 svmlightlearner.java
字号:
/** SVM<i><sup>light</sup></i> can estimate its generalization performance straight after training * using xi-alpha-estimates on the training set, without using a testing set. * Hence this method always returns <code>true</code>. */ public boolean canEstimatePerformance() { return true; // RK/2003/03/26: always true or only if xi-alpha-estimation turned on? } /** returns the estimated performance in an object of the class <tt>PerformanceVector</tt> * if this SVM<i><sup>light</sup></i> learner produced xi-alpha-estimators; * otherwise it returns null. */ public PerformanceVector getEstimatedPerformance() { return performanceEstimation; } /** Creates the files with examples for the external SVM<i><sup>light</sup></i> process. */ private File writeExamples(ExampleSet exampleSet, ExampleSet transductionExampleSet, boolean classificationTask) { File trainingFile = TempFileService.createTempFile(getName() + "_training_data_"); PrintWriter out = null; try { out = new PrintWriter(new FileWriter(trainingFile)); } catch (IOException e) { LogService.logException("Cannot write input file for SVMLight!", e); System.exit(1); } ExampleReader r = exampleSet.getExampleReader(); int numberOfAttributes = exampleSet.getNumberOfAttributes(); while (r.hasNext()) { Example e = r.next(); if (classificationTask) { if (e.getLabel() == positiveLabelIndex) { out.print("+1 "); } else if (e.getLabel() == unlabelledIndex) { out.print("0 "); } else { out.print("-1 "); } } else { out.print(e.getLabel() + " "); } out.println(e.getAttributesAsSparseString()); //// previous line changed by Timm to: //out.println(toSparseString(e)); // avoids Strings from binary attributes } if (transductionExampleSet != null) { r = transductionExampleSet.getExampleReader(); while (r.hasNext()) { Example e = r.next(); out.println("0 "+e.getAttributesAsSparseString()); } } out.close(); return trainingFile; } /** scans the output for xi-alpha-values and creates an object of the class * <code>PerformanceVector</code> with the estimated performance, if a * performance evaluation is available. */ private PerformanceVector scanXiAlpha(String svmOutput, int numberOfExamples) { try { PerformanceVector pv = new PerformanceVector(); LineNumberReader lnr = new LineNumberReader(new StringReader(svmOutput)); String line = null; double error = -1, recall = -1, precision = -1; while ((line = lnr.readLine()) != null) { line = line.trim(); if (line.startsWith("XiAlpha-estimate of the error:")) { pv.addCriterion(new EstimatedPerformance("xialpha_error", extractValue(line), numberOfExamples, true)); } else if (line.startsWith("XiAlpha-estimate of the recall:")) { pv.addCriterion(new EstimatedPerformance("xialpha_recall", extractValue(line), numberOfExamples, false)); } else if (line.startsWith("XiAlpha-estimate of the precision:")) { pv.addCriterion(new EstimatedPerformance("xialpha_precision", extractValue(line), numberOfExamples, false)); } else if (line.startsWith("Leave-one-out estimate of the error:")) { pv.addCriterion(new EstimatedPerformance("leave_one_out_error", extractValue(line), numberOfExamples, true)); } else if (line.startsWith("Leave-one-out estimate of the recall:")) { pv.addCriterion(new EstimatedPerformance("leave_one_out_recall", extractValue(line), numberOfExamples, false)); } else if (line.startsWith("Leave-one-out estimate of the precision:")) { pv.addCriterion(new EstimatedPerformance("leave_one_out_precision", extractValue(line), numberOfExamples, false)); } } return pv; } catch (IOException ioe) { LogService.logMessage("IOException: SVM^light output could not be parsed!", LogService.ERROR); } catch (NumberFormatException nfe) { LogService.logMessage("NumberFormatException: SVM^light output could not be parsed!", LogService.ERROR); } catch (IndexOutOfBoundsException ioobe) { LogService.logMessage("IndexOutOfBoundsException: SVM^light output could not be parsed!", LogService.ERROR); } return null; } // end private PerformanceVector scanXiAlpha(svmOutput) /** scans the alpha values from the file <tt>alphaFile</tt> for all examples * in the last batch, computes the desired performance estimation criterion, * and creates an object of the class <code>PerformanceVector</code> with the * estimated performance value(s). */ private PerformanceVector scanXiAlpha(ExampleSet exampleSet, File alphaFile) throws OperatorException { // RK/2002/09/13 int estimationCriterion = ESTIMATION_CRITERION_ALPHA_SUM; // RK/2002/09/13 LogService.logMessage("Alpha-value-based performance estimation on last batch by SVM^light " + "(SVMLightLearner '" + getName() + "'):\n", LogService.MINIMUM); int noOfExamples = 0; double noOfAlphasGreaterOne = 0.0; double error = 0.0; double alphaSum = 0.0; double alphaGreaterOneSum = 0.0; double currentAlpha = 0.0; try { BufferedReader alphaFileReader = new BufferedReader(new FileReader(alphaFile)); String line = null; StringTokenizer tokenizer = null; int currentlyLastBatch = 0; // index of last batch of the current time window int currentBatch = 0; // batch index of the current example Example currentExample = null; ExampleReader exampleIterator = exampleSet.getExampleReader(); currentlyLastBatch = ((BatchedExampleSet) exampleSet).getLastBatch(); while (((line = alphaFileReader.readLine()) != null) && ((currentExample = exampleIterator.next()) != null)) { line = line.trim(); currentBatch = (int) currentExample.getValue(((BatchedExampleSet) exampleSet).getBatchIndexAttribute()); if (currentBatch == currentlyLastBatch){ tokenizer = new StringTokenizer(line); if (tokenizer.countTokens() < 1) { throw new FatalException("SVMLightLearner '"+getName()+"': " + "each line in the SVM^light alpha file must " + "contain at least one value."); } currentAlpha = Math.abs(Double.parseDouble(tokenizer.nextToken())); alphaSum += currentAlpha; if (currentAlpha >= 1.0) { alphaGreaterOneSum += currentAlpha; } if (currentAlpha > 1.0) { noOfAlphasGreaterOne++; } noOfExamples++; } } } catch (IOException e) { throw new UserError(this, e, 302, alphaFile); } error = noOfAlphasGreaterOne; LogService.logMessage(" Estimated number of errors: "+(long)error+" of "+noOfExamples+" examples", LogService.MINIMUM); if (noOfExamples > 0) { error /= noOfExamples; noOfAlphasGreaterOne /= noOfExamples; alphaSum /= noOfExamples; alphaGreaterOneSum /= noOfExamples; } else { error = 1.0; LogService.logMessage("SVMLightLearner '"+this.getName()+"': setting error estimation to 1.0, " + "because number of examples for estimation equals zero", LogService.WARNING); } LogService.logMessage(" Estimated error: "+error+" of "+noOfExamples, LogService.MINIMUM); // ---- provide <tt>PerformanceVector</tt>-object to be passed to operator output by ---- // ---- the super class method <tt>apply()</tt> ---- PerformanceVector pv = new PerformanceVector(); // note: the first criterion in the vector is the one used for optimization if (estimationCriterion == ESTIMATION_CRITERION_ALPHA_SUM) { pv.addCriterion (new EstimatedPerformance ("alpha_sum", alphaSum, exampleSet.getSize(), true)); } if (estimationCriterion == ESTIMATION_CRITERION_ALPHA_GREATER_ONE_SUM) { pv.addCriterion (new EstimatedPerformance ("alpha_sum", alphaGreaterOneSum,exampleSet.getSize(), true)); } if (estimationCriterion == ESTIMATION_CRITERION_NO_OF_ALPHAS_GREATER_ONE) { pv.addCriterion (new EstimatedPerformance ("no_of_alphas_greater_one", noOfAlphasGreaterOne, exampleSet.getSize(), true)); } pv.addCriterion (new EstimatedPerformance ("xialpha_error", error, exampleSet.getSize(), true)); pv.addCriterion (new EstimatedPerformance ("xialpha_accuracy", (1.0-error), exampleSet.getSize(), false)); LogService.logMessage("SVMLearner: ESTIMATOR error = " + error + " (at batch " + ((BatchedExampleSet) exampleSet).getLastBatch() + ")\n" + " ESTIMATOR no. of alphas greater one = " + noOfAlphasGreaterOne + "\n" + " ESTIMATOR alpha sum = " + alphaSum + "\n" + " ESTIMATOR alpha greater one sum = " + alphaGreaterOneSum, LogService.TASK); return pv; } // end private PerformanceVector scanXiAlpha(exampleSet,alphaFile) // RK/2002/09/13 private double extractValue(String line) throws NumberFormatException, IndexOutOfBoundsException { // SVM^light output format: "error<=50%", "precision=>40%" etc. String pre = line.substring(line.indexOf("=")); if (pre.startsWith("=>")) { pre = pre.substring(1); } String value = pre.substring(1, pre.indexOf("%")); if (value.toLowerCase().equals("nan")) return Double.NaN; else return Double.parseDouble(value)/100.0; } // end private double extractValue// /** very similar to <code>Example.toSparseString</code>, but writes doubles */// public static String toSparseString(Example e) {// StringBuffer str = new StringBuffer();// boolean first = true;// for (int i = 0; i < e.getNumberOfAttributes(); i++) {// if (e.getValue(i) != 0.0) {// if (!first) str.append(" ");// str.append((i+1)+":"+e.getValue(i));// first = false;// }// }// return str.toString();// } // end private String toSparseString /** sets the index of the class to use as "positive" (+1), e.g. * <code>setPositiveLabelIndex(attribute.mapString("positive"))</code> */ public void setPositiveLabelIndex(int index) { this.positiveLabelIndex = index; } /** sets the index of the class to use as "unlabelled" (0) for transduction, e.g. * <code>setUnlabelledLabelIndex(attribute.mapString("missing"))</code>. * If transduction should not be used, set index to -1. */ public void setUnlabeledLabelIndex(int index) { this.unlabelledIndex = index; } public List getParameterTypes() { List types = super.getParameterTypes(); types.add(new ParameterTypeCategory("kernel_type", "Kernel of the support vector machine.", KERNEL_TYPES, 0)); types.add(new ParameterTypeString("additional_parameters", "Additional parameters for the SVMlight.")); types.add(new ParameterTypeCategory("task_type", "The type of the task, i.e. classification or regression.", TASK_TYPES, AUTO)); return types; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -