📄 svmlightlearner.java
字号:
}
/** SVM<i><sup>light</sup></i> can estimate its generalization performance straight after training
* using xi-alpha-estimates on the training set, without using a testing set.
* Hence this method always returns <code>true</code>.
*/
public boolean canEstimatePerformance() {
    // Estimation support is reported unconditionally.
    // RK/2003/03/26: open question - always true, or only if xi-alpha-estimation is turned on?
    return true;
}
/** returns the estimated performance in an object of the class <tt>PerformanceVector</tt>
* if this SVM<i><sup>light</sup></i> learner produced xi-alpha-estimators;
* otherwise it returns null.
*/
public PerformanceVector getEstimatedPerformance() {
    // Hands back whatever estimate is currently stored in the field; no computation happens here.
    return this.performanceEstimation;
}
/** Creates the files with examples for the external SVM<i><sup>light</sup></i> process.
*/
private File writeExamples(ExampleSet exampleSet, ExampleSet transductionExampleSet, boolean classificationTask) {
    // Writes one line per example: "<label> <sparse attributes>".
    //   exampleSet              - examples to write first (must not be null)
    //   transductionExampleSet  - optional extra examples, all written with label "0"; may be null
    //   classificationTask      - true: labels are mapped to "+1" / "0" / "-1";
    //                             false: the raw label value is written
    // Returns the temporary file that was written.
    // Throws IllegalStateException (unchecked) if the file cannot be opened for writing.
    File trainingFile = TempFileService.createTempFile(getName() + "_training_data_");

    PrintWriter out;
    try {
        out = new PrintWriter(new FileWriter(trainingFile));
    } catch (IOException e) {
        LogService.logException("Cannot write input file for SVMLight!", e);
        // Previously execution continued with a null writer and died with a bare
        // NullPointerException on the first write; fail with the real cause instead.
        throw new IllegalStateException("Cannot write input file for SVMLight: " + trainingFile, e);
    }

    try {
        ExampleReader reader = exampleSet.getExampleReader();
        while (reader.hasNext()) {
            Example example = reader.next();
            if (classificationTask) {
                if (example.getLabel() == positiveLabelIndex) {
                    out.print("+1 ");
                } else if (example.getLabel() == unlabelledIndex) {
                    out.print("0 ");
                } else {
                    out.print("-1 ");
                }
            } else {
                out.print(example.getLabel() + " ");
            }
            // History: Timm once replaced this call by toSparseString(example)
            // to avoid Strings from binary attributes; that variant is disabled.
            out.println(example.getAttributesAsSparseString());
        }

        if (transductionExampleSet != null) {
            reader = transductionExampleSet.getExampleReader();
            while (reader.hasNext()) {
                Example example = reader.next();
                out.println("0 " + example.getAttributesAsSparseString());
            }
        }

        // PrintWriter never throws on write failures; it only records them.
        if (out.checkError()) {
            LogService.logMessage("Errors occurred while writing SVMLight input file '" + trainingFile
                                  + "'; the file may be incomplete.", LogService.ERROR);
        }
    } finally {
        // Release the file handle even if reading the examples fails.
        out.close();
    }
    return trainingFile;
}
/** scans the output for xi-alpha-values and creates an object of the class
* <code>PerformanceVector</code> with the estimated performance, if a
* performance evaluation is available.
*/
private PerformanceVector scanXiAlpha(String svmOutput, int numberOfExamples) {
    // Parallel lookup tables: recognised line prefix -> criterion name -> boolean passed as
    // the fourth EstimatedPerformance argument (true for the error criteria, false for
    // recall/precision; presumably "smaller is better" - confirm against EstimatedPerformance).
    final String[] linePrefixes = {
        "XiAlpha-estimate of the error:",
        "XiAlpha-estimate of the recall:",
        "XiAlpha-estimate of the precision:",
        "Leave-one-out estimate of the error:",
        "Leave-one-out estimate of the recall:",
        "Leave-one-out estimate of the precision:"
    };
    final String[] criterionNames = {
        "xialpha_error",
        "xialpha_recall",
        "xialpha_precision",
        "leave_one_out_error",
        "leave_one_out_recall",
        "leave_one_out_precision"
    };
    final boolean[] criterionFlags = { true, false, false, true, false, false };

    try {
        PerformanceVector estimates = new PerformanceVector();
        LineNumberReader outputReader = new LineNumberReader(new StringReader(svmOutput));
        for (String raw = outputReader.readLine(); raw != null; raw = outputReader.readLine()) {
            String trimmed = raw.trim();
            // At most one prefix can match a line; stop at the first hit.
            for (int i = 0; i < linePrefixes.length; i++) {
                if (trimmed.startsWith(linePrefixes[i])) {
                    estimates.addCriterion(new EstimatedPerformance(criterionNames[i], extractValue(trimmed),
                                                                    numberOfExamples, criterionFlags[i]));
                    break;
                }
            }
        }
        return estimates;
    } catch (IOException ioe) {
        LogService.logMessage("IOException: SVM^light output could not be parsed!", LogService.ERROR);
    } catch (NumberFormatException nfe) {
        LogService.logMessage("NumberFormatException: SVM^light output could not be parsed!", LogService.ERROR);
    } catch (IndexOutOfBoundsException ioobe) {
        LogService.logMessage("IndexOutOfBoundsException: SVM^light output could not be parsed!", LogService.ERROR);
    }
    // Any parse failure above is logged and reported to the caller as "no estimate".
    return null;
}
/** scans the alpha values from the file <tt>alphaFile</tt> for all examples
* in the last batch, computes the desired performance estimation criterion,
* and creates an object of the class <code>PerformanceVector</code> with the
* estimated performance value(s).
*/
private PerformanceVector scanXiAlpha(ExampleSet exampleSet, File alphaFile) throws OperatorException { // RK/2002/09/13
    // Reads the alpha file line by line in lock-step with the examples of exampleSet and
    // aggregates the absolute alpha values of those examples whose batch index equals the
    // last batch of the (batched) example set.
    //   exampleSet - must be a BatchedExampleSet (it is cast below)
    //   alphaFile  - text file whose first token on each line is parsed as the alpha value
    // Returns a PerformanceVector holding the selected alpha criterion followed by
    // "xialpha_error" and "xialpha_accuracy".
    // Throws UserError 911 if a line belonging to the last batch contains no token,
    // and UserError 302 if the alpha file cannot be read.
    int estimationCriterion = ESTIMATION_CRITERION_ALPHA_SUM; // RK/2002/09/13
    LogService.logMessage("Alpha-value-based performance estimation on last batch by SVM^light "
                          + "(SVMLightLearner '" + getName() + "'):\n", LogService.MINIMUM);

    int noOfExamples = 0;
    double noOfAlphasGreaterOne = 0.0;
    double alphaSum = 0.0;
    double alphaGreaterOneSum = 0.0;

    BufferedReader alphaFileReader = null;
    try {
        alphaFileReader = new BufferedReader(new FileReader(alphaFile));
        ExampleReader exampleIterator = exampleSet.getExampleReader();
        BatchedExampleSet batchedSet = (BatchedExampleSet) exampleSet;
        int currentlyLastBatch = batchedSet.getLastBatch(); // index of last batch of the current time window

        String line = null;
        Example currentExample = null;
        // Stop at whichever runs out first: alpha lines or examples. The hasNext() guard keeps
        // next() from being called on an exhausted reader when the file has surplus lines.
        while (((line = alphaFileReader.readLine()) != null)
               && exampleIterator.hasNext()
               && ((currentExample = exampleIterator.next()) != null)) {
            int currentBatch = (int) currentExample.getValue(batchedSet.getBatchIndexAttribute());
            if (currentBatch != currentlyLastBatch) {
                continue; // only examples of the last batch contribute
            }
            StringTokenizer tokenizer = new StringTokenizer(line.trim());
            if (tokenizer.countTokens() < 1) {
                throw new UserError(this, 911, new Object[] {
                                        alphaFile, "Each line must contain at least one value."});
            }
            double currentAlpha = Math.abs(Double.parseDouble(tokenizer.nextToken()));
            alphaSum += currentAlpha;
            // NOTE(review): the sum uses ">=" while the count uses ">" - confirm this asymmetry is intended.
            if (currentAlpha >= 1.0) { alphaGreaterOneSum += currentAlpha; }
            if (currentAlpha > 1.0) { noOfAlphasGreaterOne++; }
            noOfExamples++;
        }
    } catch (IOException e) {
        throw new UserError(this, e, 302, alphaFile);
    } finally {
        // The reader used to be left open on every path; always release the file handle.
        if (alphaFileReader != null) {
            try {
                alphaFileReader.close();
            } catch (IOException ignored) {
                // nothing useful can be done if closing a read-only stream fails
            }
        }
    }

    double error = noOfAlphasGreaterOne;
    LogService.logMessage(" Estimated number of errors: "+(long)error+" of "+noOfExamples+" examples",
                          LogService.MINIMUM);
    if (noOfExamples > 0) {
        // turn the totals into per-example averages
        error /= noOfExamples;
        noOfAlphasGreaterOne /= noOfExamples;
        alphaSum /= noOfExamples;
        alphaGreaterOneSum /= noOfExamples;
    } else {
        error = 1.0;
        LogService.logMessage("SVMLightLearner '"+this.getName()+"': setting error estimation to 1.0, " +
                              "because number of examples for estimation equals zero", LogService.WARNING);
    }
    LogService.logMessage(" Estimated error: "+error+" of "+noOfExamples, LogService.MINIMUM);

    // ---- provide PerformanceVector object to be passed to operator output by ----
    // ---- the super class method apply()                                     ----
    PerformanceVector pv = new PerformanceVector(); // note: the first criterion in the vector is the one used for optimization
    if (estimationCriterion == ESTIMATION_CRITERION_ALPHA_SUM) {
        pv.addCriterion (new EstimatedPerformance ("alpha_sum", alphaSum, exampleSet.getSize(), true));
    }
    if (estimationCriterion == ESTIMATION_CRITERION_ALPHA_GREATER_ONE_SUM) {
        // NOTE(review): this criterion is also named "alpha_sum" - possibly a copy/paste slip; left unchanged.
        pv.addCriterion (new EstimatedPerformance ("alpha_sum", alphaGreaterOneSum,exampleSet.getSize(), true));
    }
    if (estimationCriterion == ESTIMATION_CRITERION_NO_OF_ALPHAS_GREATER_ONE) {
        pv.addCriterion (new EstimatedPerformance ("no_of_alphas_greater_one", noOfAlphasGreaterOne, exampleSet.getSize(), true));
    }
    pv.addCriterion (new EstimatedPerformance ("xialpha_error", error, exampleSet.getSize(), true));
    pv.addCriterion (new EstimatedPerformance ("xialpha_accuracy", (1.0-error), exampleSet.getSize(), false));

    LogService.logMessage("SVMLearner: ESTIMATOR error = " + error +
                          " (at batch " + ((BatchedExampleSet) exampleSet).getLastBatch() + ")\n" +
                          " ESTIMATOR no. of alphas greater one = " + noOfAlphasGreaterOne + "\n" +
                          " ESTIMATOR alpha sum = " + alphaSum + "\n" +
                          " ESTIMATOR alpha greater one sum = " + alphaGreaterOneSum,
                          LogService.TASK);
    return pv;
} // end private PerformanceVector scanXiAlpha(exampleSet,alphaFile) // RK/2002/09/13
private double extractValue(String line) throws NumberFormatException, IndexOutOfBoundsException {
    // Extracts the percentage that follows the first "=" (or "=>") in the line and returns it
    // as a fraction, e.g. "error<=50%" -> 0.5, "precision=>40%" -> 0.4.
    // A not-a-number token yields Double.NaN. Besides the plain "nan" this also accepts the
    // other spellings C's printf may produce: an optional sign ("-nan") and the
    // "nan(...)" form, which used to fail with a NumberFormatException.
    // Throws IndexOutOfBoundsException if the line contains no "=" or no "%" after it,
    // and NumberFormatException if the text in between is not a number.
    String pre = line.substring(line.indexOf("="));
    if (pre.startsWith("=>")) { pre = pre.substring(1); }
    String value = pre.substring(1, pre.indexOf("%")).trim();

    // Strip an optional sign before testing for the NaN spellings.
    String unsigned = value;
    if (unsigned.startsWith("-") || unsigned.startsWith("+")) {
        unsigned = unsigned.substring(1);
    }
    boolean plainNaN = unsigned.equalsIgnoreCase("nan");
    boolean taggedNaN = unsigned.regionMatches(true, 0, "nan(", 0, 4) && unsigned.endsWith(")");
    if (plainNaN || taggedNaN) {
        return Double.NaN;
    }
    return Double.parseDouble(value) / 100.0;
} // end private double extractValue
// /** very similar to <code>Example.toSparseString</code>, but writes doubles */
// public static String toSparseString(Example e) {
// StringBuffer str = new StringBuffer();
// boolean first = true;
// for (int i = 0; i < e.getNumberOfAttributes(); i++) {
// if (e.getValue(i) != 0.0) {
// if (!first) str.append(" ");
// str.append((i+1)+":"+e.getValue(i));
// first = false;
// }
// }
// return str.toString();
// } // end private String toSparseString
/** sets the index of the class to use as "positive" (+1), e.g.
* <code>setPositiveLabelIndex(attribute.mapString("positive"))</code>
*/
public void setPositiveLabelIndex(int index) {
    // Examples whose label equals this index are written as "+1" by writeExamples().
    positiveLabelIndex = index;
}
/** sets the index of the class to use as "unlabelled" (0) for transduction, e.g.
* <code>setUnlabeledLabelIndex(attribute.mapString("missing"))</code>.
* If transduction should not be used, set index to -1.
*/
public void setUnlabeledLabelIndex(int index) {
    // Examples whose label equals this index are written as "0" by writeExamples().
    unlabelledIndex = index;
}
public List getParameterTypes() {
    // Start from the parameters declared by the super class and append this learner's own.
    List parameterTypes = super.getParameterTypes();

    // Kernel selection; flagged via setExpert(false).
    ParameterType kernelType =
        new ParameterTypeCategory("kernel_type", "Kernel of the support vector machine.", KERNEL_TYPES, 0);
    kernelType.setExpert(false);
    parameterTypes.add(kernelType);

    // Free-form extra options for the external program.
    parameterTypes.add(
        new ParameterTypeString("additional_parameters", "Additional parameters for the SVMlight."));

    // Task selection; AUTO is passed as the default.
    parameterTypes.add(
        new ParameterTypeCategory("task_type", "The type of the task, i.e. classification or regression.",
                                  TASK_TYPES, AUTO));

    return parameterTypes;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -