📄 mysvmlearner.java
字号:
/**
 * Reads the xi-alpha values written by mySVM for the training examples and derives
 * performance estimates (classification error and accuracy; not yet precision and recall)
 * from them.
 *
 * <p>According to the original documentation this method is only called if the parameter
 * <tt>xi_alpha_estimation</tt> is set to <tt>true</tt> and the learning task is a
 * classification task.</p>
 *
 * <p>If <tt>exampleSet</tt> is a <tt>BatchedExampleSet</tt>, only the lines belonging to
 * examples of the currently last batch are evaluated. Otherwise every line of the file is
 * evaluated.</p>
 *
 * <p>The xi-alpha file contains one line per training example; each line starts with two
 * values:
 * <ol>
 * <li>the alpha value of the example (its absolute value is used here),</li>
 * <li>the xi-alpha criterion of the example; a value greater than or equal to 1.0 is counted
 *     as a potential leave-one-out error.</li>
 * </ol>
 * </p>
 *
 * <p>The estimation criterion is currently hard-coded to <tt>ESTIMATION_CRITERION_ERROR</tt>,
 * so the returned vector contains <tt>xialpha_error</tt> followed by
 * <tt>xialpha_accuracy</tt>. The alpha-based criteria are computed and logged but only added
 * if the hard-coded criterion is changed; this should become a parameter of the operator.</p>
 *
 * <p>If no example was evaluated, the error estimate is set to 1.0 and a warning is logged.
 * The xi-alpha file is always deleted before the method returns or throws.</p>
 *
 * @param exampleSet  the example set mySVM was trained on
 * @param xiAlphaFile the file into which mySVM wrote the xi-alpha values
 * @return the performance vector holding the xi-alpha estimates
 * @throws OperatorException if the file cannot be read (user error 302) or contains a line
 *                           with fewer than two numeric values (user error 911)
 */
private PerformanceVector scanXiAlphaValues (ExampleSet exampleSet, File xiAlphaFile) throws OperatorException {
    // Hard-coded for now. Alternatives: ESTIMATION_CRITERION_ALPHA_SUM,
    // ESTIMATION_CRITERION_ALPHA_GREATER_ONE_SUM, ESTIMATION_CRITERION_NO_OF_ALPHAS_GREATER_ONE.
    int estimationCriterion = ESTIMATION_CRITERION_ERROR;

    LogService.logMessage("MySVMLearner '"+getName()+"': xi-alpha-performance estimation "+
                          "by mySVM's program 'mysvm', reading 'mysvm.xialpha':\n", LogService.MINIMUM);

    // 'true'  = use all examples for the xi-alpha estimation,
    // 'false' = use only the examples of the currently last batch.
    boolean estimateOnAllBatches = !(exampleSet instanceof BatchedExampleSet);
    BatchedExampleSet batchedExampleSet = estimateOnAllBatches ? null : (BatchedExampleSet) exampleSet;

    int noOfExamples = 0;
    int noOfAlphasGreaterOne = 0;
    double error = 0.0;
    double alphaSum = 0.0;
    double alphaGreaterOneSum = 0.0;

    try {
        BufferedReader xiAlphaFileReader = new BufferedReader(new FileReader(xiAlphaFile));
        try {
            ExampleReader exampleIterator = null;
            int currentlyLastBatch = 0;   // index of the last batch of the current time window
            if (batchedExampleSet != null) {
                exampleIterator = exampleSet.getExampleReader();
                currentlyLastBatch = batchedExampleSet.getLastBatch();
            }

            String line = null;
            while ((line = xiAlphaFileReader.readLine()) != null) {
                if (batchedExampleSet != null) {
                    // Lines and examples are consumed in lock-step; stop when the examples run out.
                    Example currentExample = exampleIterator.next();
                    if (currentExample == null) {
                        break;
                    }
                    int currentBatch = (int) currentExample.getValue(batchedExampleSet.getBatchIndexAttribute());
                    if (currentBatch != currentlyLastBatch) {
                        continue;   // example of an older batch: its line is skipped unparsed
                    }
                }

                double[] values = parseXiAlphaLine(line);
                double currentAlpha = values[0];
                alphaSum += currentAlpha;
                if (currentAlpha >= 1.0) { alphaGreaterOneSum += currentAlpha; }
                if (currentAlpha > 1.0)  { noOfAlphasGreaterOne++; }
                if (values[1] >= 1) {     // xi-alpha criterion >= 1 => potential error
                    error++;
                }
                noOfExamples++;
            }
        } finally {
            // Close on every path, and before the temp file is deleted below.
            xiAlphaFileReader.close();
        }
    } catch (IOException e) {
        throw new UserError(this, e, 302, xiAlphaFile);
    } finally {
        TempFileService.deleteTempFile(xiAlphaFile);
    }

    LogService.logMessage("MySVMLearner '"+getName()+"': estimated number of errors: "+
                          (long)error+" of "+noOfExamples+" examples",
                          LogService.MINIMUM);

    // Kept as a double: dividing the int counter in place would truncate the fraction to 0 or 1.
    double alphasGreaterOneFraction = 0.0;
    if (noOfExamples > 0) {
        error /= noOfExamples;
        alphasGreaterOneFraction = (double) noOfAlphasGreaterOne / noOfExamples;
        alphaSum /= noOfExamples;
        alphaGreaterOneSum /= noOfExamples;
    } else {
        error = 1.0;
        LogService.logMessage("MySVMLearner '"+this.getName()+"': setting error estimation to 1.0, " +
                              "because number of examples for estimation equals zero", LogService.WARNING);
    }
    LogService.logMessage("MySVMLearner '"+getName()+"': estimated error: "+
                          error+" of "+noOfExamples, LogService.MINIMUM);

    // ---- provide the PerformanceVector to be passed to the operator output ----
    // note: the first criterion in the vector is said to be the one used for optimization
    //       (RK: really? not 'main criterion'?)
    PerformanceVector pv = new PerformanceVector();
    if (estimationCriterion == ESTIMATION_CRITERION_ALPHA_SUM) {
        pv.addCriterion (new EstimatedPerformance ("alpha_sum", alphaSum,
                                                   exampleSet.getSize(), true));
    }
    if (estimationCriterion == ESTIMATION_CRITERION_ALPHA_GREATER_ONE_SUM) {
        pv.addCriterion (new EstimatedPerformance ("alpha_greater_one_sum", alphaGreaterOneSum,
                                                   exampleSet.getSize(), true));
    }
    if (estimationCriterion == ESTIMATION_CRITERION_NO_OF_ALPHAS_GREATER_ONE) {
        pv.addCriterion (new EstimatedPerformance ("no_of_alphas_greater_one", alphasGreaterOneFraction,
                                                   exampleSet.getSize(), true));
    }
    pv.addCriterion (new EstimatedPerformance ("xialpha_error", error, exampleSet.getSize(), true));
    pv.addCriterion (new EstimatedPerformance ("xialpha_accuracy", (1.0-error), exampleSet.getSize(), false));

    // The batch index exists only for batched example sets; casting unconditionally here
    // would throw a ClassCastException for every other example set.
    String batchInfo = (batchedExampleSet != null)
        ? " (at batch " + batchedExampleSet.getLastBatch() + ")"
        : " (on complete example set)";
    LogService.logMessage("MySVMLearner: ESTIMATOR error = " + error + batchInfo + "\n" +
                          " ESTIMATOR no. of alphas greater one = " + alphasGreaterOneFraction + "\n" +
                          " ESTIMATOR alpha sum = " + alphaSum + "\n" +
                          " ESTIMATOR alpha greater one sum = " + alphaGreaterOneSum,
                          LogService.TASK);
    return pv;
}

/**
 * Parses one line of mySVM's xi-alpha file.
 *
 * @param line the raw line as read from the file
 * @return a two-element array: the absolute alpha value and the xi-alpha criterion value
 * @throws OperatorException (user error 911) if the line has fewer than two tokens or one of
 *                           the first two tokens is not a number
 */
private double[] parseXiAlphaLine(String line) throws OperatorException {
    StringTokenizer tokenizer = new StringTokenizer(line.trim());
    if (tokenizer.countTokens() < 2) {
        throw new UserError(this, 911, new Object[] { "mysvm's xi-alpha file",
                                                      "Each line must contain at least two values."});
    }
    try {
        double absoluteAlpha = Math.abs(Double.parseDouble(tokenizer.nextToken()));
        double xiAlphaCriterion = Double.parseDouble(tokenizer.nextToken());
        return new double[] { absoluteAlpha, xiAlphaCriterion };
    } catch (NumberFormatException e) {
        // Report malformed numbers the same way as other malformed lines instead of
        // letting a raw runtime exception escape from the operator.
        throw new UserError(this, 911, new Object[] { "mysvm's xi-alpha file",
                                                      "Line '" + line + "' does not start with two numbers (" + e.getMessage() + ")."});
    }
}
/**
 * Tells whether this learner can deliver a performance estimate without a test set.
 *
 * <p>In its current state the method always answers <tt>false</tt>, whatever the value of
 * the parameter <tt>xi_alpha_estimation</tt> is. The intended contract was to answer
 * <tt>true</tt> if that parameter is set and the learning task is a classification task,
 * because mySVM can then use xi-alpha estimates. The classification check was disabled
 * because the parameters "pattern" and "regression" it relied on do not exist; the parameter
 * "task_type" could be used instead, but might be AUTO. Deciding this properly needs the
 * example set (label nominal with exactly two values), which is not available here.</p>
 *
 * @return always <tt>false</tt>
 */
public boolean canEstimatePerformance() {
    // The parameter is still looked up exactly as before, although its value no longer
    // influences the answer (see the method documentation).
    getParameterAsBoolean("xi_alpha_estimation");
    return false;
}
/**
 * Returns the performance vector currently stored in the field
 * <tt>performanceEstimation</tt>.
 *
 * <p>According to the original documentation this vector holds the xi-alpha performance
 * estimates of the learned mySVM model if the parameter <tt>xi_alpha_estimation</tt> is set
 * to <tt>true</tt> and the learning task is a classification task, and is <tt>null</tt>
 * otherwise. NOTE(review): the code assigning the field is not part of this section;
 * confirm there.</p>
 *
 * @return the stored performance estimation, possibly <tt>null</tt>
 */
public PerformanceVector getEstimatedPerformance() {
    return this.performanceEstimation;
}
/**
 * Sets the index of the class that is to be used as "positive" (+1), for example
 * <tt>setPositiveLabelIndex(attribute.mapString("positive"))</tt>.
 *
 * @param index the index of the positive class
 */
public void setPositiveLabelIndex(int index) {
    positiveLabelIndex = index;
}
/**
 * Specifies the parameters of the <tt>MySVMLearner</tt>, their types, their default values,
 * and their descriptions.
 *
 * <p>The list starts with the parameter types of the super class, followed by one entry per
 * element of <tt>KERNEL_PARAMETER</tt>, the learner's own switches
 * (<tt>weighted_examples</tt>, <tt>sparse</tt>, <tt>task_type</tt>,
 * <tt>xi_alpha_estimation</tt>, <tt>scale</tt>) and finally all elements of
 * <tt>PARAMETER</tt>.</p>
 *
 * @return the list of parameter types of this operator
 */
public List getParameterTypes() {
    List parameterTypes = super.getParameterTypes();

    // Kernel parameters: "type" is a category over KERNEL_TYPES, every other kernel
    // parameter is an unbounded double that is shown as a non-expert parameter.
    for (int k = 0; k < KERNEL_PARAMETER.length; k++) {
        if (!KERNEL_PARAMETER[k].equals("type")) {
            ParameterType kernelValue =
                new ParameterTypeDouble(KERNEL_PARAMETER[k],
                                        "The SVM kernel parameter " + KERNEL_PARAMETER[k] + ".",
                                        Double.NEGATIVE_INFINITY,
                                        Double.POSITIVE_INFINITY,
                                        true);
            kernelValue.setExpert(false);
            parameterTypes.add(kernelValue);
            continue;
        }
        parameterTypes.add(new ParameterTypeStringCategory("type", "The SVM kernel type.", KERNEL_TYPES));
    }

    // Switches of the learner itself.
    parameterTypes.add(new ParameterTypeBoolean("weighted_examples",
                                                "If set to true, the weight of the examples is used.",
                                                false));
    parameterTypes.add(new ParameterTypeBoolean("sparse",
                                                "If set to true, sparse format is used for the input of the SVM.",
                                                false));
    parameterTypes.add(new ParameterTypeCategory("task_type",
                                                 "The type of the task, i.e. classification or regression.",
                                                 TASK_TYPES, AUTO));
    parameterTypes.add(new ParameterTypeBoolean("xi_alpha_estimation",
                                                "If set to true, the xi-alpha performance is estimated.",
                                                false));
    parameterTypes.add(new ParameterTypeBoolean("scale",
                                                "If set to true, the training examples are scaled to mean 0 and variance 1. " +
                                                "Setting this parameter to false may reduce numerical stability!",
                                                true));

    // Remaining parameters, declared elsewhere in the class.
    for (int p = 0; p < PARAMETER.length; p++) {
        parameterTypes.add(PARAMETER[p]);
    }
    return parameterTypes;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -