📄 mysvmlearner.java
字号:
throw new UserError(null, 110, new Integer(2)); // ?? comment ?? user error 110 ?? } /** This method is only called, if the <tt>xi_alpha_estimation</tt> parameter of this operator * is set to <tt>true</tt> (default: <tt>false</tt>) and if the learning tasks is a classification * task (parameter <tt>pattern</tt> set to <tt>true</tt>). * This method reads the xi-alpha-criterion values for the examples from the corresponding mySVM * output file, computes classification error and accuracy (not yet precision and recall) estimates, * creates an <tt>EstimatedPerformance</tt>-object for these xi-alpha-performance estimates, and * adds it to the output of the operator.<br> * <b>NOTE:</b> if the operator option <tt>xi_alpha_estimation</tt> is set to <tt>true</tt> * <b>and</b> the <tt>ExampleSet</tt> passed to this operator set is a <tt>BatchedExampleSet</tt>, * the performance estimation is only performed on the currently last batch of the example set. * Otherwise the performance estimation is performed on the complete example set. * * <p>NOTE: currently two alternative estimation values are provided by the mySVM learner: * <ol> * <li>sum of the absolute values of all alpha values, whose absolute value is greater or euqal to one</li> * <li>estimation of the expected error based on Thorsten Joachims xi-alpha-estimator criterion</li> * </ol> * Currently the first of the two criteria is used; in future versions this should be speficiable by * a parameter of this operator. * </p> * * <p>NOTE: the mySVM file <tt>mysvm.xialpha</tt> contains one line for each training example, * where each line contains two values: * <ol> * <li>alpha value of the corresponding example</li> * <li>value of the xi-alpha-criterion of the corresponding example (if this value is greater than or equal * to 1.0, this example may produce a leave-one-out error if it is left out in training) * </ol> * </p> */ private PerformanceVector scanXiAlphaValues (ExampleSet exampleSet, File xiAlphaFile) throws OperatorException { // 2003/07/30: new //// 2003/07/30: new version: mySVM reads data from file 'xyz' //// => mySVM stores xi-alpha estimations in file 'xyz.xialpha' // private PerformanceVector scanXiAlphaValues (ExampleSet exampleSet) throws OperatorException { // 2003/07/30: old //// 2003/07/30: old version: data piped to mySVM //// 2003/07/30: => mySVM stores xi-alpha estimations in file 'mysvm.xialpha' int estimationCriterion = ESTIMATION_CRITERION_ERROR; // RK/2002/09/18 // int estimationCriterion = ESTIMATION_CRITERION_ALPHA_SUM; // RK/2002/07/05 // int estimationCriterion = ESTIMATION_CRITERION_ALPHA_GREATER_ONE_SUM; // RK/2002/09/13 // int estimationCriterion = ESTIMATION_CRITERION_NO_OF_ALPHAS_GREATER_ONE; // RK/2002/09/12 LogService.logMessage("MySVMLearner '"+getName()+"': xi-alpha-performance estimation "+ "by mySVM's program 'mysvm', reading 'mysvm.xialpha':\n", LogService.MINIMUM); boolean estimateOnAllBatches; // 'true' = use all examples for xi-alpha-estimation, // 'false' = use only examples from the currently last batch; // File svmXiAlphaFile = new File(TempFileService.getTempDir(), "mysvm.xialpha"); // 2003/07/30: old // File xiAlphaFile = TempFileService.createTempFile(getName()+"_xialpha_", svmXiAlphaFile); // 2003/07/30: old int noOfExamples = 0; int noOfAlphasGreaterOne = 0; double error = 0.0; double alphaSum = 0.0; double alphaGreaterOneSum = 0.0; double currentAlpha = 0.0; // BatchedExampleSet: use only examples from last batch for xi-alpha-estimation; Otherwise: use all examples: estimateOnAllBatches = !(exampleSet instanceof BatchedExampleSet); try { BufferedReader xiAlphaFileReader = new BufferedReader(new FileReader(xiAlphaFile)); String line = null; StringTokenizer tokenizer = null; if (estimateOnAllBatches) { while ((line = xiAlphaFileReader.readLine()) != null) { line = line.trim(); tokenizer = new StringTokenizer(line); if (tokenizer.countTokens() < 2) { throw new FatalException("MySVMLearner '" + this.getName() + "': each line in the " + "mySVM file 'mysvm.xialpha' must contain at least two values."); } currentAlpha = Math.abs(Double.parseDouble(tokenizer.nextToken())); alphaSum += currentAlpha; if (currentAlpha >= 1.0) { alphaGreaterOneSum += currentAlpha; } if (currentAlpha > 1.0) { noOfAlphasGreaterOne++; } if (Double.parseDouble(tokenizer.nextToken()) >= 1) { // (xi-alpha-criterion >= 1) error++; // => potential error } noOfExamples++; } } else { int currentlyLastBatch = 0; // index of last batch of the current time window int currentBatch = 0; // batch index of the current example Example currentExample = null; ExampleReader exampleIterator = exampleSet.getExampleReader(); currentlyLastBatch = ((BatchedExampleSet) exampleSet).getLastBatch(); while (((line = xiAlphaFileReader.readLine()) != null) && ((currentExample = exampleIterator.next()) != null)) { line = line.trim(); currentBatch = (int) currentExample.getValue(((BatchedExampleSet) exampleSet).getBatchIndexAttribute()); if (currentBatch == currentlyLastBatch){ tokenizer = new StringTokenizer(line); if (tokenizer.countTokens() < 2) { throw new FatalException("MySVMLearner '" + this.getName() + "': each line in the " + "mySVM file 'mysvm.xialpha' must contain at least two values."); } currentAlpha = Math.abs(Double.parseDouble(tokenizer.nextToken())); alphaSum += currentAlpha; if (currentAlpha >= 1.0) { alphaGreaterOneSum += currentAlpha; } if (currentAlpha > 1.0) { noOfAlphasGreaterOne++; } if (Double.parseDouble(tokenizer.nextToken()) >= 1) { // (xi-alpha-criterion >= 1) error++; // => potentieller Fehler; } noOfExamples++; } } } xiAlphaFileReader.close(); } catch (IOException e) { throw new UserError(this, e, 392, xiAlphaFile); } finally { TempFileService.deleteTempFile(xiAlphaFile); } LogService.logMessage("MySVMLearner '"+getName()+"': estimated number of errors: "+ (long)error+" of "+noOfExamples+" examples", LogService.MINIMUM); if (noOfExamples > 0) { error /= noOfExamples; noOfAlphasGreaterOne /= noOfExamples; alphaSum /= noOfExamples; alphaGreaterOneSum /= noOfExamples; } else { error = 1.0; LogService.logMessage("MySVMLearner '"+this.getName()+"': setting error estimation to 1.0, " + "because number of examples for estimation equals zero", LogService.WARNING); } LogService.logMessage("MySVMLearner '"+getName()+"': estimated error: "+ error+" of "+noOfExamples, LogService.MINIMUM); // ---- provide <tt>PerformanceVector</tt>-object to be passed to operator output by ---- // ---- the super class method <tt>apply()</tt> ---- PerformanceVector pv = new PerformanceVector(); // note: the first criterion in the vector is the one used // for optimization (RK: really? not 'main criterion'?) if (estimationCriterion == ESTIMATION_CRITERION_ALPHA_SUM) { pv.addCriterion (new EstimatedPerformance ("alpha_sum", alphaSum, exampleSet.getSize(), true)); } if (estimationCriterion == ESTIMATION_CRITERION_ALPHA_GREATER_ONE_SUM) { pv.addCriterion (new EstimatedPerformance ("alpha_greater_one_sum", alphaGreaterOneSum, exampleSet.getSize(), true)); } if (estimationCriterion == ESTIMATION_CRITERION_NO_OF_ALPHAS_GREATER_ONE) { pv.addCriterion (new EstimatedPerformance ("no_of_alphas_greater_one", noOfAlphasGreaterOne, exampleSet.getSize(), true)); } pv.addCriterion (new EstimatedPerformance ("xialpha_error", error, exampleSet.getSize(), true)); pv.addCriterion (new EstimatedPerformance ("xialpha_accuracy", (1.0-error), exampleSet.getSize(), false)); LogService.logMessage("MySVMLearner: ESTIMATOR error = " + error + " (at batch " + ((BatchedExampleSet) exampleSet).getLastBatch() + ")\n" + " ESTIMATOR no. of alphas greater one = " + noOfAlphasGreaterOne + "\n" + " ESTIMATOR alpha sum = " + alphaSum + "\n" + " ESTIMATOR alpha greater one sum = " + alphaGreaterOneSum, LogService.TASK); return pv; } // end of 'private EstimatedPerformance scanXiAlphaValues()' /** returns <tt>true</tt>, if the parameter <tt>xi_alpha_estimation</tt> is set to <tt>true</tt> * (the default is <tt>false</tt>), and if the learning task is a classification task, because * in this case mySVM can use xi-alpha-estimates to estimate the performance on the training set * without using a test set. */ public boolean canEstimatePerformance() { boolean useXiAlpha = getParameterAsBoolean("xi_alpha_estimation"); boolean isClassification = ((getParameterAsString("pattern") != null) || // parameter 'pattern' set or (getParameterAsString("regression") == null)); // parameter 'regression' not set // The method 'taskIsClassification(ExampleSet)' is more accurate than the above test, but requires an // <tt>ExampleSet</tt>-object, which is not available here. The above test cannot verify, wether the // label attribute is nominal and has exactly two labels as usually required for SVM classification. if (useXiAlpha) { if (isClassification) { return true; } LogService.logMessage("MySVMLearner '"+getName()+"': parameter 'xi_alpha_estimation' is set to true, " + "but parameter 'pattern' is not set. Xi-alpha-estimation may only " + "be performed for classification tasks and hence is not performed here. " + "If a consecutive or encapsulating operator expects the results of this " + "estimation, the operator chain will fail to work properly.", LogService.WARNING); return false; } return false; } /** returns an object of the class <tt>EstimatedPerformance</tt> containing the xi-alpha-performance * estimates of the learned mySVM model, if the parameter <tt>xi_alpha_estimation</tt> is set to <tt>true</tt> * (the default is <tt>false</tt>), and if the learning task is a classification task, because * in this case mySVM can use xi-alpha-estimates to estimate the performance on the training set * without using a test set. Otherwise the method returns <tt>null</tt>. */ public PerformanceVector getEstimatedPerformance() { return performanceEstimation; } /** sets the index of the class to use as "positive" (+1), for example * <tt>setPositiveLabelIndex(attribute.mapString("positive"))</tt> */ public void setPositiveLabelIndex(int index) { this.positiveLabelIndex = index; } /** sepcifies the parameters of the <tt>MySVMLearner</tt>, their types, * their default values, and descriptions of them. */ public List getParameterTypes() { List types = super.getParameterTypes(); for (int i = 0; i < KERNEL_PARAMETER.length; i++) { if (KERNEL_PARAMETER[i].equals("type")) { types.add(new ParameterTypeStringCategory("type", "The SVM kernel type.", KERNEL_TYPES)); } else { types.add(new ParameterTypeDouble(KERNEL_PARAMETER[i], "The SVM kernel parameter "+KERNEL_PARAMETER[i]+".", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, true)); } } types.add(new ParameterTypeBoolean("weighted_examples", "If set to true, the weight of the examples is used.", false)); types.add(new ParameterTypeBoolean("sparse", "If set to true, sparse format is used for the input of the SVM.", false)); types.add(new ParameterTypeCategory("task_type", "The type of the task, i.e. classification or regression.", TASK_TYPES, AUTO)); types.add(new ParameterTypeBoolean("xi_alpha_estimation", "If set to true, the xi-alpha performance is estimated.", false)); types.add(new ParameterTypeBoolean("scale", "If set to true, the training examples are scaled "+ "to mean 0 and variance 1. Setting this parameter to false "+ "may reduce numerical stability!", true)); for (int i = 0; i < PARAMETER.length; i++) types.add(PARAMETER[i]); return types; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -