📄 mysvmlearner.java

📁 一个很好的LIBSVM的JAVA源码。对于要研究和改进SVM算法的学者。可以参考。来自数据挖掘工具YALE工具包。
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
上一页 1 23


    /** This method is only called, if the <tt>xi_alpha_estimation</tt> parameter of this operator
     * is set to <tt>true</tt> (default: <tt>false</tt>) and if the learning task is a classification
     * task (parameter <tt>pattern</tt> set to <tt>true</tt>).
     * This method reads the xi-alpha-criterion values for the examples from the corresponding mySVM
     * output file, computes classification error and accuracy (not yet precision and recall) estimates,
     * creates an <tt>EstimatedPerformance</tt>-object for these xi-alpha-performance estimates, and 
     * adds it to the output of the operator.<br>
     * <b>NOTE:</b> if the operator option <tt>xi_alpha_estimation</tt> is set to <tt>true</tt>
     * <b>and</b> the <tt>ExampleSet</tt> passed to this operator set is a <tt>BatchedExampleSet</tt>,
     * the performance estimation is only performed on the currently last batch of the example set.
     * Otherwise the performance estimation is performed on the complete example set.
     *
     * <p>NOTE: currently two alternative estimation values are provided by the mySVM learner:
     * <ol>
     *   <li>sum of the absolute values of all alpha values, whose absolute value is greater than or equal to one</li>
     *   <li>estimation of the expected error based on Thorsten Joachims xi-alpha-estimator criterion</li>
     * </ol>
     * Currently the first of the two criteria is used; in future versions this should be specifiable by
     * a parameter of this operator.
     * </p>
     *
     * <p>NOTE: the mySVM file <tt>mysvm.xialpha</tt> contains one line for each training example,
     * where each line contains two values:
     * <ol>
     *   <li>alpha value of the corresponding example</li>
     *   <li>value of the xi-alpha-criterion of the corresponding example (if this value is greater than or equal 
     *       to 1.0, this example may produce a leave-one-out error if it is left out in training)</li>
     * </ol>
     * </p>
     */
    private PerformanceVector  scanXiAlphaValues (ExampleSet exampleSet, File xiAlphaFile) throws OperatorException {
        // Reads the xi-alpha file written by mySVM (one line per training example:
        // "<alpha> <xi-alpha-criterion>"), accumulates the estimation statistics, deletes the
        // file, and returns the statistics wrapped in a PerformanceVector.
        //
        // History: since 2003/07/30 mySVM reads its data from a file 'xyz' and stores the
        // xi-alpha estimations in 'xyz.xialpha' (before that, data was piped to mySVM and the
        // estimations always ended up in 'mysvm.xialpha').

        // Selects which additional criterion (if any) is added to the performance vector ahead
        // of the error/accuracy criteria. Alternatives that were used in the past:
        // ESTIMATION_CRITERION_ALPHA_SUM, ESTIMATION_CRITERION_ALPHA_GREATER_ONE_SUM,
        // ESTIMATION_CRITERION_NO_OF_ALPHAS_GREATER_ONE.
        int estimationCriterion = ESTIMATION_CRITERION_ERROR;

        LogService.logMessage("MySVMLearner '"+getName()+"': xi-alpha-performance estimation "+
                              "by mySVM's program 'mysvm', reading 'mysvm.xialpha':\n", LogService.MINIMUM);

        // 'true'  = use all examples for the xi-alpha-estimation,
        // 'false' = use only the examples of the currently last batch (BatchedExampleSet).
        boolean  estimateOnAllBatches = !(exampleSet instanceof BatchedExampleSet);

        int      noOfExamples         = 0;
        int      noOfAlphasGreaterOne = 0;
        double   error                = 0.0;   // first a counter, later normalized to a rate
        double   alphaSum             = 0.0;
        double   alphaGreaterOneSum   = 0.0;

        BufferedReader  xiAlphaFileReader = null;
        try {
            xiAlphaFileReader = new BufferedReader(new FileReader(xiAlphaFile));

            // Only needed when the estimation is restricted to the last batch.
            BatchedExampleSet  batchedSet         = null;
            ExampleReader      exampleIterator    = null;
            int                currentlyLastBatch = 0;
            if (!estimateOnAllBatches) {
                batchedSet         = (BatchedExampleSet) exampleSet;
                exampleIterator    = exampleSet.getExampleReader();
                currentlyLastBatch = batchedSet.getLastBatch();
            }

            String  line = null;
            while ((line = xiAlphaFileReader.readLine()) != null) {
                if (!estimateOnAllBatches) {
                    // Lines of the file and examples of the set are walked in lock step; stop
                    // as soon as either of them is exhausted.
                    Example  currentExample = exampleIterator.next();
                    if (currentExample == null) {
                        break;
                    }
                    int  currentBatch = (int) currentExample.getValue(batchedSet.getBatchIndexAttribute());
                    if (currentBatch != currentlyLastBatch) {
                        continue;   // example belongs to an older batch => not part of the estimate
                    }
                }

                StringTokenizer  tokenizer = new StringTokenizer(line.trim());
                if (tokenizer.countTokens() < 2) {
                    throw new UserError(this, 911, new Object[] { "mysvm's xi-alpha file", 
                                                                  "Each line must contain at least two values."});
                }
                double  currentAlpha = Math.abs(Double.parseDouble(tokenizer.nextToken()));
                alphaSum += currentAlpha;
                // NOTE(review): the sum uses '>=' while the count uses '>'; kept as found.
                if (currentAlpha >= 1.0) { alphaGreaterOneSum += currentAlpha; }
                if (currentAlpha > 1.0) { noOfAlphasGreaterOne++; }
                if (Double.parseDouble(tokenizer.nextToken()) >= 1) {       // (xi-alpha-criterion >= 1)
                    error++;                                                //   => potential error
                }
                noOfExamples++;
            }
        } catch (IOException e) {
            throw new UserError(this, e, 302, xiAlphaFile);
        } finally {
            // Close the reader on every path (also when a UserError or a NumberFormatException
            // aborts the loop) and do so before the file is deleted.
            if (xiAlphaFileReader != null) {
                try {
                    xiAlphaFileReader.close();
                } catch (IOException ignored) {
                    // nothing sensible can be done here; the file is removed right below
                }
            }
            TempFileService.deleteTempFile(xiAlphaFile);
        }

        LogService.logMessage("MySVMLearner '"+getName()+"': estimated number of errors:  "+
                              (long)error+" of "+noOfExamples+" examples",
                              LogService.MINIMUM);

        // Normalize all statistics by the number of examples used. The share of alphas greater
        // than one is kept in a double: dividing the int counter in place would truncate the
        // result to 0 or 1.
        double  fractionOfAlphasGreaterOne = 0.0;
        if (noOfExamples > 0) {
            error                      /= noOfExamples;
            fractionOfAlphasGreaterOne  = ((double) noOfAlphasGreaterOne) / noOfExamples;
            alphaSum                   /= noOfExamples;
            alphaGreaterOneSum         /= noOfExamples;
        } else {
            error = 1.0;
            LogService.logMessage("MySVMLearner '"+this.getName()+"': setting error estimation to 1.0, " +
                                  "because number of examples for estimation equals zero", LogService.WARNING);
        }
        LogService.logMessage("MySVMLearner '"+getName()+"': estimated error:             "+
                              error+" of "+noOfExamples, LogService.MINIMUM);

        // ---- provide the PerformanceVector-object to be passed to the operator output ----
        // note: the first criterion in the vector is the one used for optimization
        //       (RK: really? not 'main criterion'?)
        PerformanceVector  pv = new PerformanceVector();
        if (estimationCriterion == ESTIMATION_CRITERION_ALPHA_SUM) {
            pv.addCriterion (new EstimatedPerformance ("alpha_sum", alphaSum, 
                                                       exampleSet.getSize(), true));
        }
        if (estimationCriterion == ESTIMATION_CRITERION_ALPHA_GREATER_ONE_SUM) {
            pv.addCriterion (new EstimatedPerformance ("alpha_greater_one_sum", alphaGreaterOneSum, 
                                                       exampleSet.getSize(), true));
        }
        if (estimationCriterion == ESTIMATION_CRITERION_NO_OF_ALPHAS_GREATER_ONE) {
            pv.addCriterion (new EstimatedPerformance ("no_of_alphas_greater_one", fractionOfAlphasGreaterOne,
                                                       exampleSet.getSize(), true));
        }
        pv.addCriterion (new EstimatedPerformance ("xialpha_error", error, exampleSet.getSize(), true));
        pv.addCriterion (new EstimatedPerformance ("xialpha_accuracy", (1.0-error), exampleSet.getSize(), false));

        // The batch number can only be reported for a BatchedExampleSet; casting any other
        // example set here would raise a ClassCastException.
        String  batchInfo = "";
        if (!estimateOnAllBatches) {
            batchInfo = "  (at batch " + ((BatchedExampleSet) exampleSet).getLastBatch() + ")";
        }
        LogService.logMessage("MySVMLearner: ESTIMATOR error                   = " + error + 
                              batchInfo + "\n" +
                              "            ESTIMATOR no. of alphas greater one = " + fractionOfAlphasGreaterOne + "\n" +
                              "            ESTIMATOR alpha sum                 = " + alphaSum + "\n" +
                              "            ESTIMATOR alpha greater one sum     = " + alphaGreaterOneSum,
                              LogService.TASK);

        return pv;
    } // end of 'private PerformanceVector scanXiAlphaValues()'



    /** Intended to return <tt>true</tt>, if the parameter <tt>xi_alpha_estimation</tt> is set to <tt>true</tt>
     *  (the default is <tt>false</tt>), and if the learning task is a classification task, because
     *  in this case mySVM can use xi-alpha-estimates to estimate the performance on the training set
     *  without using a test set.
     *  <b>NOTE:</b> the classification check is currently commented out, and the method returns
     *  <tt>false</tt> in every case.
     */
    public boolean  canEstimatePerformance() {
        // The parameter is still looked up, exactly as before, although its value does not
        // influence the result any more.
        getParameterAsBoolean("xi_alpha_estimation");

        // Background: the original test for a classification task relied on the parameters
        // "pattern" and "regression", which do not exist. The parameter "task_type" COULD be
        // used instead, but it might be set to AUTO. A check based on the example set
        // ('taskIsClassification(ExampleSet)') would be more accurate, but no ExampleSet is
        // available here. With that test removed, only the 'not possible' outcome is left.
        //
        // NOTE(review): as a consequence xi-alpha-estimation is reported as unavailable even
        // when 'xi_alpha_estimation' is set to true -- confirm that this is intended.
        return false;
    }

    /** returns an object of the class <tt>EstimatedPerformance</tt> containing the xi-alpha-performance
     *  estimates of the learned mySVM model, if the parameter <tt>xi_alpha_estimation</tt> is set to <tt>true</tt>
     *  (the default is <tt>false</tt>), and if the learning task is a classification task, because
     *  in this case mySVM can use xi-alpha-estimates to estimate the performance on the training set
     *  without using a test set. Otherwise the method returns <tt>null</tt>.
     */
    public PerformanceVector  getEstimatedPerformance() {
        // Plain accessor: hands out whatever is currently stored in the field.
        return this.performanceEstimation;
    }

    /** sets the index of the class to use as "positive" (+1), for example
     *  <tt>setPositiveLabelIndex(attribute.mapString("positive"))</tt>
     */
    public void  setPositiveLabelIndex(int index) {
        // Stores the given index unchecked.
        positiveLabelIndex = index;
    }

    /** specifies the parameters of the <tt>MySVMLearner</tt>, their types, 
     *  their default values, and descriptions of them.
     */
    public List getParameterTypes() {
        // Start from the parameter types of the super class and append the SVM specific ones.
        List parameterTypes = super.getParameterTypes();

        // Kernel parameters: the entry named "type" becomes a string category over
        // KERNEL_TYPES, every other entry an unbounded double parameter.
        for (int k = 0; k < KERNEL_PARAMETER.length; k++) {
            if (!KERNEL_PARAMETER[k].equals("type")) {
                ParameterType kernelParameter =
                    new ParameterTypeDouble(KERNEL_PARAMETER[k],
                                            "The SVM kernel parameter " + KERNEL_PARAMETER[k] + ".",
                                            Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, true);
                kernelParameter.setExpert(false);
                parameterTypes.add(kernelParameter);
                continue;
            }
            parameterTypes.add(new ParameterTypeStringCategory("type", "The SVM kernel type.", KERNEL_TYPES));
        }

        // Switches of the learner itself.
        parameterTypes.add(new ParameterTypeBoolean("weighted_examples",
                                                    "If set to true, the weight of the examples is used.",
                                                    false));
        parameterTypes.add(new ParameterTypeBoolean("sparse",
                                                    "If set to true, sparse format is used for the input of the SVM.",
                                                    false));
        parameterTypes.add(new ParameterTypeCategory("task_type",
                                                     "The type of the task, i.e. classification or regression.",
                                                     TASK_TYPES, AUTO));
        parameterTypes.add(new ParameterTypeBoolean("xi_alpha_estimation",
                                                    "If set to true, the xi-alpha performance is estimated.",
                                                    false));
        parameterTypes.add(new ParameterTypeBoolean("scale",
                                                    "If set to true, the training examples are scaled to mean 0 " +
                                                    "and variance 1. Setting this parameter to false may reduce " +
                                                    "numerical stability!",
                                                    true));

        // Finally the predefined entries of PARAMETER, in their declared order.
        int index = 0;
        while (index < PARAMETER.length) {
            parameterTypes.add(PARAMETER[index]);
            index++;
        }

        return parameterTypes;
    }
}
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -