📄 mysvmlearner.java

📁 著名的开源仿真软件yale
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
	    throw new UserError(null, 110, new Integer(2));    // ?? comment ?? user error 110 ??    }    /** This method is only called, if the <tt>xi_alpha_estimation</tt> parameter of this operator     * is set to <tt>true</tt> (default: <tt>false</tt>) and if the learning tasks is a classification     * task (parameter <tt>pattern</tt> set to <tt>true</tt>).     * This method reads the xi-alpha-criterion values for the examples from the corresponding mySVM     * output file, computes classification error and accuracy (not yet precision and recall) estimates,     * creates an <tt>EstimatedPerformance</tt>-object for these xi-alpha-performance estimates, and      * adds it to the output of the operator.<br>     * <b>NOTE:</b> if the operator option <tt>xi_alpha_estimation</tt> is set to <tt>true</tt>     * <b>and</b> the <tt>ExampleSet</tt> passed to this operator set is a <tt>BatchedExampleSet</tt>,     * the performance estimation is only performed on the currently last batch of the example set.     * Otherwise the performance estimation is performed on the complete example set.     *     * <p>NOTE: currently two alternative estimation values are provided by the mySVM learner:     * <ol>     *   <li>sum of the absolute values of all alpha values, whose absolute value is greater or euqal to one</li>     *   <li>estimation of the expected error based on Thorsten Joachims xi-alpha-estimator criterion</li>     * </ol>     * Currently the first of the two criteria is used; in future versions this should be speficiable by     * a parameter of this operator.     * </p>     *     * <p>NOTE: the mySVM file <tt>mysvm.xialpha</tt> contains one line for each training example,     * where each line contains two values:     * <ol>     *   <li>alpha value of the corresponding example</li>     *   <li>value of the xi-alpha-criterion of the corresponding example (if this value is greater than or equal      *       to 1.0, this example may produce a leave-one-out error if it is left out in training)     * </ol>     * </p>     */    private PerformanceVector  scanXiAlphaValues (ExampleSet exampleSet, File xiAlphaFile) throws OperatorException { // 2003/07/30: new	//// 2003/07/30: new version: mySVM reads data from file 'xyz'	////                          => mySVM stores xi-alpha estimations in file 'xyz.xialpha'	// private PerformanceVector  scanXiAlphaValues (ExampleSet exampleSet) throws OperatorException {      // 2003/07/30: old	//// 2003/07/30: old version: data piped to mySVM 	//// 2003/07/30:              => mySVM stores xi-alpha estimations in file 'mysvm.xialpha' 	int estimationCriterion = ESTIMATION_CRITERION_ERROR;                          // RK/2002/09/18 	// int estimationCriterion = ESTIMATION_CRITERION_ALPHA_SUM;                   // RK/2002/07/05 	// int estimationCriterion = ESTIMATION_CRITERION_ALPHA_GREATER_ONE_SUM;       // RK/2002/09/13 	// int estimationCriterion = ESTIMATION_CRITERION_NO_OF_ALPHAS_GREATER_ONE;    // RK/2002/09/12	LogService.logMessage("MySVMLearner '"+getName()+"': xi-alpha-performance estimation "+			      "by mySVM's program 'mysvm', reading 'mysvm.xialpha':\n", LogService.MINIMUM);	boolean  estimateOnAllBatches;   // 'true'  = use all examples for xi-alpha-estimation,	                                 // 'false' = use only examples from the currently last batch;	// File     svmXiAlphaFile    = new File(TempFileService.getTempDir(), "mysvm.xialpha");                // 2003/07/30: old	// File     xiAlphaFile       = TempFileService.createTempFile(getName()+"_xialpha_", svmXiAlphaFile);  // 2003/07/30: old        int      noOfExamples         = 0;	int      noOfAlphasGreaterOne = 0;	double   error                = 0.0;	double   alphaSum             = 0.0;	double   alphaGreaterOneSum   = 0.0;	double   currentAlpha         = 0.0;	// BatchedExampleSet: use only examples from last batch for xi-alpha-estimation; Otherwise: use all examples:	estimateOnAllBatches = !(exampleSet instanceof BatchedExampleSet);	try {	    BufferedReader   xiAlphaFileReader = new BufferedReader(new FileReader(xiAlphaFile));	    String           line              = null;	    StringTokenizer  tokenizer         = null;	    if (estimateOnAllBatches) {		while ((line = xiAlphaFileReader.readLine()) != null) {		    line = line.trim();		    tokenizer = new StringTokenizer(line);		    if (tokenizer.countTokens() < 2) {			throw new FatalException("MySVMLearner '" + this.getName() + "': each line in the " +						 "mySVM file 'mysvm.xialpha' must contain at least two values.");		    }		    currentAlpha = Math.abs(Double.parseDouble(tokenizer.nextToken()));		    alphaSum += currentAlpha;		    if (currentAlpha >= 1.0) { alphaGreaterOneSum += currentAlpha; }		    if (currentAlpha > 1.0) { noOfAlphasGreaterOne++; }		    if (Double.parseDouble(tokenizer.nextToken()) >= 1) {       // (xi-alpha-criterion >= 1)			error++;                                                //   => potential error		    }  		    noOfExamples++;		}	    } else {		int            currentlyLastBatch = 0;     // index of last batch of the current time window		int            currentBatch       = 0;     // batch index of the current example		Example        currentExample     = null;		ExampleReader  exampleIterator    = exampleSet.getExampleReader();		currentlyLastBatch = ((BatchedExampleSet) exampleSet).getLastBatch();		while (((line = xiAlphaFileReader.readLine()) != null)		       && ((currentExample = exampleIterator.next()) != null)) {		    line = line.trim();		    currentBatch = (int) currentExample.getValue(((BatchedExampleSet) exampleSet).getBatchIndexAttribute());		    if (currentBatch == currentlyLastBatch){			tokenizer = new StringTokenizer(line);			if (tokenizer.countTokens() < 2) {			    throw new FatalException("MySVMLearner '" + this.getName() + "': each line in the " +						     "mySVM file 'mysvm.xialpha' must contain at least two values.");			}			currentAlpha = Math.abs(Double.parseDouble(tokenizer.nextToken()));			alphaSum += currentAlpha;			if (currentAlpha >= 1.0) { alphaGreaterOneSum += currentAlpha; }			if (currentAlpha > 1.0) { noOfAlphasGreaterOne++; }			if (Double.parseDouble(tokenizer.nextToken()) >= 1) {       // (xi-alpha-criterion >= 1)			    error++;                                                //   => potentieller Fehler;			}  			noOfExamples++;		    }		}	    }	    xiAlphaFileReader.close();	} catch (IOException e) {	    throw new UserError(this, e, 392, xiAlphaFile);	} finally {	    TempFileService.deleteTempFile(xiAlphaFile);	}		LogService.logMessage("MySVMLearner '"+getName()+"': estimated number of errors:  "+			      (long)error+" of "+noOfExamples+" examples",			      LogService.MINIMUM);	if (noOfExamples > 0) {	    error                /= noOfExamples;	    noOfAlphasGreaterOne /= noOfExamples;	    alphaSum             /= noOfExamples;	    alphaGreaterOneSum   /= noOfExamples;	} else { 	    error = 1.0;	    LogService.logMessage("MySVMLearner '"+this.getName()+"': setting error estimation to 1.0, " +				  "because number of examples for estimation equals zero", LogService.WARNING);	}	LogService.logMessage("MySVMLearner '"+getName()+"': estimated error:             "+			      error+" of "+noOfExamples, LogService.MINIMUM);	// ---- provide <tt>PerformanceVector</tt>-object to be passed to operator output by ----	// ---- the super class method <tt>apply()</tt>                                      ----	PerformanceVector  pv = new PerformanceVector();    // note: the first criterion in the vector is the one used	                                                    //       for optimization (RK: really? not 'main criterion'?)	if (estimationCriterion == ESTIMATION_CRITERION_ALPHA_SUM) {	    pv.addCriterion (new EstimatedPerformance ("alpha_sum", alphaSum, 						       exampleSet.getSize(), true));	}	if (estimationCriterion == ESTIMATION_CRITERION_ALPHA_GREATER_ONE_SUM) {	    pv.addCriterion (new EstimatedPerformance ("alpha_greater_one_sum", alphaGreaterOneSum, 						       exampleSet.getSize(), true));	}	if (estimationCriterion == ESTIMATION_CRITERION_NO_OF_ALPHAS_GREATER_ONE) {	    pv.addCriterion (new EstimatedPerformance ("no_of_alphas_greater_one", noOfAlphasGreaterOne,						       exampleSet.getSize(), true));	}	pv.addCriterion (new EstimatedPerformance ("xialpha_error", error, exampleSet.getSize(), true));	pv.addCriterion (new EstimatedPerformance ("xialpha_accuracy", (1.0-error), exampleSet.getSize(), false));	LogService.logMessage("MySVMLearner: ESTIMATOR error                   = " + error + 			      "  (at batch " + ((BatchedExampleSet) exampleSet).getLastBatch() + ")\n" +			      "            ESTIMATOR no. of alphas greater one = " + noOfAlphasGreaterOne + "\n" +			      "            ESTIMATOR alpha sum                 = " + alphaSum + "\n" +			      "            ESTIMATOR alpha greater one sum     = " + alphaGreaterOneSum,			      LogService.TASK);        return pv;    } // end of 'private EstimatedPerformance scanXiAlphaValues()'    /** returns <tt>true</tt>, if the parameter <tt>xi_alpha_estimation</tt> is set to <tt>true</tt>     *  (the default is <tt>false</tt>), and if the learning task is a classification task, because     *  in this case mySVM can use xi-alpha-estimates to estimate the performance on the training set     *  without using a test set.     */    public boolean  canEstimatePerformance() {	boolean  useXiAlpha       = getParameterAsBoolean("xi_alpha_estimation");	boolean  isClassification = ((getParameterAsString("pattern") != null) ||    // parameter 'pattern' set  or				     (getParameterAsString("regression") == null));  // parameter 'regression' not set	    // The method 'taskIsClassification(ExampleSet)' is more accurate than the above test, but requires an	    // <tt>ExampleSet</tt>-object, which is not available here. The above test cannot verify, wether the	    // label attribute is nominal and has exactly two labels as usually required for SVM classification.	if (useXiAlpha) {	    if (isClassification) { return true; }	    LogService.logMessage("MySVMLearner '"+getName()+"': parameter 'xi_alpha_estimation' is set to true, " +				  "but parameter 'pattern' is not set. Xi-alpha-estimation may only " +				  "be performed for classification tasks and hence is not performed here. " +				  "If a consecutive or encapsulating operator expects the results of this " +				  "estimation, the operator chain will fail to work properly.",				  LogService.WARNING);	    return false;	}        return false;    }    /** returns an object of the class <tt>EstimatedPerformance</tt> containing the xi-alpha-performance     *  estimates of the learned mySVM model, if the parameter <tt>xi_alpha_estimation</tt> is set to <tt>true</tt>     *  (the default is <tt>false</tt>), and if the learning task is a classification task, because     *  in this case mySVM can use xi-alpha-estimates to estimate the performance on the training set     *  without using a test set. Otherwise the method returns <tt>null</tt>.     */    public PerformanceVector  getEstimatedPerformance() {        return performanceEstimation;    }    /** sets the index of the class to use as "positive" (+1), for example     *  <tt>setPositiveLabelIndex(attribute.mapString("positive"))</tt>     */    public void  setPositiveLabelIndex(int index) {	this.positiveLabelIndex = index;    }    /** sepcifies the parameters of the <tt>MySVMLearner</tt>, their types,      *  their default values, and descriptions of them.     */    public List getParameterTypes() {	List types = super.getParameterTypes();	for (int i = 0; i < KERNEL_PARAMETER.length; i++) {	    if (KERNEL_PARAMETER[i].equals("type")) {		types.add(new ParameterTypeStringCategory("type", "The SVM kernel type.",							  KERNEL_TYPES));	    } else {		types.add(new ParameterTypeDouble(KERNEL_PARAMETER[i], "The SVM kernel parameter "+KERNEL_PARAMETER[i]+".",						  Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, true));	    }	}	types.add(new ParameterTypeBoolean("weighted_examples", "If set to true, the weight of the examples is used.", 					   false));	types.add(new ParameterTypeBoolean("sparse", "If set to true, sparse format is used for the input of the SVM.", 					   false));	types.add(new ParameterTypeCategory("task_type", "The type of the task, i.e. classification or regression.", 					    TASK_TYPES, AUTO));	types.add(new ParameterTypeBoolean("xi_alpha_estimation", "If set to true, the xi-alpha performance is estimated.", 					   false));	types.add(new ParameterTypeBoolean("scale", "If set to true, the training examples are scaled "+					   "to mean 0 and variance 1. Setting this parameter to false "+					   "may reduce numerical stability!",					   true));  	for (int i = 0; i < PARAMETER.length; i++)  	    types.add(PARAMETER[i]);	return types;    }}
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -