📄 conceptdriftadaptor.java

📁 著名的开源仿真软件yale
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* *  YALE - Yet Another Learning Environment *  Copyright (C) 2002, 2003 *      Simon Fischer, Ralf Klinkenberg, Ingo Mierswa,  *          Katharina Morik, Oliver Ritthoff *      Artificial Intelligence Unit *      Computer Science Department *      University of Dortmund *      44221 Dortmund,  Germany *  email: yale@ls8.cs.uni-dortmund.de *  web:   http://yale.cs.uni-dortmund.de/ * *  This program is free software; you can redistribute it and/or *  modify it under the terms of the GNU General Public License as  *  published by the Free Software Foundation; either version 2 of the *  License, or (at your option) any later version.  * *  This program is distributed in the hope that it will be useful, but *  WITHOUT ANY WARRANTY; without even the implied warranty of *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *  General Public License for more details. * *  You should have received a copy of the GNU General Public License *  along with this program; if not, write to the Free Software *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *  USA. 
*/package edu.udo.cs.yale.operator.time;import edu.udo.cs.yale.operator.parameter.*;import edu.udo.cs.yale.operator.OperatorException;import edu.udo.cs.yale.operator.FatalException;import edu.udo.cs.yale.MethodNotSupportedException;import edu.udo.cs.yale.tools.LogService;import edu.udo.cs.yale.tools.RandomGenerator;import edu.udo.cs.yale.tools.Ontology;import edu.udo.cs.yale.example.Attribute;import edu.udo.cs.yale.example.Example;import edu.udo.cs.yale.example.ExampleSet;import edu.udo.cs.yale.example.BatchedExampleSet;import edu.udo.cs.yale.example.ExampleReader;import edu.udo.cs.yale.operator.Value;import edu.udo.cs.yale.operator.Operator;import edu.udo.cs.yale.operator.OperatorChain;import edu.udo.cs.yale.operator.ValidationChain;import edu.udo.cs.yale.operator.IllegalInputException;import edu.udo.cs.yale.operator.IOContainer;import edu.udo.cs.yale.operator.IOObject;import edu.udo.cs.yale.operator.IODescription;import edu.udo.cs.yale.operator.learner.Model;import edu.udo.cs.yale.operator.performance.PerformanceVector;import edu.udo.cs.yale.operator.performance.RunVector;import edu.udo.cs.yale.operator.performance.SeriesVector;import edu.udo.cs.yale.operator.performance.PerformanceCriterion;import edu.udo.cs.yale.operator.performance.EstimatedPerformance;import java.util.*;import java.io.*;/** A <code>ConceptDriftAdaptor</code> encapsulates a concept drift experiment *  preserving the original order of the examples in the example set. *  A <code>ConceptDriftAdaptor</code> differs from a {@link ConceptDriftSimulator} *  in that it does not simulate any concept drift but handles the real drift present *  in the example set (if any real concept drift is present in the given example set). *  This operators only simulates the proceeding time steps and creates training and *  test sets from the complete data set for each time step accordingly. *  Whithin the time simulation, the examples are assumed to arrive in batches, i.e. 
*  a fixed number of examples is assumed to arrive at each single point in time. *  The operator requires as input an example set and returns as its output a *  {@link PerformanceVector} of the performance results obtained by the enclosed *  classification learner/applier chains averaged over all batches and a  *  {@link RunVector} of the result for each batch. *  The latter allows to observe the performance of the enclosed learner/applier *  chains over time. *  <br/> *  The first inner operator (or operator chain) must be a classification learner *  and the second a corresponding classification model applier and evaluator chain, *  usually consisting of a model applier and a performance evaluator, which returns *  a {@link PerformanceVector}. * *  <!-- ===== The following information is automatically generated for the Yale Tutorial: * *  <h4>Parameter operators enclosed in ConceptDriftAdaptor:</h4> *  The parametrization of this operator in the Yale configuration file for the *  experiment has to contain <em>two operators</em> of the following types: *  <ol> *    <li>First a classification learning chain that delivers a learned model *        ({@link Model});</li> *    <li>Second a classification applier chain able to use this model to predict  *        the labels of new examples and evaluate them. This chain delivers a *        {@link PerformanceVector} (describing the performance of the classification *        model in one batch).</li> *  </ol> * *  <h4>Parameters:</h4> *  <ul> *    <li><b>number_of_batches</b>: specifies the number of time steps to be simulated; *                                  the size of a batch, i.e. 
the number of examples in a *                                  batch, is the total number of examples divided by the *                                  number of batches.</li> *    <li><b>learner_type</b>: type of the enclosed learner: *        <ul> *          <li><i>static</i>:        static learner to be used on all old data *                                    (= full memory approach).</li> *          <li><i>static_window</i>: static learner to be used on a fixe time window on the *                                    old data (= no memory approach, in case of window size 1,  *                                    and other fixed window size approach otherwise).</li> *          <li><i>adaptive</i>:      adaptive learner that maintains an adaptive time window  *                                    or example weighting by itself.</li> *        </ul> *        </li> *    <li><b>window_size</b>: size of the fixed time window in number of batches  *                            (if the learner type <code>static_window</code> is used); *                            this parameter is only considered, if the learner type *                            is <code>static_window</code>, and ignored otherwise. 
*        </li> *  </ul> * *  <h4>Operator-Input:</h4> *  <ol> *    <li>{@link ExampleSet}: set of examples to be used for the concept drift simulation *                            experiment; the class label attribute must be specified and given.</li> *  </ol> * *  <h4>Operator-Output:</h4> *  <ol> *    <li>{@link PerformanceVector} of averaged performance results ({@link PerformanceCriterion}), *        averaged over all batches.</li> *    <li>{@link RunVector} containing one {@link PerformanceVector} for each batch with the *        performance on this batch.</li> *  </ol> * *  <h4>Values:</h4> *  <ul> *    <li><code>performance</code> returns the current (main) performance criterion value</li> *    <li><code>variance</code> returns the current performance criterion variance (or standard deviation)</li> *  </ul> * *  <h4>Example configuration of this operator in an experiment chain (Yale configuration file in XML format):</h4> *  <pre> *     ... *  </pre> * * *  <h4>Bibliography:</h4> *  ... * *  ===== --> * *  @see edu.udo.cs.yale.operator.time.ConceptDriftSimulator *  @see edu.udo.cs.yale.operator.learner.BatchWindowLearner *  @see edu.udo.cs.yale.operator.learner.BatchWeightLearner * *  @yale.xmlclass ConceptDriftAdaptor * *  @author  Ralf Klinkenberg *  @version $Id: ConceptDriftAdaptor.java,v 2.10 2003/09/04 14:57:51 klinkenb Exp $ */public class ConceptDriftAdaptor extends ValidationChain {    // History:    // -> RK/2003/06/06: first implementation of this operator: single run only, no leave-one-cycle-out evaluation;    private static final Class[]  INPUT_CLASSES  = { ExampleSet.class };   // exactly like a ValidationChain    private static final Class[]  OUTPUT_CLASSES = { PerformanceVector.class, RunVector.class };    /** type of enclosed learner: static learner to be used on all old data (= full memory approach). 
*/
    private static final int  STATIC_LEARNER        = 0;

    /** type of enclosed learner: static learner to be used on a fixed time window on the old data
     *  (= no memory approach for window size 1, or other fixed window size approach otherwise).
     */
    private static final int  STATIC_WINDOW_LEARNER = 1;

    /** type of enclosed learner: adaptive learner that maintains an adaptive time window or example weighting by itself. */
    private static final int  ADAPTIVE_LEARNER      = 2;

    /** names of the learner types as they may be specified in the Yale configuration file. */
    private static final String[]  LEARNER_TYPE_NAMES = {"static","static_window","adaptive"};

    /** default time window size of a fixed window (= 3 batches). */
    private static final int  DEFAULT_TIME_WINDOW_SIZE = 3;

    // The following two members are not declared here; per the original author's note they live in the super class:
    // private PerformanceCriterion  lastPerformance;   // in super class
    // private IOContainer           learningResult;    // in super class

    private ExampleSet    inputSet;                   // input example set passed to this operator
    private ExampleSet    exampleSet;                 // copy of this example set with additional attributes for internal use
                                                      //   (weight, time_index, batch_index, user_interest (= new class label))
    // private Attribute  weightAttribute;            // weight attribute (weight of an example)  //// <- obsolete /////
    private Attribute     labelAttribute;             // class label attribute
    private Attribute     timeIndexAttribute;         // time index attribute (describing simulated order of examples in time)
    private Attribute     batchIndexAttribute;        // batch index attribute (number of the batch an example is assigned to)
    private int           noOfBatches, currentBatch;  // noOfBatches: read from parameter 'number_of_batches' in apply();
                                                      // currentBatch: reported to the outside through the "batch" value
    private int           noOfExamples;               // total no. of examples (e.g. documents) in the input example set
    private int           batchSize;                  // no. of documents per batch (evenly distributed, leftovers discarded);
                                                      //   computed in apply() as noOfExamples / noOfBatches, truncated to int
    private int[]         timeIndex2example;          // maps a time point to the example (e.g. document) at that time point
    private int[]         example2timeIndex;          // maps an example (e.g. document) to a time point
    private int[]         example2batchIndex;         // maps an example (e.g. document) to a batch
    private int[]         example2label;              // maps an example (e.g. document) to its (user interest) label
    private int[]         noOfExamplesInBatch;        // noOfExamplesInBatch[b] = no. of examples in batch 'b'
    private int           learnerType;          // type of enclosed learner (static vs. static window vs. adaptive);
                                                //   read from parameter 'learner_type' in apply()
    private int           timeWindowSize;       // window size in case of a static window learner (window of fixed size);
                                                //   read from parameter 'window_size' in apply()

    /** Creates the operator and registers the externally visible value <code>batch</code>,
     *  which reports the current content of the field <code>currentBatch</code>.
     */
    public ConceptDriftAdaptor() {
        // Add the number of the current batch of this operator as a value visible
        // from outside of this operator. According to the original author's note it
        // starts with 1 and goes up to 'noOfBatches' (= parameter 'number_of_batches');
        // it can for example be used to monitor the progress of this operator.
        addValue(new Value("batch", "The number of the current batch.") {
                // The int field is widened to double implicitly on return.
                public double getValue() {
                    return currentBatch;
                }
            });
    }

    /** returns the classes this operator expects as input. */
    public Class[] getInputClasses() { return INPUT_CLASSES; }

    /** returns the classes this operator provides as output.
*/    public Class[] getOutputClasses() { return OUTPUT_CLASSES; }    // ==== apply() : Concept Drift Simulation ====    //    public IOObject[] apply() throws OperatorException {	// #### initialize ####	LogService.logMessage("ConceptDriftAdaptor '" + getName() + 			      "': prepare time simulation for concept drift experiment", 			      LogService.TASK);	// ---- retrieve the operator input objects ----	IOContainer  input = getInput();                                     // operator input objects	inputSet           = (ExampleSet)input.getInput(ExampleSet.class);   // input example set	exampleSet         = (ExampleSet)inputSet.clone();                   // internal copy of the input example set	// ---- retrieve the operator parameter (no. of batches) ----	noOfBatches = getParameterAsInt("number_of_batches");  // min. 2 batches, default 10 batches	// ---- compute further variables ----	/* TMP/2003/04/30 */ LogService.logMessage("ConceptDriftAdaptor '" + getName() + "': check example set", LogService.MINIMUM);	noOfExamples = (exampleSet.getSize());	batchSize    = ((int) (((double) noOfExamples) / ((double) noOfBatches)));	// sanity check:  min. 2 batches, min. 1 example per batch (=> min. 
2 examples):	if (noOfBatches < 2) {	    throw new FatalException("ConceptDriftAdaptor '" + getName() +				     "': There must be at least 2 batches for " +				     "the time simulation in a concept drift " +				     "experiment (here "+noOfBatches+" batches).");	}	if (batchSize < 1) {    	    throw new FatalException("ConceptDriftAdaptor '" + getName() +				     "': There must be at least as many examples as " +				     "batches for the time simulation in a concept " +				     "drift experiment (here "+noOfExamples+" examples " +				     "are to be distributed to " + noOfBatches + " batches).");	}	// ---- retrieve the operator parameters (learner type, window size) ----	learnerType    = getParameterAsInt ("learner_type");	timeWindowSize = getParameterAsInt ("window_size");	/* TMP/2003/04/30 */ LogService.logMessage("ConceptDriftAdaptor '" + getName() + "': create new attributes", LogService.TASK);        // #### add new attributes to example set:  time_index, batch_index, weight ####	// #### (and set 'time_index', and 'batch_index' to 'unused')               ####	/* TMP/2003/04/30 */ LogService.logMessage("ConceptDriftAdaptor '" + getName() + "': get label attribute", LogService.TASK);	/* TMP/2003/04/30 */ LogService.logMessage("ConceptDriftAdaptor '" + getName() + "': label = "+exampleSet.getLabel(), LogService.TASK);	labelAttribute = exampleSet.getLabel();	//// create special weight attribute (get /set values via Example.getWeight /.setWeight):	/* TMP/2003/04/30 */ LogService.logMessage("ConceptDriftAdaptor '" + getName() + "': create weight attribute", LogService.TASK);	exampleSet.createWeightAttribute();	//// create new attributes for the time index and the batch index of an example:	/* TMP/2003/04/30 */ LogService.logMessage("ConceptDriftAdaptor '" + getName() + "': create time index attribute", LogService.TASK);	timeIndexAttribute = new Attribute (Attribute.createName("time_index"),                  // create new attribute					    Ontology.INTEGER, 
Ontology.SINGLE_VALUE,					    Attribute.UNDEFINED_BLOCK_NR, null);	(exampleSet.getExampleTable()).addAttribute(timeIndexAttribute);                         // create example table column	// exampleSet.appendAttributeReference (new AttributeReference (timeIndexAttribute,false)); // register ref. in example set	/* TMP/2003/04/30 */ LogService.logMessage("ConceptDriftAdaptor '" + getName() + "': create batch index attribute", LogService.TASK);	batchIndexAttribute = new Attribute (Attribute.createName("batch_index"),					     Ontology.INTEGER, Ontology.SINGLE_VALUE,					     Attribute.UNDEFINED_BLOCK_NR, null);	(exampleSet.getExampleTable()).addAttribute(batchIndexAttribute);	// exampleSet.appendAttributeReference (new AttributeReference (batchIndexAttribute,false));	LogService.logMessage("ConceptDriftAdaptor '" + getName() + "': " +			      "start time simulation for concept drift experiment",			      LogService.TASK);	// #### start simulation ####
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -