📄 conceptdriftsimulator.java

📁 著名的开源仿真软件yale
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
12 3 4 下一页
/* *  YALE - Yet Another Learning Environment *  Copyright (C) 2002, 2003 *      Simon Fischer, Ralf Klinkenberg, Ingo Mierswa,  *          Katharina Morik, Oliver Ritthoff *      Artificial Intelligence Unit *      Computer Science Department *      University of Dortmund *      44221 Dortmund,  Germany *  email: yale@ls8.cs.uni-dortmund.de *  web:   http://yale.cs.uni-dortmund.de/ * *  This program is free software; you can redistribute it and/or *  modify it under the terms of the GNU General Public License as  *  published by the Free Software Foundation; either version 2 of the *  License, or (at your option) any later version.  * *  This program is distributed in the hope that it will be useful, but *  WITHOUT ANY WARRANTY; without even the implied warranty of *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *  General Public License for more details. * *  You should have received a copy of the GNU General Public License *  along with this program; if not, write to the Free Software *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *  USA. 
*/package edu.udo.cs.yale.operator.time;import edu.udo.cs.yale.operator.parameter.*;import edu.udo.cs.yale.operator.OperatorException;import edu.udo.cs.yale.operator.FatalException;import edu.udo.cs.yale.MethodNotSupportedException;import edu.udo.cs.yale.tools.LogService;import edu.udo.cs.yale.tools.RandomGenerator;import edu.udo.cs.yale.tools.Ontology;import edu.udo.cs.yale.example.Attribute;import edu.udo.cs.yale.example.Example;import edu.udo.cs.yale.example.ExampleSet;import edu.udo.cs.yale.example.BatchedExampleSet;import edu.udo.cs.yale.example.ExampleReader;import edu.udo.cs.yale.operator.Value;import edu.udo.cs.yale.operator.Operator;import edu.udo.cs.yale.operator.OperatorChain;import edu.udo.cs.yale.operator.ValidationChain;import edu.udo.cs.yale.operator.IllegalInputException;import edu.udo.cs.yale.operator.IOContainer;import edu.udo.cs.yale.operator.IOObject;import edu.udo.cs.yale.operator.IODescription;import edu.udo.cs.yale.operator.learner.Model;import edu.udo.cs.yale.operator.performance.PerformanceVector;import edu.udo.cs.yale.operator.performance.RunVector;import edu.udo.cs.yale.operator.performance.SeriesVector;import edu.udo.cs.yale.operator.performance.PerformanceCriterion;import edu.udo.cs.yale.operator.performance.EstimatedPerformance;import java.util.*;import java.io.*;/** A <code>ConceptDriftSimulator</code> encapsulates a concept drift experiment *  (like it is for example described in {@yale.cite Klinkenberg/Joachims/2000a}). *  This operator simulates the interest of a user in certain examples (for *  example text documents) over time. *     Within the simulation, the examples are assumed to arrive in batches, i.e. *  a fixed number of examples is assumed to arrive at each single point in time. *  The examples (e.g. text documents) may originate from several data streams *  (e.g. news topics) and the probability of an example to be relevant depends  *  on the stream the example comes from and on the current point in time (batch). 
*     The operator requires as input an example set and returns as its output a *  {@link PerformanceVector} of the performance results obtained by the enclosed *  classification learner/applier chains averaged over all batches and all runs *  and a {@link RunVector} of the result for each batch averaged over all runs. *     The latter allows to observe the performance of the enclosed learner/applier *  chains over time. *     The number of runs for averaging can be specified and is comparable to the *  number of folds (<code>number_of_validations</code>) in a cross-validation experiment *  ({@link edu.udo.cs.yale.operator.XValidation}).<br /> *   *  The first inner operator (or operator chain) must be a classification learner *  and the second a model applier and evaluator chain, *  usually consisting of a model applier and a performance evaluator, which returns *  a {@link PerformanceVector}.<br /> *  *  Please see figure {@yale.ref fig:concept_drift_example|Concept Drift Example} for an example experiment setup. * *  <!-- this is automatically generated * *  <h4>Parameter operators enclosed in ConceptDriftSimulator:</h4> *  The parametrization of this operator in the Yale configuration file for the *  experiment has to contain <i>two operators</i> of the following types: *  <ol> *    <li>First a classification learning chain that delivers a learned {@link Model};</li> *    <li>Second a classification applier chain able to use this model to predict  *        the labels of new examples and evaluate them. 
This chain delivers a *        {@link PerformanceVector} (describing the performance of the classification *        model in one run on one batch).</li> *  </ol> * *  <h4>Parameters:</h4> *  <ul> *    <li><b>number_of_runs</b>: specifies how often the concept drift simulation  *                               should be repeated for computing the average results *                               (similar to number of folds (<code>number_of_validations</code>) *                               in cross-validation ({@link edu.udo.cs.yale.operator.XValidation})).</li> *    <li><b>number_of_batches</b>: specifies the number of time steps to be simulated; *                                  the size of a batch, i.e. the number of examples in a *                                  batch, is the total number of examples divided by the *                                  number of batches.</li> *    <li><b>number_of_streams</b>: specifies the number of data streams the examples come  *                                  from.</li> *    <li><b>data_stream_names</b>: specifies the names of the data streams the examples come  *                                  from (i.e. the possible values of the class label attribute).</li> *    <li><b>data_stream_relevance</b>: specifies the probability for examples to be relevant *                                      to the simulated user interest depending on the data *                                      stream they come from and the current batch; all *                                      probabilities not explicitly specified are considered *                                      to be <code>0.0</code> (= non-relevant). *                                      For the value of this parameter to be parsed correctly, *                                      each line of this value should contain the specification *                                      for exactly one stream. 
Each such line should start with *                                      the stream name followed by ":" and the probability values *                                      for the examples from that stream separated by whitespace. *        </li> *    <li><b>learner_type</b>: type of the enclosed learner: *        <ul> *          <li><i>static</i>:        static learner to be used on all old data *                                    (= full memory approach).</li> *          <li><i>static_window</i>: static learner to be used on a fixed time window on the *                                    old data (= no memory approach, in case of window size 1,  *                                    and other fixed window size approach otherwise).</li> *          <li><i>adaptive</i>:      adaptive learner that maintains an adaptive time window  *                                    or example weighting by itself.</li> *        </ul> *        </li> *    <li><b>window_size</b>: size of the fixed time window in number of batches  *                            (if the learner type <code>static_window</code> is used); *                            this parameter is only considered, if the learner type *                            is <code>static_window</code>, and ignored otherwise. 
*        </li> *  </ul> * *  <h4>Operator-Input</h4> *  <ol> *    <li>{@link ExampleSet}: set of examples to be used for the concept drift simulation *                            experiment; the class label attribute of an examples must *                            contain the name of the data stream the example originates from.</li> *  </ol> *  <h4>Operator-Output</h4> *  <ol> *    <li>{@link PerformanceVector} of averaged performance results ({@link PerformanceCriterion}), *        averaged over all batches and runs.</li> *    <li>{@link RunVector} containing one {@link PerformanceVector} for each batch with the *        average performance on this batch, averaged over all runs.</li> *  </ol> * *  <h4>Values:</h4> *  <ul> *    <li><code>performance</code> returns the current (main) performance criterion value</li> *    <li><code>variance</code> returns the current performance criterion variance (or standard deviation)</li> *    <li><code>run</code> returns the number of the current run</li> *  </ul> *  --> * *  {@yale.xmlinput ConceptDriftSimulatorExample.xml|concept_drift_example|Example configuration of ConceptDriftSimulator} * *  @see edu.udo.cs.yale.operator.XValidation *  @see edu.udo.cs.yale.operator.time.ConceptDriftAdaptor *  @see edu.udo.cs.yale.operator.learner.BatchWindowLearner *  @see edu.udo.cs.yale.operator.learner.BatchWeightLearner * *  @yale.xmlclass ConceptDriftSimulator *  @yale.reference Klinkenberg/Joachims/2000a * *  @author  Ralf Klinkenberg *  @version $Id: ConceptDriftSimulator.java,v 2.27 2003/09/04 14:57:57 klinkenb Exp $ */public class ConceptDriftSimulator extends ValidationChain {    // History:    // -> RK/2001 / RK/2002: re-implementation of the concept drift simulator of the text classification    //                       experiment environment DyCE (which was e.g. 
used in [Klinkenberg/1998a] and     //                       [Klinkenberg/Joachims/2000a]) in Yale;    // -> RK/2002/05/21: adaption to Yale extensions related to the parameter service etc.;    // -> RK/2003/03/21: merger of RK's Yale 1.0 version into Yale 2.0;    // -> RK/2003/06/18: JavaDoc /API comments and example configuration in XML updated (content);    // -> RK/2003/06/25: JavaDoc /API comments and example configuration in XML updated (page width);    private static final Class[]  INPUT_CLASSES  = { ExampleSet.class };   // exactly like a ValidationChain    private static final Class[]  OUTPUT_CLASSES = { PerformanceVector.class, RunVector.class };    /** label to be used for examples considered interesting to the user (relevant) in the concept drift simulation */    // private static final String   POS_LABEL = "+1";    // label as String  --    //// private static final double      POS_LABEL = +1.0;    // label as double  ++    /** label to be used for examples considered not interesting to the user (non-relevant) in the concept drift simulation */    // private static final String   NEG_LABEL = "-1";    // label as String  --    //// private static final double      NEG_LABEL = -1.0;    // label as double  ++    /** type of enclosed learner: static learner to be used on all old data (= full memory approach). */    private static final int  STATIC_LEARNER        = 0;    /** type of enclosed learner: static learner to be used on a fixe time window on the old data     *  (= no memory approach for window size 1, or other fixed window size approach otherwise).     */    private static final int  STATIC_WINDOW_LEARNER = 1;    /** type of enclosed learner: adaptive learner that maintains an adaptive time window or example weighting by itself. */    private static final int  ADAPTIVE_LEARNER      = 2;    /** names of the learner types as they may be specified in the Yale configuration file. 
*/    private static final String[]  LEARNER_TYPE_NAMES = {"static","static_window","adaptive"};    /** default time window size of a fixed window (= 3 batches). */    private static final int  DEFAULT_TIME_WINDOW_SIZE = 3;    // private PerformanceCriterion  lastPerformance;   // in super class    // private IOContainer           learningResult;    // in super class        private ExampleSet    inputSet;                   // input example set passed to this operator    private ExampleSet    exampleSet;                 // copy of this example set with additional attributes for internal use                                                      //   (weight, time_index, batch_index, user_interest (= new class label)    // private Attribute  weightAttribute;            // weight attribute (weight of an example)  //// <- obsolete /////    private Attribute     timeIndexAttribute;         // time index attribute (describing simulated order of examples in time)    private Attribute     batchIndexAttribute;        // batch index attribute (number of the batch an example is assigned to)    private Attribute     streamNameAttribute;        // original class label: name of the stream an example comes from    private Attribute     userInterestAttribute;      // new class label: simulated user interest in an example    private int           noOfRuns,    currentRun;    // (= XVal.number, XVal.iteration)
12 3 4 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -