📄 conceptdriftsimulator.java.old

📁 著名的开源仿真软件yale
💻 OLD
📖 第 1 页 / 共 4 页
字号:
 *  @version $Id: ConceptDriftSimulator.java,v 2.6 2003/04/04 11:59:30 fischer Exp $
 */
public class ConceptDriftSimulator extends ValidationChain {

    // History:
    // -> RK/2001 / RK/2002: re-implementation of the concept drift simulator of the text classification
    //                       experiment environment DyCE (e.g. used in [Klinkenberg/1998a] and
    //                       [Klinkenberg/Joachims/2000a]) into Yale;
    // -> RK/2002/05/21: adaption to Yale extensions related to the parameter service etc.;
    // -> RK/2003/03/21: merger of RK's Yale 1.0 version into Yale 2.0;

    /** classes this operator expects as input (see {@link #getInputClasses()}). */
    private static final Class[]  INPUT_CLASSES  = { ExampleSet.class };   // exactly like a ValidationChain

    /** classes this operator provides as output (see {@link #getOutputClasses()}). */
    private static final Class[]  OUTPUT_CLASSES = { PerformanceVector.class, RunVector.class };

    // (disabled) label to be used for examples considered interesting to the user (relevant)
    //            in the concept drift simulation:
    // private static final String   POS_LABEL = "+1";    // label as String  --
    //// private static final double      POS_LABEL = +1.0;    // label as double  ++

    // (disabled) label to be used for examples considered not interesting to the user (non-relevant)
    //            in the concept drift simulation:
    // private static final String   NEG_LABEL = "-1";    // label as String  --
    //// private static final double      NEG_LABEL = -1.0;    // label as double  ++

    /** type of enclosed learner: static learner to be used on all old data (= full memory approach). */
    private static final int  STATIC_LEARNER        = 0;

    /** type of enclosed learner: static learner to be used on a fixed time window on the old data
     *  (= no memory approach for window size 1, or other fixed window size approach otherwise).
     */
    private static final int  STATIC_WINDOW_LEARNER = 1;

    /** type of enclosed learner: adaptive learner that maintains an adaptive time window or example weighting by itself. */
    private static final int  ADAPTIVE_LEARNER      = 2;

    /** names of the learner types as they may be specified in the Yale configuration file.
     *  The array positions line up with the values of STATIC_LEARNER, STATIC_WINDOW_LEARNER and ADAPTIVE_LEARNER.
     */
    private static final String[]  LEARNER_TYPE_NAMES = {"static","static_window","adaptive"};

    /** default time window size of a fixed window (= 3 batches). */
    private static final int  DEFAULT_TIME_WINDOW_SIZE = 3;

    // private PerformanceCriterion  lastPerformance;   // in super class
    // private IOContainer           learningResult;    // in super class

    private ExampleSet    inputSet;                   // input example set passed to this operator
    private ExampleSet    exampleSet;                 // copy of this example set with additional attributes for internal use
                                                      //   (weight, time_index, batch_index, user_interest (= new class label))
    // private Attribute  weightAttribute;            // weight attribute (weight of an example)  //// <- obsolete /////
    private Attribute     timeIndexAttribute;         // time index attribute (describing simulated order of examples in time)
    private Attribute     batchIndexAttribute;        // batch index attribute (number of the batch an example is assigned to)
    private Attribute     streamNameAttribute;        // original class label: name of the stream an example comes from
    private Attribute     userInterestAttribute;      // new class label: simulated user interest in an example

    // NOTE(review): 'currentRun' and 'currentBatch' are not assigned anywhere in the visible part of this
    //               class (apply() uses the loop locals 'run' and 'batch') -- confirm they are updated elsewhere.
    private int           noOfRuns,    currentRun;    // (= XVal.number, XVal.iteration)
    private int           noOfBatches, currentBatch;
    private int           noOfStreams;                // no. of input data streams (e.g. no. of topic of text documents)
    private int           noOfExamples;               // total no. of examples (e.g. documents) in the input example set
    private int           batchSize;                  // no. of documents per batch (evenly distributed, left overs discarded)

    private String[]      streamNames;
    private double[][]    streamRelevance;            // streamRelevance[s][b] = probability for a example from stream 's'
                                                      //                         to be relevant in batch 'b'

    private int[]         timeIndex2example;          // maps a time point to the example (e.g. document) at that time point
    private int[]         example2timeIndex;          // maps an example (e.g. document) to a time point
    private int[]         example2batchIndex;         // maps an example (e.g. document) to a batch
    private int[]         example2label;              // maps an example (e.g. document) to its (user interest) label
    private int[]         example2streamIndex;        // example2streamIndex[e] = index of the stream the example 'e' comes from
    private int[][]       noOfStreamExamplesInBatch;  // noOfStreamExamplesInBatch[s][b] = no. of examples from stream 's'
                                                      //                                   in batch 'b'
    private int[]         noOfExamplesInBatch;        // noOfExamplesInBatch[b] = no. of examples in batch 'b'

    private int           learnerType;          // type of enclosed learner (static vs. static window vs. adaptive)
    private int           timeWindowSize;       // window size in case of a static window learner (window of fixed size)

    // marker value for "no valid stream index" (not used within the visible part of this class)
    private static final int  ILLEGAL_STREAM_INDEX = -1;

    /**
     * Creates the operator and registers a value named "run" that reports the field {@code currentRun}.
     */
    public ConceptDriftSimulator() {
        // Make the number of the current run of this operator, which starts with 1 and
        // goes up to 'noOfRuns' (= parameter 'number_of_runs'), and which can for example
        // be used to monitor the progress of this operator, externally accessible:
        addValue(new Value("run", "The number of the current run.") {
                public double getValue() {
                    return currentRun;
                }
            });
    }

    /** returns the classes this operator expects as input. */
    public Class[] getInputClasses() { return INPUT_CLASSES; }

    /** returns the classes this operator provides as output. */
    public Class[] getOutputClasses() { return OUTPUT_CLASSES; }

    // ==== apply() : Concept Drift Simulation ====
    //
    /**
     * Runs the concept drift simulation on the input example set.
     *
     * Steps visible in this excerpt:
     * <ol>
     *   <li>clone the input {@code ExampleSet} and read the parameters "number_of_runs", "number_of_batches",
     *       "number_of_streams", "data_stream_names", "data_stream_relevance", "learner_type" and "window_size";</li>
     *   <li>create a weight attribute and add the attributes "time_index", "batch_index" and "user_interest"
     *       to the example table, then make "user_interest" the label of the working copy;</li>
     *   <li>for every run: re-sample the batch and label assignment, then for every batch b = 1 .. noOfBatches-1
     *       learn on batches firstBatch .. b-1 and evaluate on batch b (batch 0 is never used as a test set);</li>
     *   <li>average the collected results over runs and batches and place them into a two-element output array
     *       (index 0: overall average, index 1: averaged time series).</li>
     * </ol>
     *
     * A {@code FatalException} is thrown if fewer than 2 batches are requested or if there are fewer examples
     * than batches.
     *
     * NOTE(review): the end of this method (including its return statement) lies beyond this excerpt.
     *
     * @throws OperatorException as declared; which calls raise it is not visible here
     */
    public IOObject[] apply() throws OperatorException {

        // #### initialize ####

        LogService.logMessage("ConceptDriftSimulator '" + getName() + "': prepare concept drift simulation", LogService.TASK);

        // ---- retrieve the operator input objects ----

        IOContainer  input = getInput();                                     // operator input objects
        inputSet           = (ExampleSet)input.getInput(ExampleSet.class);   // input example set
        exampleSet         = (ExampleSet)inputSet.clone();                   // internal copy of the input example set

        // ---- retrieve the operator parameters (no. of runs, no. of batches, data streams) ----

        noOfRuns    = getParameterAsInt("number_of_runs");     // min. 2 runs, default 10 runs
        noOfBatches = getParameterAsInt("number_of_batches");  // min. 2 batches, default 10 batches
        noOfStreams = getParameterAsInt("number_of_streams");  // min. 2 streams, default 2 streams

        // data_stream_relevance[s][b] = probability of a document of stream 's' (= Topic t) to be relevant in batch 'b')
        // NOTE(review): the arrays are allocated before the 'noOfBatches' sanity check further below.
        streamNames     = new String[noOfStreams];
        streamRelevance = new double[noOfStreams][noOfBatches];
        String  streamNamesString     = getParameterAsString("data_stream_names");
        scanStreamNames (streamNames, streamNamesString);
        String  streamRelevanceString = getParameterAsString("data_stream_relevance");
        scanStreamRelevanceSpecification (streamRelevance, streamRelevanceString, streamNames, exampleSet);

        // ---- compute further variables ----

        noOfExamples = (exampleSet.getSize());
        // the cast to int truncates, so examples that do not fill a whole batch are left over
        batchSize    = ((int) (((double) noOfExamples) / ((double) noOfBatches)));

        // sanity check:  min. 2 batches, min. 1 example per batch (=> min. 2 examples):
        if (noOfBatches < 2) {
            throw new FatalException("ConceptDriftSimulator '"+getName()+"': There must be at least 2 batches " +
                                     "for a concept drift simulation (here "+noOfBatches+" batches).");
        }
        if (batchSize < 1) {
            throw new FatalException("ConceptDriftSimulator '"+getName()+"': There must be at least as many examples as " +
                                     "batches for a concept drift simulation (here "+noOfExamples+" examples are to be " +
                                     "distributed to " + noOfBatches + " batches).");
        }

        // ---- retrieve the operator parameters (learner type, window size) ----

        learnerType    = getParameterAsInt ("learner_type");
        timeWindowSize = getParameterAsInt ("window_size");

        // #### add new attributes to example set:  [stream[_index]], time_index, batch_index, weight, user_interest ####
        // #### (and set 'stream', 'time_index', and 'batch_index' to 'unused')                                      ####

        // keep old class labels (= data stream names) in a new attribute before dropping the old class label:
        streamNameAttribute = (Attribute) (exampleSet.getLabel()).clone();
        streamNameAttribute.setName (Attribute.createName("stream_name"));
        // exampleSet.appendAttributeReference (new AttributeReference (streamNameAttribute,false));

        //// create special weight attribute (get /set values via Example.getWeight /.setWeight):
        exampleSet.createWeightAttribute();

        //// create new attributes for the time index and the batch index of an example:
        timeIndexAttribute = new Attribute (Attribute.createName("time_index"),                  // create new attribute
                                            Ontology.INTEGER, Ontology.SINGLE_VALUE,
                                            Attribute.UNDEFINED_BLOCK_NR, null);
        (exampleSet.getExampleTable()).addAttribute(timeIndexAttribute);                         // create example table column
        // exampleSet.appendAttributeReference (new AttributeReference (timeIndexAttribute,false)); // register ref. in example set

        batchIndexAttribute = new Attribute (Attribute.createName("batch_index"),
                                             Ontology.INTEGER, Ontology.SINGLE_VALUE,
                                             Attribute.UNDEFINED_BLOCK_NR, null);
        (exampleSet.getExampleTable()).addAttribute(batchIndexAttribute);
        // exampleSet.appendAttributeReference (new AttributeReference (batchIndexAttribute,false));

        //// create new attribute for the new class labels (= simulated user interest):
        userInterestAttribute = new Attribute (Attribute.createName("user_interest"),
                                               Ontology.CLASSIFICATION, Ontology.SINGLE_VALUE,
                                               Attribute.UNDEFINED_BLOCK_NR, null);
        (exampleSet.getExampleTable()).addAttribute(userInterestAttribute);
        //// exampleSet.appendAttributeReference (new AttributeReference (userInterestAttribute,true));

        ////// change class label attribute from stream name (original) to user interest (new):
        exampleSet.setLabel (userInterestAttribute);

        LogService.logMessage("ConceptDriftSimulator '" + getName() + "': start concept drift simulation", LogService.TASK);

        // #### loop for runs ####

        BatchedExampleSet  currentExampleSet;                             // current ExampleSet

        // performance result data structures:
        SeriesVector       performanceResultMatrix = new SeriesVector();  // results of all batches for all runs
        RunVector          singleRunResults;                              // result time series of a single run
        RunVector          averagedTimeSeriesResults;                     // average result time series, averaged over all runs
        PerformanceVector  averagedOverallResults;                        // overall result averaged over all runs and batches

        for (int run = 0; run < noOfRuns; run++) {

            singleRunResults = new RunVector();

            // #### sample example2batch and example2class ####
            // (a fresh assignment of examples to batches and labels is drawn for every run)

            sampleExamples2Batches();
            sampleExamples2Labels();

            // #### loop for batches ####

            // Training set:  batches 0..b-1  (for induction)  (vs.: for transduction also batch b, but without labels)
            // Test set:      batch   b
            // The loop starts at batch 1 because batch 0 has no preceding batches to train on.

            for (int batch = 1; batch < noOfBatches; batch++) {

                LogService.logMessage("ConceptDriftSimulator '" + getName() + "': current run = " + (run+1) +
                                      ", current batch = " + batch + "\n", LogService.TASK);

                // ---- prepare training set ----

                // induction:
                int  firstBatch = 0;
                switch (learnerType) {
                case STATIC_WINDOW_LEARNER :
                    // fixed window: train on at most the last 'timeWindowSize' batches before the test batch
                    firstBatch = maximumInt (0, batch-timeWindowSize);
                    break;
                case STATIC_LEARNER :
                case ADAPTIVE_LEARNER :
                default :
                    // all previous batches are handed to the learner
                    firstBatch = 0;
                }
                currentExampleSet = new BatchedExampleSet (exampleSet, batchIndexAttribute, firstBatch, batch-1);
                //
                // // transduction:
                // currentExampleSet = new BatchedExampleSet (exampleSet, batchIndexAttribute, firstBatch, batch);
                // // ... for transduction: clone label and set to unlabeled for last batch

                // ---- call learning chain ----

                // ((SVMLearner)((OperatorChain)this.getOperator(0)).getOperator(0)).setPositiveLabelIndex((int)userInterestAttribute.mapString(Attribute.POSITIVE_CLASS));  // TMP: HACK !!!
                learn((ExampleSet)currentExampleSet);
                // if (learnerType == ADAPTIVE_LEARNER) :  BatchLearner < Learner,
                //       learn(currentExampleSet,batchIndexAttribute,firstBatch,currentBatch,lastBatch)
                //    or learn(currentExampleSet) with getBatchIndexAttribute(), getFirstBatch(), getLastBatch();

                // ---- prepare test set ----

                currentExampleSet = new BatchedExampleSet (exampleSet, batchIndexAttribute, batch, batch);

                // ---- call applier and performance evaluator chain and get its output ----

                IOContainer evalOutput = evaluate(currentExampleSet);

                // read results
                PerformanceVector  currentResults = (PerformanceVector)evalOutput.getInput(PerformanceVector.class);
                setLastPerformance(currentResults.get(0));   // first entry of the result vector is published as last performance
                singleRunResults.add(currentResults);
                inApplyLoop();  // for GNU-Plot-Service
            }

            performanceResultMatrix.add(singleRunResults);

        } // end of 'run' loop

        // #### average over runs and batches ####

        averagedTimeSeriesResults = performanceResultMatrix.getTimePointWiseAverageRunVector();
        averagedOverallResults    = performanceResultMatrix.getOverallAveragePerformanceVector();  // equivalent to line below
        // averagedOverallResults = RunVector.average(averagedTimeSeriesResults);                  // equivalent to line above

        // #### construct operator output ####
        // (order matches OUTPUT_CLASSES: PerformanceVector first, RunVector second)

        IOObject[]  outputArray = new IOObject[2];
        outputArray[0] = averagedOverallResults;
        outputArray[1] = averagedTimeSeriesResults;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -