📄 conceptdriftsimulator.java
字号:
private int noOfBatches, currentBatch; private int noOfStreams; // no. of input data streams (e.g. no. of topic of text documents) private int noOfExamples; // total no. of examples (e.g. documents) in the input example set private int batchSize; // no. of documents per batch (evenly distributed, left overs discarded) private String[] streamNames; private double[][] streamRelevance; // streamRelevance[s][b] = probability for an example from stream 's' // to be relevant in batch 'b' private int[] timeIndex2example; // maps a time point to the example (e.g. document) at that time point private int[] example2timeIndex; // maps an example (e.g. document) to a time point private int[] example2batchIndex; // maps an example (e.g. document) to a batch private int[] example2label; // maps an example (e.g. document) to its (user interest) label private int[] example2streamIndex; // example2streamIndex[e] = index of the stream the example 'e' comes from private int[][] noOfStreamExamplesInBatch; // noOfStreamExamplesInBatch[s][b] = no. of examples from stream 's' // in batch 'b' private int[] noOfExamplesInBatch; // noOfExamplesInBatch[b] = no. of examples in batch 'b' private int learnerType; // type of enclosed learner (static vs. static window vs. adaptive) private int timeWindowSize; // window size in case of a static window learner (window of fixed size) private static final int ILLEGAL_STREAM_INDEX = -1; public ConceptDriftSimulator() { // Make the number of the current run of this operator, which starts with 1 and // goes up to 'noOfRuns' (= parameter 'number_of_runs'), and which can for example // be used to monitor the progress of this operator, externally accessible: addValue(new Value("run", "The number of the current run.") { public double getValue() { return currentRun; } }); } /** returns the the classes this operator expects as input. */ public Class[] getInputClasses() { return INPUT_CLASSES; } /** returns the the classes this operator provides as output. */ public Class[] getOutputClasses() { return OUTPUT_CLASSES; } // ==== apply() : Concept Drift Simulation ==== // public IOObject[] apply() throws OperatorException { // #### initialize #### LogService.logMessage("ConceptDriftSimulator '" + getName() + "': prepare concept drift simulation", LogService.TASK); // ---- retrieve the operator input objects ---- IOContainer input = getInput(); // operator input objects inputSet = (ExampleSet)input.getInput(ExampleSet.class); // input example set exampleSet = (ExampleSet)inputSet.clone(); // internal copy of the input example set /* TMP/2003/04/30 */ LogService.logMessage("ConceptDriftSimulator '" + getName() + "': old label attribute = "+inputSet.getLabel(), LogService.TASK); // ---- retrieve the operator parameters (no. of runs, no. of batches, data streams) ---- noOfRuns = getParameterAsInt("number_of_runs"); // min. 2 runs, default 10 runs noOfBatches = getParameterAsInt("number_of_batches"); // min. 2 batches, default 10 batches noOfStreams = getParameterAsInt("number_of_streams"); // min. 2 streams, default 2 streams // data_stream_relevance[s][b] = probability of a document of stream 's' (= Topic t) to be relevant in batch 'b') streamNames = new String[noOfStreams]; streamRelevance = new double[noOfStreams][noOfBatches]; String streamNamesString = getParameterAsString("data_stream_names"); scanStreamNames (streamNames, streamNamesString); String streamRelevanceString = getParameterAsString("data_stream_relevance"); scanStreamRelevanceSpecification (streamRelevance, streamRelevanceString, streamNames, exampleSet); // ---- compute further variables ---- /* TMP/2003/04/30 */ LogService.logMessage("ConceptDriftSimulator '" + getName() + "': check example set", LogService.TASK); noOfExamples = (exampleSet.getSize()); batchSize = ((int) (((double) noOfExamples) / ((double) noOfBatches))); // sanity check: min. 2 batches, min. 1 example per batch (=> min. 2 examples): if (noOfBatches < 2) { throw new FatalException("ConceptDriftSimulator '"+getName()+"': There must be at least 2 batches " + "for a concept drift simulation (here "+noOfBatches+" batches)."); } if (batchSize < 1) { throw new FatalException("ConceptDriftSimulator '"+getName()+"': There must be at least as many examples as " + "batches for a concept drift simulation (here "+noOfExamples+" examples are to be " + "distributed to " + noOfBatches + " batches)."); } // ---- retrieve the operator parameters (learner type, window size) ---- learnerType = getParameterAsInt ("learner_type"); timeWindowSize = getParameterAsInt ("window_size"); /* TMP/2003/04/30 */ LogService.logMessage("ConceptDriftSimulator '" + getName() + "': create new attributes", LogService.TASK); // #### add new attributes to example set: [stream[_index]], time_index, batch_index, weight, user_interest #### // #### (and set 'stream', 'time_index', and 'batch_index' to 'unused') #### // keep old class labels (= data stream names) in a new attribute before dropping the old class label: /* TMP/2003/04/30 */ LogService.logMessage("ConceptDriftSimulator '" + getName() + "': get old label attribute", LogService.TASK); /* TMP/2003/04/30 */ LogService.logMessage("ConceptDriftSimulator '" + getName() + "': old label = "+exampleSet.getLabel(), LogService.TASK); /* TMP/2003/04/30 */ LogService.logMessage("ConceptDriftSimulator '" + getName() + "': clone old label attribute", LogService.TASK); streamNameAttribute = (Attribute) (exampleSet.getLabel()).clone(); /* TMP/2003/04/30 */ LogService.logMessage("ConceptDriftSimulator '" + getName() + "': create stream name attribute (= clone of old label)", LogService.TASK); streamNameAttribute.setName (Attribute.createName("stream_name")); // exampleSet.appendAttributeReference (new AttributeReference (streamNameAttribute,false)); //// create special weight attribute (get /set values via Example.getWeight /.setWeight): /* TMP/2003/04/30 */ LogService.logMessage("ConceptDriftSimulator '" + getName() + "': create weight attribute", LogService.TASK); exampleSet.createWeightAttribute(); //// create new attributes for the time index and the batch index of an example: /* TMP/2003/04/30 */ LogService.logMessage("ConceptDriftSimulator '" + getName() + "': create time index attribute", LogService.TASK); timeIndexAttribute = new Attribute (Attribute.createName("time_index"), // create new attribute Ontology.INTEGER, Ontology.SINGLE_VALUE, Attribute.UNDEFINED_BLOCK_NR, null); (exampleSet.getExampleTable()).addAttribute(timeIndexAttribute); // create example table column // exampleSet.appendAttributeReference (new AttributeReference (timeIndexAttribute,false)); // register ref. in example set /* TMP/2003/04/30 */ LogService.logMessage("ConceptDriftSimulator '" + getName() + "': create batch index attribute", LogService.TASK); batchIndexAttribute = new Attribute (Attribute.createName("batch_index"), Ontology.INTEGER, Ontology.SINGLE_VALUE, Attribute.UNDEFINED_BLOCK_NR, null); (exampleSet.getExampleTable()).addAttribute(batchIndexAttribute); // exampleSet.appendAttributeReference (new AttributeReference (batchIndexAttribute,false)); //// create new attribute for the new class labels (= simulated user interest): /* TMP/2003/04/30 */ LogService.logMessage("ConceptDriftSimulator '" + getName() + "': create new label attribute", LogService.TASK); userInterestAttribute = new Attribute (Attribute.createName("user_interest"), Ontology.CLASSIFICATION, Ontology.SINGLE_VALUE, Attribute.UNDEFINED_BLOCK_NR, null); (exampleSet.getExampleTable()).addAttribute(userInterestAttribute); //// exampleSet.appendAttributeReference (new AttributeReference (userInterestAttribute,true)); ////// change class label attribute from stream name (original) to user interest (new): /* TMP/2003/04/30 */ LogService.logMessage("ConceptDriftSimulator '" + getName() + "': set new label attribute", LogService.TASK); exampleSet.setLabel (userInterestAttribute); LogService.logMessage("ConceptDriftSimulator '" + getName() + "': start concept drift simulation", LogService.TASK); // #### loop for runs #### BatchedExampleSet currentExampleSet; // current ExampleSet // performance result data structures: SeriesVector performanceResultMatrix = new SeriesVector(); // results of all batches for all runs RunVector singleRunResults; // result time series of a single run RunVector averagedTimeSeriesResults; // average result time series, averaged over all runs PerformanceVector averagedOverallResults; // overall result averaged over all runs and batches for (int run = 0; run < noOfRuns; run++) { singleRunResults = new RunVector(); // #### sample example2batch and example2class #### sampleExamples2Batches(); sampleExamples2Labels(); // #### loop for batches #### // Training set: batches 0..b-1 (for induction) (vs.: for transduction also batch b, but without labels) // Test set: batch b for (int batch = 1; batch < noOfBatches; batch++) { LogService.logMessage("ConceptDriftSimulator '" + getName() + "': current run = " + (run+1) + ", current batch = " + batch + "\n", LogService.TASK); // ---- prepare training set ---- // induction: int firstBatch = 0; switch (learnerType) { case STATIC_WINDOW_LEARNER : firstBatch = maximumInt (0, batch-timeWindowSize); break; case STATIC_LEARNER : case ADAPTIVE_LEARNER : default : firstBatch = 0; } currentExampleSet = new BatchedExampleSet (exampleSet, batchIndexAttribute, firstBatch, batch-1); // // // transduction: // currentExampleSet = new BatchedExampleSet (exampleSet, batchIndexAttribute, firstBatch, batch); // // ... for transduction: clone label and set to unlabeled for last batch // ---- call learning chain ---- // ((SVMLearner)((OperatorChain)this.getOperator(0)).getOperator(0)).setPositiveLabelIndex((int)userInterestAttribute.mapString(Attribute.POSITIVE_CLASS)); // TMP: HACK !!! learn((ExampleSet)currentExampleSet); // if (learnerType == ADAPTIVE_LEARNER) : BatchLearner < Learner, // learn(currentExampleSet,batchIndexAttribute,firstBatch,currentBatch,lastBatch) // or learn(currentExampleSet) with getBatchIndexAttribute(), getFirstBatch(), getLastBatch(); // ---- prepare test set ---- currentExampleSet = new BatchedExampleSet (exampleSet, batchIndexAttribute, batch, batch); // ---- call applier and performance evaluator chain and get its output ---- IOContainer evalOutput = evaluate(currentExampleSet); //// read results PerformanceVector currentResults = (PerformanceVector)evalOutput.getInput(PerformanceVector.class); setLastPerformance(currentResults.getMainCriterion()); singleRunResults.add(currentResults); inApplyLoop(); // for GNU-Plot-Service } performanceResultMatrix.add(singleRunResults); } // end of 'run' loop // #### average over runs and batches #### averagedTimeSeriesResults = performanceResultMatrix.getTimePointWiseAverageRunVector(); averagedOverallResults = performanceResultMatrix.getOverallAveragePerformanceVector(); // equivalent to line below // averagedOverallResults = RunVector.average(averagedTimeSeriesResults); // equivalent to line above // #### construct operator output #### IOObject[] outputArray = new IOObject[2]; outputArray[0] = averagedOverallResults; outputArray[1] = averagedTimeSeriesResults; // setResult(averagedOverallResults.get(0)); // RK/2003/04/30: old setResult(averagedOverallResults.getMainCriterion()); // RK/2003/04/30: new LogService.logMessage("ConceptDriftSimulator '" + getName() + "': complete concept drift simulation", LogService.TASK); return outputArray; } /** returns the maximum of two integer variables. */ private int maximumInt (int v1, int v2) { if (v1 > v2) return v1; return v2; } /** scan data stream names from the given string. */ private String[] scanStreamNames (String[] streamNames, String streamNamesString) { StringTokenizer tokenizer = new StringTokenizer (streamNamesString); int streamIndex = 0; String outString = "\nStream names read from configuration file:\n";
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -