📄 conceptdriftadaptor.java
字号:
BatchedExampleSet currentExampleSet;   // example set currently handed to the inner operators

        // performance result data structures:
        RunVector timeSeriesResults = new RunVector();   // one performance vector per simulated batch
        PerformanceVector averagedOverallResults;        // overall result averaged over all batches

        // #### assign example2batch and example2class ####
        assignExamples2Batches();
        assignExamples2Labels();

        // #### loop for batches ####
        // Training set: batches 0..b-1 (for induction)
        //               (vs.: for transduction also batch b, but without labels)
        // Test set:     batch b
        for (int batch = 1; batch < noOfBatches; batch++) {
            LogService.logMessage("ConceptDriftAdaptor '" + getName() + "': " +
                                  "current batch = " + batch + "\n", LogService.TASK);

            // ---- prepare training set ----
            // induction:
            int firstBatch = 0;
            switch (learnerType) {
            case STATIC_WINDOW_LEARNER :
                // fixed-size window: only the last 'timeWindowSize' batches, never before batch 0
                firstBatch = maximumInt (0, batch-timeWindowSize);
                break;
            case STATIC_LEARNER :
            case ADAPTIVE_LEARNER :
            default :
                firstBatch = 0;
            }
            currentExampleSet = new BatchedExampleSet (exampleSet, batchIndexAttribute, firstBatch, batch-1);
            // Transduction (not implemented) would instead use batches firstBatch..batch,
            // clone the label and set the last batch to unlabeled.

            // ---- call learning chain ----
            LogService.logMessage("ConceptDriftAdaptor '" + getName() + "': apply(): " +
                                  "call learner with training example set (= batches " + firstBatch + ".." +
                                  (batch-1) + ")", LogService.MINIMUM);
            learn((ExampleSet)currentExampleSet);
            // Idea noted by the original author for ADAPTIVE_LEARNER: a BatchLearner that is given
            // batchIndexAttribute, firstBatch, currentBatch and lastBatch (or can query them).

            // ---- prepare test set ----
            currentExampleSet = new BatchedExampleSet (exampleSet, batchIndexAttribute, batch, batch);

            // ---- call applier and performance evaluator chain and get its output ----
            LogService.logMessage("ConceptDriftAdaptor '" + getName() + "': apply(): " +
                                  "classify test example set (= batch " + batch + ") " +
                                  "and evaluate", LogService.MINIMUM);
            IOContainer evalOutput = evaluate(currentExampleSet);

            // ---- read results ----
            PerformanceVector currentResults = (PerformanceVector)evalOutput.getInput(PerformanceVector.class);
            setLastPerformance(currentResults.getMainCriterion());
            timeSeriesResults.add(currentResults);
            inApplyLoop();   // for GNU-Plot-Service
        }

        // #### average over batches ####
        averagedOverallResults = timeSeriesResults.average();

        // #### construct operator output ####
        IOObject[] outputArray = new IOObject[2];
        outputArray[0] = averagedOverallResults;
        outputArray[1] = timeSeriesResults;

        setResult(averagedOverallResults.getMainCriterion());

        LogService.logMessage("ConceptDriftAdaptor '" + getName() + "': " +
                              "time simulation for concept drift experiment completed", LogService.TASK);
        return outputArray;
    }

    /**
     * Returns the larger of two integers.
     *
     * @param v1 first value
     * @param v2 second value
     * @return <code>v1</code> if it is greater than <code>v2</code>, otherwise <code>v2</code>
     */
    private int maximumInt (int v1, int v2) {
        return Math.max(v1, v2);
    }

    /**
     * Assigns every example to a time index and a batch index, in the original order of the
     * example set, and writes both indices into the examples.
     *
     * The method (re)creates the mappings <code>example2timeIndex</code>,
     * <code>timeIndex2example</code>, <code>example2batchIndex</code> and the counter array
     * <code>noOfExamplesInBatch</code>, sets the weight of every example to 1.0, stores the time
     * and batch index in <code>timeIndexAttribute</code> / <code>batchIndexAttribute</code> of each
     * example, and finally logs the number of examples per batch.
     *
     * @throws RuntimeException if an example does not support weights (wraps the underlying
     *                          <code>MethodNotSupportedException</code> as its cause)
     */
    private void assignExamples2Batches () {
        ExampleReader exampleIterator;
        Example       currentExample;
        int           e;   // index of the current example
        int           t;   // time index of the current example
        int           b;   // index of the batch the current example is assigned to

        // Freshly allocated int arrays are zero-filled by the JVM, so no explicit
        // initialisation loops are needed.
        example2timeIndex   = new int[noOfExamples];   // example index -> time index
        timeIndex2example   = new int[noOfExamples];   // time index    -> example index
        example2batchIndex  = new int[noOfExamples];   // example index -> batch index
        noOfExamplesInBatch = new int[noOfBatches];    // noOfExamplesInBatch[b] = no. of examples in batch 'b'

        // ---- create 'example2timeIndex' (mapping of example index to time index) ----
        LogService.logMessage ("ConceptDriftAdaptor '" + getName() + "': sample mapping example2timeIndex.",
                               LogService.TASK);
        for (int i = 0; i < noOfExamples; i++) {
            timeIndex2example[i] = i;   // identity mapping: the original example order is preserved (no scrambling)
        }

        // ---- first loop: set the weight of every example to 1.0 ----
        exampleIterator = exampleSet.getExampleReader();
        e = 0;
        while ((e < noOfExamples) && (exampleIterator.hasNext())) {
            currentExample = exampleIterator.next();
            try {
                currentExample.setWeight (1.0);
            } catch (MethodNotSupportedException exception) {
                // keep the original exception as cause so the failure stays diagnosable
                throw new RuntimeException("ConceptDriftAdaptor '"+getName()+"': " +
                                           "weight attribute must exist to set weights " +
                                           "(internal error, should never occur).", exception);
            }
            e++;
        }

        // ---- second loop: create 'example2timeIndex' and 'example2batchIndex' mapping ----
        // Variant (B): split almost evenly and discard no example, i.e. the batch of time point t is
        // floor(t / (noOfExamples / noOfBatches)). The abandoned variant (A) used an integer batch
        // size and mapped the remaining examples to the out-of-simulation batch index 'noOfBatches'.
        double batchSizeAsDouble = ((double)noOfExamples) / ((double)noOfBatches);
        b = 0;   // start with batch 0 and
        t = 0;   // time point 0
        while (t < noOfExamples) {
            b = (int)(((double)t)/batchSizeAsDouble);
            e = timeIndex2example[t];     // example shown at time point t
            example2timeIndex[e]  = t;
            example2batchIndex[e] = b;
            noOfExamplesInBatch[b]++;     // counter for log file output
            t++;
        }

        // ---- third loop: write time index and batch index into the examples ----
        exampleIterator = exampleSet.getExampleReader();
        e = 0;
        while ((e < noOfExamples) && (exampleIterator.hasNext())) {
            currentExample = exampleIterator.next();
            currentExample.setValue (timeIndexAttribute,  (double)(example2timeIndex[e]));
            currentExample.setValue (batchIndexAttribute, (double)(example2batchIndex[e]));
            // Examples outside the simulation (batch index == noOfBatches) get weight 0.0.
            // NOTE(review): with variant (B) above no example receives that batch index, so this
            // branch appears to be reachable only if variant (A) is reactivated.
            if (example2batchIndex[e] == noOfBatches) {
                try {
                    currentExample.setWeight (0.0);
                } catch (MethodNotSupportedException exception) {
                    throw new RuntimeException("ConceptDriftAdaptor '"+getName()+"': " +
                                               "weight attribute must exist to set weights " +
                                               "(internal error, should never occur).", exception);
                }
            }
            e++;
        }

        // ---- log file output: distribution of examples over batches ----
        String output = "\n Number of documents in each batch:\n";
        output += " Batch: ";
        for (b = 0; b < noOfBatches; b++) {
            output += " " + b;
        }
        output += "\n";
        output += " No. of Examples: ";
        for (b = 0; b < noOfBatches; b++) {
            output += " " + noOfExamplesInBatch[b];
        }
        output += "\n";
        LogService.logMessage("ConceptDriftAdaptor '" + getName() + "': distribution of examples over batches: " +
                              output, LogService.TASK);
    }   // end of method 'assignExamples2Batches'

    /**
     * Builds the mapping <code>example2label</code> (example index to class index) from the label
     * values already stored in the example set and logs how many examples of each class fall
     * into each batch. No label of any example is changed.
     *
     * Must be called after <code>assignExamples2Batches()</code>, because it reads
     * <code>example2batchIndex</code>.
     *
     * @throws OperatorException declared by the original signature; nothing in this method
     *                           throws it directly
     * @throws RuntimeException  if <code>example2batchIndex</code> has not been created yet
     */
    private void assignExamples2Labels () throws OperatorException {
        // NOTE(review): the original author marked this method as possibly obsolete
        // (no labels are changed, only a mapping table is created).
        int           noOfClasses = labelAttribute.getNumberOfClasses();
        int[][]       noOfExamplesOfClassInBatch = new int[noOfClasses][noOfBatches];   // zero-filled by the JVM
        ExampleReader exampleIterator;
        Example       currentExample;
        int           e, b, c;   // index of the current example, its batch index, and its class index
        String        label;     // label of the current example as String

        if (example2batchIndex == null) {
            throw new RuntimeException("ConceptDriftAdaptor '" + getName()+"': Prior to calling method 'assignExamples2Labels', " +
                                       "the method 'assignExamples2Batches' has to be called (class internal error).");
        }

        example2label = new int[noOfExamples];   // maps an example (e.g. document) to its (user interest) label

        LogService.logMessage ("ConceptDriftAdaptor '" + getName() + "': create mapping example2labels.\n",
                               LogService.TASK);
        exampleIterator = exampleSet.getExampleReader();
        e = 0;
        while ((e < noOfExamples) && (exampleIterator.hasNext())) {
            currentExample = exampleIterator.next();
            b = example2batchIndex[e];
            label = currentExample.getValueAsString (labelAttribute);
            // Class labels are expected to be the numbers 1..noOfClasses written as strings; they
            // are mapped to the class indices 0..noOfClasses-1. Integer.parseInt throws a
            // NumberFormatException for any other kind of label.
            c = Integer.parseInt(label) - 1;
            example2label[e] = c;
            noOfExamplesOfClassInBatch[c][b]++;
            e++;
        }

        // Sanity checks (reported for the label attribute of the example set itself):
        Attribute exampleSetLabel = exampleSet.getLabel();
        boolean   labelIsNominal  = Ontology.ATTRIBUTE_VALUE_TYPE.isA(exampleSetLabel.getValueType(), Ontology.NOMINAL);
        LogService.logMessage ("Class label attribute: '" + exampleSetLabel.getName() +
                               "', number of classes = " + exampleSetLabel.getNumberOfClasses(), LogService.TASK);
        LogService.logMessage ("Class label is nominal: " + (labelIsNominal ? "Yes" : "No") + ".", LogService.TASK);

        LogService.logMessage ("ConceptDriftAdaptor '" + getName() + "': example2label sampling completed.\n",
                               LogService.TASK);

        // ---- log file output: distribution of classes over batches ----
        String output = "\n Number of examples of each class in each batch:\n";
        output += " Batch: ";
        for (b = 0; b < noOfBatches; b++) {
            output += " " + b;
        }
        for (c = 0; c < noOfClasses; c++) {
            output += "\n Class " + (c+1) + ": ";
            for (b = 0; b < noOfBatches; b++) {
                output += " " + noOfExamplesOfClassInBatch[c][b];
            }
        }
        output += "\n";
        LogService.logMessage("ConceptDriftAdaptor '" + getName() + "': distribution of classes over batches: " +
                              output, LogService.TASK);
    }   // end of method 'assignExamples2Labels'

    /**
     * Returns the parameter types of the superclass extended by the parameters of this operator:
     * <code>number_of_batches</code> (int, at least 2, default 10), <code>window_size</code>
     * (int, at least 1, default <code>DEFAULT_TIME_WINDOW_SIZE</code>) and
     * <code>learner_type</code> (category out of <code>LEARNER_TYPE_NAMES</code>, default
     * <code>STATIC_LEARNER</code>).
     *
     * @return the list obtained from <code>super.getParameterTypes()</code> with the three
     *         parameter types appended
     */
    public List getParameterTypes() {
        List types = super.getParameterTypes();
        types.add(new ParameterTypeInt("number_of_batches", "Specifies the number of time steps to be simulated.",
                                       2, Integer.MAX_VALUE, 10));
        types.add(new ParameterTypeInt("window_size", "Size of the fixed time window in number of batches.",
                                       1, Integer.MAX_VALUE, DEFAULT_TIME_WINDOW_SIZE));
        types.add(new ParameterTypeCategory("learner_type", "Type of the enclosed learner.",
                                            LEARNER_TYPE_NAMES, STATIC_LEARNER));
        return types;
    }

    /**
     * Returns the number of validation steps reported by this operator.
     *
     * @return always 1
     */
    public int getNumberOfValidationSteps() {
        return 1;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -