📄 conceptdriftsimulator.java
字号:
while (tokenizer.hasMoreTokens() && (streamIndex < noOfStreams)) { streamNames[streamIndex] = tokenizer.nextToken(); outString += " " + streamNames[streamIndex]; streamIndex++; } LogService.logMessage ("ConceptDriftSimulator '"+getName()+"': " + outString, LogService.MINIMUM); return streamNames; } /** return the index of a stream name in the stream names array. */ private int getStreamIndex (String[] streamNames, String streamName) { for (int i=0; i < streamNames.length; i++) { if (streamName.equals(streamNames[i])) { return i; } } LogService.logMessage("Unknown stream name: "+streamName+". Stream names are: "+Arrays.asList(streamNames), LogService.ERROR); return ILLEGAL_STREAM_INDEX; // given stream name not found in the stream name array } /** return the index of a stream name */ private int getStreamIndex (String streamName) { return getStreamIndex (this.streamNames, streamName); } /** scan the relevance specification for the data streams from the given string. */ private double[][] scanStreamRelevanceSpecification (double[][] streamRelevance, String streamRelevanceString, String[] streamNames, ExampleSet inputSet) throws OperatorException { if (streamRelevanceString == null) { throw new FatalException(getName()+": Parameter 'data_stream_relevance' not set"); } int streamIndex = 0; int batchIndex = 0; try { BufferedReader reader = new BufferedReader(new StringReader(streamRelevanceString)); String line = null; StringTokenizer tokenizer = null; String streamName = null; String token = null; Attribute label = inputSet.getLabel(); // loop to scan drift specification for each given topic: while ((line = reader.readLine()) != null) { line = line.trim(); tokenizer = new StringTokenizer (line); if (tokenizer.hasMoreTokens()) { streamName = tokenizer.nextToken(); streamIndex = getStreamIndex(streamNames, streamName); if (streamIndex == ILLEGAL_STREAM_INDEX) { throw new FatalException("ConceptDriftSimulator '" + getName() + "': " + "illegal stream name '"+streamName+"' read in 'data_stream_relevance', " + "which did not occur in 'data_stream_names'"); } if (tokenizer.hasMoreTokens()) { // skip ':' after stream name: tokenizer.nextToken(); batchIndex = 0; // loop to retrieve and set relevance values for that topic, batch by batch: while (tokenizer.hasMoreTokens() && (batchIndex < noOfBatches)) { token = tokenizer.nextToken(); streamRelevance[streamIndex][batchIndex] = Double.parseDouble(token); batchIndex++; } } } } } catch (Exception e) { throw new FatalException("ConceptDriftSimulator '" + getName() + "': "+ "Exception while reading the 'data_stream_relevance' values:", e); } // ---- print relevance matrix to log file ---- String relevanceMatrixString = "\nRelevance probability of each stream over time:\n"; for (streamIndex=0; streamIndex < noOfStreams; streamIndex++) { relevanceMatrixString += " " + streamNames[streamIndex] + ":"; for (batchIndex=0; batchIndex < noOfBatches; batchIndex++) { relevanceMatrixString += " " + streamRelevance[streamIndex][batchIndex]; } relevanceMatrixString += "\n"; } LogService.logMessage ("ConceptDriftSimulator '"+getName()+"': "+relevanceMatrixString, LogService.TASK); return streamRelevance; } /** perform sampling: randomly assign examples to batches (distribute example stream as evenly as * possible to the batches) */ private void sampleExamples2Batches () { ExampleReader exampleIterator; Example currentExample; int e; // index of the current example int s; // index of the stream that the current current examples comes from int t; // time index of the current example int b; // index of the batch that the current example from stream 's' should be assigned to String currentStreamName; // name of the stream (e.g. topic) the current example (e.g. document) belongs to // maps an example (e.g. document) to a time point and vice versa: example2timeIndex = new int[noOfExamples]; timeIndex2example = new int[noOfExamples]; // maps an example (e.g. document) to a batch: example2batchIndex = new int[noOfExamples]; // noOfStreamExamplesInBatch[s][b] = no. of examples from stream 's' in batch 'b': noOfStreamExamplesInBatch = new int[noOfStreams][noOfBatches]; // noOfExamplesInBatch[b] = no. of examples in batch 'b': noOfExamplesInBatch = new int[noOfBatches]; // currentBatchForStream[s] = batch that the current example from stream 's' should be assigned to: int[] currentBatchForStream = new int[noOfStreams]; // example2streamIndex[e] = index of the stream the example 'e' comes from example2streamIndex = new int[noOfExamples]; // ---- initialize all mappings ---- for (e = 0; e < noOfExamples; e++) { example2streamIndex[e] = 0; example2timeIndex[e] = 0; example2batchIndex[e] = 0; } for (s = 0; s < noOfStreams; s++) { for (b = 0; b < noOfBatches; b++) noOfStreamExamplesInBatch[s][b] = 0; currentBatchForStream[s] = 0; } for (b = 0; b < noOfBatches; b++) noOfExamplesInBatch[b] = 0; // ---- create 'example2timeIndex' (mapping of example index to time index) ---- LogService.logMessage ("ConceptDriftSimulator '" + getName() + "': sample mapping example2timeIndex.", LogService.TASK); for (int i=0; i < noOfExamples; i++) timeIndex2example[i] = i; scrambleArray (timeIndex2example); // for (int i=0; i < noOfExamples; i++) example2timeIndex[i] = i; // scrambleArray (example2timeIndex); // ---- create 'example2batchIndex' (mapping of example index to batch index) ---- // -- first loop: retrieve 'example2stream' mapping and initialize weights and user interest -- // LogService.logMessage ("ConceptDriftSimulator '" + getName() + "': sample mapping example2batchIndex (step 1).", // LogService.TASK); exampleIterator = exampleSet.getExampleReader(); // example iterator e = 0; // example index while ((e < noOfExamples) && (exampleIterator.hasNext())) { // for each example: currentExample = exampleIterator.next(); // * get the current example // LogService.logMessage ("ConceptDriftSimulator '" + getName() + "': ... \n" + // " streamNameAttribute = " + streamNameAttribute.getName() + // " example index = " + e + ".\n", LogService.TASK); currentStreamName = currentExample.getValueAsString (streamNameAttribute); // * get its stream name s = getStreamIndex(currentStreamName); // * get its stream index example2streamIndex[e] = s; // * store its stream index try { currentExample.setWeight (1.0); // * set its weight to 1.0 } catch (MethodNotSupportedException exception) { throw new RuntimeException("ConceptDriftSimulator '"+getName()+"': " + "weight attribute must exist to set weights " + "(internal error, should never occur)."); } currentExample.setLabel (Attribute.NEGATIVE_CLASS); // * initialize its user interest // try { // currentExample.setValue (userInterestAttribute, Attribute.NEGATIVE_CLASS); // * initialize its user interest // } catch (MethodNotSupportedException exception) { // throw new RuntimeException("ConceptDriftSimulator '"+getName()+"': " + // "user interest attribute (= new class label)" + // " must be nominal (internal error, should " + // "never occur)."); // } e++; // * increment example index } // -- end of first loop -- // // -- second loop: create 'example2timeIndex' and 'example2batchIndex' mapping -- // LogService.logMessage ("ConceptDriftSimulator '" + getName() + "': sample mapping example2batchIndex (step 2).", // LogService.TASK); t = 0; while (t < (noOfBatches * batchSize)) { // for each time point: e = timeIndex2example[t]; // * get its example's index example2timeIndex[e] = t; // * set example2timeIndex mapping s = example2streamIndex[e]; // * get its example's stream index b = currentBatchForStream[s]; // * get its example's batch index if (noOfExamplesInBatch[b] < batchSize) { // example2batchIndex[e] = b; // * set its example's batch index t++; // * increment time point noOfStreamExamplesInBatch[s][b]++; // * counter for log file output } // currentBatchForStream[s] = (currentBatchForStream[s] + 1) % noOfBatches; // * modulo increment batch index } while (t < noOfExamples) { // for each remaining time point: e = timeIndex2example[t]; // * get its example's index example2timeIndex[e] = t; // * set example2timeIndex mapping example2batchIndex[e] = noOfBatches; // * set its example's batch index t++; // to a value outside simulation } // -- end of second loop -- // // -- third loop: set time index and batch index -- // LogService.logMessage ("ConceptDriftSimulator '" + getName() + "': sample mapping example2batchIndex (step 3).", // LogService.TASK); exampleIterator = exampleSet.getExampleReader(); // example iterator e = 0; // example index while ((e < noOfExamples) && (exampleIterator.hasNext())) { // for each example: currentExample = exampleIterator.next(); // * get the current example currentExample.setValue (timeIndexAttribute, (double)(example2timeIndex[e])); // * set its time index currentExample.setValue (batchIndexAttribute, (double)(example2batchIndex[e])); // * set its batch index if (example2batchIndex[e] == noOfBatches) { // * if example is outside simu- try { // lation, set its weight to 0.0 currentExample.setWeight (0.0); // } catch (MethodNotSupportedException exception) { throw new RuntimeException("ConceptDriftSimulator '"+getName()+"': " + "weight attribute must exist to set weights " + "(internal error, should never occur)."); } // } // e++; // * increment example index } // -- end of third loop -- // ---- log file output: stream 2 batch distribution ---- String output = "\n Number of documents of each stream in each batch:\n"; output += " Batch: "; for (b = 0; b < noOfBatches; b++) { output += " " + b; } output += "\n"; for (s = 0; s < noOfStreams; s++) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -