📄 conceptdriftsimulator.java
字号:
output += " Stream " + streamNames[s] + ":"; for (b = 0; b < noOfBatches; b++) { output += " " + noOfStreamExamplesInBatch[s][b]; } output += "\n"; } LogService.logMessage("ConceptDriftSimulator '" + getName() + "': distribution of streams over batches: " + output, LogService.TASK); } // end of method 'sampleExamples2Batches' /** perform sampling: randomly assign labels to the examples (according to the stream relevance probabilities). */ private void sampleExamples2Labels () throws OperatorException { ExampleReader exampleIterator; // example iterator Example currentExample; // reference to the current example int e, s, b; // index of current example, the data stream it comes from, and its batch index double relevance; // probability that the example is relevant for the user given its stream and batch String label; // simulated user interest for the current example // label as String // double label; // simulated user interest for the current example // label as double int[] noOfPosInBatch, // counters for log file ouput: no. of positive /negative examples for each batch noOfNegInBatch; if ((example2streamIndex == null) || (example2batchIndex == null)) { throw new RuntimeException("ConceptDriftSimulator '" + getName()+"': Prior to calling method 'sampleExamples2Labels', " + "the method 'sampleExamples2Batch' has to be called (class internal error)."); } example2label = new int[noOfExamples]; // maps an example (e.g. document) to its (user interest) label noOfPosInBatch = new int[noOfBatches]; noOfNegInBatch = new int[noOfBatches]; for (b = 0; b < noOfBatches; b++) { noOfPosInBatch[b] = 0; noOfNegInBatch[b] = 0; } LogService.logMessage ("ConceptDriftSimulator '" + getName() + "': sample mapping example2labels.\n", LogService.TASK); // System.out.print("\n\nLabeling:\n"); // ** TMP ** exampleIterator = exampleSet.getExampleReader(); e = 0; while ((e < noOfExamples) && (exampleIterator.hasNext())) { currentExample = exampleIterator.next(); s = example2streamIndex[e]; b = example2batchIndex[e]; // if (b >= noOfBatches) System.out.print("\n example = "+e+", batch = "+b+", stream = "+streamNames[s]+"\n"); if (b < noOfBatches) { // only label examples with legal batch index and skip others (which are not used) relevance = streamRelevance[s][b]; if (relevance == 0.0) { label = Attribute.NEGATIVE_CLASS; example2label[e] = -1; noOfNegInBatch[b]++; // System.out.print("-"); // ** TMP ** } else if (relevance == 1.0) { label = Attribute.POSITIVE_CLASS; example2label[e] = 1; noOfPosInBatch[b]++; //System.out.print("+"); // ** TMP ** } else { if (RandomGenerator.getRandomDoubleInRange(0.0,1.0) <= relevance) { label = Attribute.POSITIVE_CLASS; example2label[e] = +1; noOfPosInBatch[b]++; // System.out.print("+"); // ** TMP ** } else { label = Attribute.NEGATIVE_CLASS; example2label[e] = -1; noOfNegInBatch[b]++; // System.out.print("-"); // ** TMP ** } } } else { // label examples not used in the experiment as negative label = Attribute.NEGATIVE_CLASS; example2label[e] = -1; } // example2label[e] = Integer.parseInt(label); // label as String -- // example2label[e] = (Integer.getInteger(label)).intValue(); // label as String -- // example2label[e] = (int)label; // label as double ++ //System.out.print("."); // ** TMP ** currentExample.setLabel (label); // use setLabel ++ // try { // currentExample.setValue (userInterestAttribute, label); // use setValue -- // } catch (MethodNotSupportedException exception) { // throw new RuntimeException("ConceptDriftSimulator '"+getName()+"': " + // "user interest attribute (= new class label)" + // " must be nominal (internal error, should " + // "never occur)."); // } e++; } // userInterestAttribute.determineNumberOfClasses(exampleSet); // TMP: to be moved to 'ExampleSet.addReference' or ... !! // TMP: counts no. of class values and creates mapping 'class value string to index' and vice versa !! // Sanity checks: Attribute labelAttribute = exampleSet.getLabel(); boolean labelIsNominal = Ontology.ATTRIBUTE_VALUE_TYPE.isA(labelAttribute.getValueType(), Ontology.NOMINAL); LogService.logMessage ("User interest attribute: '" + userInterestAttribute.getName() + "', number of classes = " + userInterestAttribute.getNumberOfClasses(), LogService.TASK); LogService.logMessage ("Class label attribute: '" + labelAttribute.getName() + "', number of classes = " + labelAttribute.getNumberOfClasses(), LogService.TASK); LogService.logMessage ("Class label is nominal: " + (labelIsNominal ? "Yes" : "No") + ".", LogService.TASK); LogService.logMessage ("Class label = user interest: " + ((userInterestAttribute == labelAttribute) ? "Yes" : "No") + ".", LogService.TASK); LogService.logMessage ("ConceptDriftSimulator '" + getName() + "': example2label sampling completed.\n", LogService.TASK); // Log file output: String output = "\n Number of documents of positive and negative examples in each batch:\n"; int totalPos = 0; int totalNeg = 0; output += " Batch:"; for (b = 0; b < noOfBatches; b++) { output += " " + b; } output += "\n Pos: "; for (b = 0; b < noOfBatches; b++) { output += " " + noOfPosInBatch[b]; totalPos += noOfPosInBatch[b]; } output += "\n Neg: "; for (b = 0; b < noOfBatches; b++) { output += " " + noOfNegInBatch[b]; totalNeg += noOfNegInBatch[b]; } output += "\n Total Pos: " + totalPos + "\n Total Neg: " + totalNeg + "\n"; LogService.logMessage("ConceptDriftSimulator '" + getName() + "': distribution of streams over batches: " + output, LogService.TASK); } // end of method 'sampleExamples2Labels' /** scrambles the given array, i.e. brings the elements of the array into a random order. * This method consumes O(n) time, if 'n' is the length of the given array. */ private int[] scrambleArray(int[] array) { int n, i, j, swap; if ((array == null) || (array.length < 2)) { return array; } n = array.length; i = 0; while (i < n-1) { j = RandomGenerator.getRandomIntInRange (i, n-1); // swap the elements at the indices i and j: swap = array[i]; array[i] = array[j]; array[j] = swap; i++; } return array; } // ==== Learner /Applier stuff ==== // /** applies the learner (= first encapsulated inner operator). */ // IOContainer learn (BatchedExampleSet trainingSet) throws OperatorException { // return learningResult = getLearner().apply(getInput().append(new IOObject[] { trainingSet })); // } // /** applies the applier and evaluator (= second encapsulated inner operator). */ // IOContainer evaluate(ExampleSet testSet) throws OperatorException { // if (learningResult == null) { // throw new RuntimeException("Wrong use of 'ConceptDriftSimulator.evaluate(ExampleSet)': " + // "No preceding invocation of 'learn(ExampleSet)'!"); // } // IOContainer result = getEvaluator().apply(learningResult.append(new IOObject[] { testSet })); // learningResult = null; // return result; // } // ===== LogService stuff ===== // void setLastPerformance(PerformanceCriterion pc) { // lastPerformance = pc; // } /** returns <tt>performance</tt> and <tt>variance</tt>, if <tt>setResult()</tt> was previously called. */ // public double getValue(String id) { // if (lastPerformance == null) return super.getValue(id); // if (id.equals("performance")) return lastPerformance.getValue(); // if (id.equals("variance")) return lastPerformance.getVariance(); // return super.getValue(id); // } public List getParameterTypes() { List types = super.getParameterTypes(); types.add(new ParameterTypeInt("number_of_runs", "Specifies how often the concept drift simulation should be repeated " +"for computing the average results.", 2, Integer.MAX_VALUE, 10)); types.add(new ParameterTypeInt("number_of_batches", "Specifies the number of time steps to be simulated.", 2, Integer.MAX_VALUE, 20)); types.add(new ParameterTypeInt("number_of_streams", "Specifies the number of data streams the examples come from", 2, Integer.MAX_VALUE, 2)); types.add(new ParameterTypeInt("window_size", "Size of the fixed time window in number of batches.", 1, Integer.MAX_VALUE, DEFAULT_TIME_WINDOW_SIZE)); types.add(new ParameterTypeCategory("learner_type", "Type of the enclosed learner.", LEARNER_TYPE_NAMES, STATIC_LEARNER)); types.add(new ParameterTypeString("data_stream_names", "Specifies the names of data streams the examples come from (i.e. " +"the possible values of the class label attributes).", null)); types.add(new ParameterTypeString("data_stream_relevance", "Specifies the probability for examples to be relevant to the " +"simulated user interest depending on the data stream they come " +"frome and the current batch.", null)); return types; } public int getNumberOfValidationSteps() { return 1; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -