resample.java
来自「Weka」· Java 代码 · 共 669 行 · 第 1/2 页
JAVA
669 行
* @return tip text for this property suitable for
   *         displaying in the explorer/experimenter gui
   */
  public String noReplacementTipText() {
    return "Disables the replacement of instances.";
  }

  /**
   * Gets whether instances are drawn with or without replacement.
   *
   * @return true if the replacement is disabled
   */
  public boolean getNoReplacement() {
    return m_NoReplacement;
  }

  /**
   * Sets whether instances are drawn with or without replacement.
   *
   * @param value if true then the replacement of instances is disabled
   */
  public void setNoReplacement(boolean value) {
    m_NoReplacement = value;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   *         displaying in the explorer/experimenter gui
   */
  public String invertSelectionTipText() {
    return "Inverts the selection (only if instances are drawn WITHOUT replacement).";
  }

  /**
   * Gets whether selection is inverted (only if instances are drawn WITHOUT
   * replacement).
   *
   * @return true if the selection is inverted
   * @see #m_NoReplacement
   */
  public boolean getInvertSelection() {
    return m_InvertSelection;
  }

  /**
   * Sets whether the selection is inverted (only if instances are drawn
   * WITHOUT replacement).
   *
   * @param value if true then selection is inverted
   */
  public void setInvertSelection(boolean value) {
    m_InvertSelection = value;
  }

  /**
   * Returns the Capabilities of this filter: the superclass capabilities
   * with all attribute types, missing values and a nominal class enabled.
   *
   * @return the capabilities of this object
   * @see Capabilities
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();

    // attributes
    result.enableAllAttributes();
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enable(Capability.NOMINAL_CLASS);

    return result;
  }

  /**
   * Sets the format of the input instances. The output format is set to the
   * very same structure.
   *
   * @param instanceInfo an Instances object containing the input
   *          instance structure (any instances contained in the object are
   *          ignored - only the structure is required).
   * @return true if the outputFormat may be collected immediately
   * @throws Exception if the input format can't be set
   *           successfully
   */
  public boolean setInputFormat(Instances instanceInfo) throws Exception {
    super.setInputFormat(instanceInfo);
    setOutputFormat(instanceInfo);
    return true;
  }

  /**
   * Input an instance for filtering. Filter requires all
   * training instances be read before producing output. Once the first
   * batch is done, further instances are pushed straight to the output
   * queue.
   *
   * @param instance the input instance
   * @return true if the filtered instance may now be
   *         collected with output().
   * @throws IllegalStateException if no input structure has been defined
   */
  public boolean input(Instance instance) {
    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }

    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    if (isFirstBatchDone()) {
      // after the first batch instances are passed through unchanged
      push(instance);
      return true;
    } else {
      bufferInput(instance);
      return false;
    }
  }

  /**
   * Signify that this batch of input to the filter is finished.
   * If the filter requires all instances prior to filtering,
   * output() may now be called to retrieve the filtered instances.
   *
   * @return true if there are instances pending output
   * @throws IllegalStateException if no input structure has been defined
   */
  public boolean batchFinished() {
    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }

    if (!isFirstBatchDone()) {
      // Do the subsample, and clear the input instances.
      createSubsample();
    }
    flushInput();

    m_NewBatch = true;
    m_FirstBatchDone = true;
    return (numPendingOutput() != 0);
  }

  /**
   * creates the subsample with replacement
   *
   * @param random the random number generator to use
   * @param origSize the original size of the dataset
   * @param sampleSize the size to generate
   * @param actualClasses the number of classes found in the data
   * @param classIndices the indices where classes start
   */
  public void createSubsampleWithReplacement(Random random, int origSize,
      int sampleSize, int actualClasses, int[] classIndices) {

    for (int i = 0; i < sampleSize; i++) {
      int index = 0;
      if (random.nextDouble() < m_BiasToUniformClass) {
        // Pick a random class (of those classes that actually appear)
        int cIndex = random.nextInt(actualClasses);
        for (int j = 0, k = 0; j < classIndices.length - 1; j++) {
          // k only advances on non-empty classes, so this selects the
          // cIndex-th class that has instances
          if ((classIndices[j] != classIndices[j + 1]) && (k++ >= cIndex)) {
            // Pick a random instance of the designated class
            index = classIndices[j]
                + random.nextInt(classIndices[j + 1] - classIndices[j]);
            break;
          }
        }
      } else {
        index = random.nextInt(origSize);
      }
      push((Instance) getInputFormat().instance(index).copy());
    }
  }

  /**
   * creates the subsample without replacement. Every instance index is
   * drawn at most once; if the selection is inverted, the instances that
   * were NOT drawn are output instead.
   *
   * @param random the random number generator to use
   * @param origSize the original size of the dataset
   * @param sampleSize the size to generate (capped at origSize)
   * @param actualClasses the number of classes found in the data; kept for
   *          interface compatibility, the per-class pools are derived
   *          from classIndices
   * @param classIndices the indices where classes start; entry i+1 is the
   *          (exclusive) end of class i
   */
  public void createSubsampleWithoutReplacement(Random random, int origSize,
      int sampleSize, int actualClasses, int[] classIndices) {

    if (sampleSize > origSize) {
      sampleSize = origSize;
      System.err.println(
          "Resampling without replacement can only use percentage <=100% - "
          + "Using full dataset!");
    }

    // One pool per class VALUE (empty classes get an empty pool). The pools
    // are addressed by class value below, so sizing them by the number of
    // non-empty classes would misalign them as soon as one class is empty.
    int numPools = classIndices.length - 1;
    @SuppressWarnings("unchecked")
    Vector<Integer>[] indices = new Vector[numPools];
    @SuppressWarnings("unchecked")
    Vector<Integer>[] indicesNew = new Vector[numPools];

    // generate list of all indices to draw from
    int remaining = 0;
    for (int i = 0; i < numPools; i++) {
      indices[i] = new Vector<Integer>(classIndices[i + 1] - classIndices[i]);
      indicesNew[i] = new Vector<Integer>(indices[i].capacity());
      for (int n = classIndices[i]; n < classIndices[i + 1]; n++) {
        indices[i].add(n);
      }
      remaining += indices[i].size();
    }

    // draw X samples; 'remaining' always equals the total number of indices
    // still in the pools, whichever branch performed the previous draws
    for (int i = 0; i < sampleSize && remaining > 0; i++) {
      if (random.nextDouble() < m_BiasToUniformClass) {
        // Pick a random class among those that still have undrawn
        // instances (an exhausted pool cannot be sampled from)
        int available = 0;
        for (int j = 0; j < numPools; j++) {
          if (!indices[j].isEmpty()) {
            available++;
          }
        }
        int cIndex = random.nextInt(available);
        for (int j = 0, k = 0; j < numPools; j++) {
          if (!indices[j].isEmpty() && (k++ >= cIndex)) {
            // Pick a random instance of the designated class
            int index = random.nextInt(indices[j].size());
            indicesNew[j].add(indices[j].remove(index));
            break;
          }
        }
      } else {
        // Pick uniformly among all remaining instances: walk the pools
        // until the offset falls inside one of them
        int index = random.nextInt(remaining);
        for (int n = 0; n < numPools; n++) {
          if (index < indices[n].size()) {
            indicesNew[n].add(indices[n].remove(index));
            break;
          }
          index -= indices[n].size();
        }
      }
      remaining--;
    }

    if (getInvertSelection()) {
      // output what was left behind instead of what was drawn
      indicesNew = indices;
    } else {
      // sort indices
      for (int i = 0; i < indicesNew.length; i++) {
        Collections.sort(indicesNew[i]);
      }
    }

    // add to output
    for (int i = 0; i < indicesNew.length; i++) {
      for (int n = 0; n < indicesNew[i].size(); n++) {
        push((Instance) getInputFormat().instance(indicesNew[i].get(n)).copy());
      }
    }
  }

  /**
   * Creates a subsample of the current set of input instances. The output
   * instances are pushed onto the output queue for collection.
   */
  protected void createSubsample() {
    int origSize = getInputFormat().numInstances();
    int sampleSize = (int) (origSize * m_SampleSizePercent / 100);

    // Subsample that takes class distribution into consideration

    // Sort according to class attribute.
    getInputFormat().sort(getInputFormat().classIndex());

    // Create an index of where each class value starts
    int[] classIndices = new int[getInputFormat().numClasses() + 1];
    int currentClass = 0;
    classIndices[currentClass] = 0;
    for (int i = 0; i < getInputFormat().numInstances(); i++) {
      Instance current = getInputFormat().instance(i);
      if (current.classIsMissing()) {
        // mark every later class as starting here and stop scanning
        for (int j = currentClass + 1; j < classIndices.length; j++) {
          classIndices[j] = i;
        }
        break;
      } else if (current.classValue() != currentClass) {
        // a new class value begins at i; classes skipped in between are
        // empty and share the same start index
        for (int j = currentClass + 1; j <= current.classValue(); j++) {
          classIndices[j] = i;
        }
        currentClass = (int) current.classValue();
      }
    }
    // Every boundary after the last class seen is set to the end of the data.
    // NOTE(review): this guard looks always true, and the loop also
    // overwrites the boundaries written by the missing-class branch above
    // - confirm that this is intended.
    if (currentClass <= getInputFormat().numClasses()) {
      for (int j = currentClass + 1; j < classIndices.length; j++) {
        classIndices[j] = getInputFormat().numInstances();
      }
    }

    // count the classes that have at least one instance
    int actualClasses = 0;
    for (int i = 0; i < classIndices.length - 1; i++) {
      if (classIndices[i] != classIndices[i + 1]) {
        actualClasses++;
      }
    }

    // Create the new sample
    Random random = new Random(m_RandomSeed);

    // Convert pending input instances
    if (getNoReplacement()) {
      createSubsampleWithoutReplacement(
          random, origSize, sampleSize, actualClasses, classIndices);
    } else {
      createSubsampleWithReplacement(
          random, origSize, sampleSize, actualClasses, classIndices);
    }
  }

  /**
   * Main method for testing this class.
   *
   * @param argv should contain arguments to the filter:
   *          use -h for help
   */
  public static void main(String[] argv) {
    runFilter(new Resample(), argv);
  }
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?