📄 partitionedmultifilter.java
字号:
*/ protected String getFilterSpec(Filter filter) { String result; if (filter == null) { result = ""; } else { result = filter.getClass().getName(); if (filter instanceof OptionHandler) result += " " + Utils.joinOptions(((OptionHandler) filter).getOptions()); } return result; } /** * Sets the list of possible Ranges to choose from. * Also resets the state of the Range (this reset doesn't affect the * options). * * @param Ranges an array of Ranges with all options set. * @see #reset() */ public void setRanges(Range[] Ranges) { m_Ranges = Ranges; reset(); } /** * Gets the list of possible Ranges to choose from. * * @return the array of Ranges */ public Range[] getRanges() { return m_Ranges; } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String RangesTipText() { return "The attribute ranges to be used."; } /** * Gets a single Range from the set of available Ranges. * * @param index the index of the Range wanted * @return the Range */ public Range getRange(int index) { return m_Ranges[index]; } /** * determines the indices of unused attributes (ones that are not covered * by any of the range) * * @param data the data to base the determination on * @see #m_IndicesUnused */ protected void determineUnusedIndices(Instances data) { Vector<Integer> indices; int i; int n; boolean covered; // traverse all ranges indices = new Vector<Integer>(); for (i = 0; i < data.numAttributes(); i++) { if (i == data.classIndex()) continue; covered = false; for (n = 0; n < getRanges().length; n++) { if (getRanges()[n].isInRange(i)) { covered = true; break; } } if (!covered) indices.add(new Integer(i)); } // create array m_IndicesUnused = new int[indices.size()]; for (i = 0; i < indices.size(); i++) m_IndicesUnused[i] = indices.get(i).intValue(); if (getDebug()) System.out.println( "Unused indices: " + Utils.arrayToString(m_IndicesUnused)); } /** * generates a subset of the dataset with only the attributes from the range * (class is always added if present) * * @param data the data to work on * @param range the range of attribute to use * @return the generated subset * @throws Exception if creation fails */ protected Instances generateSubset(Instances data, Range range) throws Exception { Remove filter; String atts; Instances result; // determine attributes atts = range.getRanges(); if ((data.classIndex() > -1) && (!range.isInRange(data.classIndex()))) atts += "," + (data.classIndex() + 1); // setup filter filter = new Remove(); filter.setAttributeIndices(atts); filter.setInvertSelection(true); filter.setInputFormat(data); // generate output result = Filter.useFilter(data, filter); return result; } /** * renames all the attributes in the dataset (excluding the class if present) * by adding the prefix to the name. * * @param data the data to work on * @param prefix the prefix for the attributes * @return a copy of the data with the attributes renamed * @throws Exception if renaming fails */ protected Instances renameAttributes(Instances data, String prefix) throws Exception { Instances result; int i; FastVector atts; // rename attributes atts = new FastVector(); for (i = 0; i < data.numAttributes(); i++) { if (i == data.classIndex()) atts.addElement(data.attribute(i).copy()); else atts.addElement(data.attribute(i).copy(prefix + data.attribute(i).name())); } // create new dataset result = new Instances(data.relationName(), atts, data.numInstances()); for (i = 0; i < data.numInstances(); i++) { result.add((Instance) data.instance(i).copy()); } // set class if present if (data.classIndex() > -1) result.setClassIndex(data.classIndex()); return result; } /** * Determines the output format based only on the full input dataset and * returns this otherwise null is returned. In case the output format cannot * be returned immediately, i.e., immediateOutputFormat() returns false, * then this method will be called from batchFinished(). * * @param inputFormat the input format to base the output format on * @return the output format * @throws Exception in case the determination goes wrong * @see #hasImmediateOutputFormat() * @see #batchFinished() */ protected Instances determineOutputFormat(Instances inputFormat) throws Exception { Instances result; Instances processed; int i; int n; FastVector atts; Attribute att; // we need the full dataset here, see process(Instances) if (inputFormat.numInstances() == 0) return null; checkDimensions(); // determine unused indices determineUnusedIndices(inputFormat); atts = new FastVector(); for (i = 0; i < getFilters().length; i++) { if (!isFirstBatchDone()) { // generate subset processed = generateSubset(inputFormat, getRange(i)); // set input format if (!getFilter(i).setInputFormat(processed)) Filter.useFilter(processed, getFilter(i)); } // get output format processed = getFilter(i).getOutputFormat(); // rename attributes processed = renameAttributes(processed, "filtered-" + i + "-"); // add attributes for (n = 0; n < processed.numAttributes(); n++) { if (n == processed.classIndex()) continue; atts.addElement(processed.attribute(n).copy()); } } // add unused attributes if (!getRemoveUnused()) { for (i = 0; i < m_IndicesUnused.length; i++) { att = inputFormat.attribute(m_IndicesUnused[i]); atts.addElement(att.copy("unfiltered-" + att.name())); } } // add class if present if (inputFormat.classIndex() > -1) atts.addElement(inputFormat.classAttribute().copy()); // generate new dataset result = new Instances(inputFormat.relationName(), atts, 0); if (inputFormat.classIndex() > -1) result.setClassIndex(result.numAttributes() - 1); return result; } /** * Processes the given data (may change the provided dataset) and returns * the modified version. This method is called in batchFinished(). * * @param instances the data to process * @return the modified data * @throws Exception in case the processing goes wrong * @see #batchFinished() */ protected Instances process(Instances instances) throws Exception { Instances result; int i; int n; int m; int index; Instances[] processed; Instance inst; Instance newInst; double[] values; Vector errors; checkDimensions(); // set upper limits for (i = 0; i < m_Ranges.length; i++) m_Ranges[i].setUpper(instances.numAttributes() - 1); // determine unused indices determineUnusedIndices(instances); // pass data through all datasets processed = new Instances[getFilters().length]; for (i = 0; i < getFilters().length; i++) { processed[i] = generateSubset(instances, getRange(i)); if (!isFirstBatchDone()) getFilter(i).setInputFormat(processed[i]); processed[i] = Filter.useFilter(processed[i], getFilter(i)); } // set output format (can only be determined with full dataset, hence here) result = determineOutputFormat(instances); setOutputFormat(result); // check whether all filters didn't change the number of instances errors = new Vector(); for (i = 0; i < processed.length; i++) { if (processed[i].numInstances() != instances.numInstances()) errors.add(new Integer(i)); } if (errors.size() > 0) throw new IllegalStateException( "The following filter(s) changed the number of instances: " + errors); // assemble data for (i = 0; i < instances.numInstances(); i++) { inst = instances.instance(i); values = new double[result.numAttributes()]; // filtered data index = 0; for (n = 0; n < processed.length; n++) { for (m = 0; m < processed[n].numAttributes(); m++) { if (m == processed[n].classIndex()) continue; values[index] = processed[n].instance(i).value(m); index++; } } // unused attributes if (!getRemoveUnused()) { for (n = 0; n < m_IndicesUnused.length; n++) { values[index] = inst.value(m_IndicesUnused[n]); index++; } } // class if (instances.classIndex() > -1) values[values.length - 1] = inst.value(instances.classIndex()); // generate and add instance if (inst instanceof SparseInstance) newInst = new SparseInstance(instances.instance(i).weight(), values); else newInst = new Instance(instances.instance(i).weight(), values); result.add(newInst); } return result; } /** * Main method for executing this class. * * @param args should contain arguments for the filter: use -h for help */ public static void main(String[] args) { runFilter(new PartitionedMultiFilter(), args); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -