📄 filter.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* Filter.java
* Copyright (C) 1999 Len Trigg
*
*/
package weka.filters;
import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.Serializable;
import java.util.Enumeration;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Queue;
import weka.core.Utils;
/**
* An abstract class for instance filters: objects that take instances
* as input, carry out some transformation on the instance and then
* output the instance. The method implementations in this class
* assume that most of the work will be done in the methods overridden
* by subclasses.<p>
*
* A simple example of filter use. This example doesn't remove
* instances from the output queue until all instances have been
* input, so has higher memory consumption than an approach that
* uses output instances as they are made available:<p>
*
* <code> <pre>
* Filter filter = ..some type of filter..
* Instances data = ..some instances..
* for (int i = 0; i < data.numInstances(); i++) {
* filter.input(data.instance(i));
* }
* filter.batchFinished();
* Instances newData = filter.outputFormat();
* Instance processed;
* while ((processed = filter.output()) != null) {
* newData.add(processed);
* }
* ..do something with newData..
* </pre> </code>
*
* @author Len Trigg (trigg@cs.waikato.ac.nz)
* @version $Revision$
*/
public abstract class Filter implements Serializable {
/*
* Filter refactoring TODO:
*
* - Update all filters to use getOutputFormat and setInputFormat
* instead of outputFormat, outputFormatPeek and inputFormat.
* - Update users of filters to use getOutputFormat and setInputFormat
* - remove outputFormat, outputFormatPeek and inputFormat
*
*/
/** Debugging mode; off by default. */
private boolean m_Debug = false;
/**
 * The output format for instances. Assigned by setOutputFormat(), which
 * stores the result of <code>outputFormat.stringFreeStructure()</code>
 * (renamed), or null when it is handed null.
 */
private Instances m_OutputFormat = null;
/**
 * The output instance queue. Replaced by a new Queue in setOutputFormat()
 * and resetQueue(); push() appends to it.
 */
private Queue m_OutputQueue = null;
/**
 * Indices of string attributes in the output format. Recomputed via
 * getStringIndices() each time setOutputFormat() receives a non-null format.
 */
private int [] m_OutputStringAtts = null;
/**
 * Indices of string attributes in the input format. According to the
 * getInputStringIndex() documentation this is built during setInputFormat().
 */
private int [] m_InputStringAtts = null;
/**
 * The input format for instances. bufferInput() also adds the instances it
 * buffers to this dataset.
 */
private Instances m_InputFormat = null;
/** Record whether the filter is at the start of a batch; initially true. */
protected boolean m_NewBatch = true;
/**
 * Installs the format that output instances will follow and replaces the
 * output queue with an empty one. A derived class should call this once it
 * has determined its output format.
 * <p>
 * For a non-null format the filter keeps
 * <code>outputFormat.stringFreeStructure()</code>, recomputes the output
 * string-attribute indices from it, and renames its relation to the
 * original relation name followed by "-", the filter's class name and, when
 * the filter is an <code>OptionHandler</code>, each of its current options
 * (trimmed and appended without separators).
 * <p>
 * Passing null clears the stored format; the string-attribute indices are
 * left as they were.
 *
 * @param outputFormat the new output format, or null to clear it
 */
protected void setOutputFormat(Instances outputFormat) {
  if (outputFormat == null) {
    m_OutputFormat = null;
  } else {
    m_OutputFormat = outputFormat.stringFreeStructure();
    m_OutputStringAtts = getStringIndices(m_OutputFormat);

    // Rename the relation: <old name>-<filter class><option><option>...
    StringBuffer name = new StringBuffer();
    name.append(outputFormat.relationName());
    name.append("-");
    name.append(this.getClass().getName());
    if (this instanceof OptionHandler) {
      String [] opts = ((OptionHandler) this).getOptions();
      for (int k = 0; k < opts.length; k++) {
        name.append(opts[k].trim());
      }
    }
    m_OutputFormat.setRelationName(name.toString());
  }
  // The queue is replaced whether or not a format was supplied.
  m_OutputQueue = new Queue();
}
/**
 * Returns the dataset currently held as the input format. The stored
 * object itself is returned (no copy is made). Because bufferInput() adds
 * instances to this same dataset, it may contain buffered instances.
 *
 * @return the input Instances; may be null, which is the field's initial
 * value
 */
protected Instances getInputFormat() {
  return this.m_InputFormat;
}
/**
 * Gives direct access to the current input format; no copy is made.
 * This returns the same field as getInputFormat().
 *
 * @return a reference to the current input format (possibly null)
 */
protected Instances inputFormatPeek() {
  return this.m_InputFormat;
}
/**
 * Gives direct access to the current output format; no copy is made.
 * The value is whatever setOutputFormat() last stored, which is null if it
 * was last called with null.
 *
 * @return a reference to the current output format (possibly null)
 */
protected Instances outputFormatPeek() {
  return this.m_OutputFormat;
}
/**
 * Makes an instance available on the output queue. The derived class
 * should call this for every output instance it produces. A null instance
 * is ignored.
 * <p>
 * Before queuing, the instance's string values are transferred to the
 * output format and the instance is attached to the output format as its
 * dataset. The supplied instance object itself is modified and queued.
 *
 * @param instance the instance to be added to the queue
 * @throws IllegalArgumentException if the output format has string
 * attributes and the instance either has no dataset or belongs to a dataset
 * with a different number of attributes
 */
protected void push(Instance instance) {
  if (instance == null) {
    return;
  }
  // String values must be moved across while the instance still points at
  // its old dataset, i.e. before setDataset() below.
  copyStringValues(instance, m_OutputFormat, m_OutputStringAtts);
  instance.setDataset(m_OutputFormat);
  m_OutputQueue.push(instance);
}
/**
 * Discards any pending output by replacing the output queue with a new,
 * empty one.
 */
protected void resetQueue() {
  this.m_OutputQueue = new Queue();
}
/**
 * Stores an input instance in the input format dataset for later
 * processing. Always use this method instead of calling
 * <code>getInputFormat().add(instance)</code> directly: it first transfers
 * the instance's string values to the input format dataset. A null
 * instance is ignored.
 * <p>
 * The existing contract states that the provided instance gets copied when
 * buffered (this depends on <code>Instances.add()</code>, which is not
 * shown here).
 *
 * @param instance the <code>Instance</code> to buffer
 * @throws IllegalArgumentException if the input format has string
 * attributes and the instance either has no dataset or belongs to a dataset
 * with a different number of attributes
 */
protected void bufferInput(Instance instance) {
  if (instance == null) {
    return;
  }
  copyStringValues(instance, m_InputFormat, m_InputStringAtts);
  m_InputFormat.add(instance);
}
/**
 * Returns the indices of all string attributes in the input format. The
 * index is created during setInputFormat(). The filter's own array is
 * returned, not a copy.
 *
 * @return an array containing the indices of string attributes in the
 * input dataset
 */
protected int [] getInputStringIndex() {
  return this.m_InputStringAtts;
}
/**
 * Returns the indices of all string attributes in the output format. The
 * index is created during setOutputFormat(). The filter's own array is
 * returned, not a copy.
 *
 * @return an array containing the indices of string attributes in the
 * output dataset
 */
protected int [] getOutputStringIndex() {
  return this.m_OutputStringAtts;
}
/**
 * Transfers the string values referenced by an instance from the dataset
 * it currently belongs to into another dataset. The instance must already
 * be assigned to a dataset, and that dataset and the destination must have
 * the same structure; the same index array is therefore used for both
 * sides. Nothing is done when there are no string attributes.
 *
 * @param inst the Instance containing the string values to copy
 * @param destDataset the destination set of Instances
 * @param strAtts an array containing the indices of any string attributes
 * in the dataset
 * @throws IllegalArgumentException if there are string attributes and the
 * instance has no dataset, or its dataset and the destination differ in
 * their number of attributes
 */
private void copyStringValues(Instance inst, Instances destDataset,
                              int []strAtts) {
  // No string attributes means there is nothing to transfer or validate.
  if (strAtts.length == 0) {
    return;
  }
  Instances srcDataset = inst.dataset();
  if (srcDataset == null) {
    throw new IllegalArgumentException("Instance has no dataset assigned!!");
  }
  if (srcDataset.numAttributes() != destDataset.numAttributes()) {
    throw new IllegalArgumentException("Src and Dest differ in # of attributes!!");
  }
  // The instance is laid out like its own (source) dataset, hence "true".
  copyStringValues(inst, true, srcDataset, strAtts, destDataset, strAtts);
}
/**
 * Copies the string values referenced by an instance from a source dataset
 * to a destination dataset, updating the instance's references so they are
 * valid for the destination. This is a convenience form that uses the
 * filter's own input string-attribute indices for the source and its
 * output string-attribute indices for the destination. The source dataset
 * must therefore have the same structure as the filter input format and
 * the destination the same structure as the filter output format.
 * <p>
 * The instance may have the structure (number and position of attributes)
 * of either dataset; <code>instSrcCompat</code> says which.
 *
 * @param instance the instance containing references to strings in the
 * source dataset that will have references updated to be valid for the
 * destination dataset
 * @param instSrcCompat true if the instance structure is the same as the
 * source, or false if it is the same as the destination
 * @param srcDataset the dataset for which the current instance string
 * references are valid (after any position mapping if needed)
 * @param destDataset the dataset for which the current instance string
 * references need to be inserted (after any position mapping if needed)
 */
protected void copyStringValues(Instance instance, boolean instSrcCompat,
                                Instances srcDataset, Instances destDataset) {
  int [] srcIndices = m_InputStringAtts;
  int [] destIndices = m_OutputStringAtts;
  copyStringValues(instance, instSrcCompat,
                   srcDataset, srcIndices,
                   destDataset, destIndices);
}
/**
* Takes string values referenced by an Instance and copies them from a
* source dataset to a destination dataset. The instance references are
* updated to be valid for the destination dataset. The instance may have the
* structure (i.e. number and attribute position) of either dataset (this
* affects where references are obtained from). Only works if the number
* of string attributes is the same in both indices (implicitly these string
* attributes should be semantically same but just with shifted positions).
*
* @param instance the instance containing references to strings in the source
* dataset that will have references updated to be valid for the destination
* dataset.
* @param instSrcCompat true if the instance structure is the same as the
* source, or false if it is the same as the destination (i.e. which of the
* string attribute indices contains the correct locations for this instance).
* @param srcDataset the dataset for which the current instance string
* references are valid (after any position mapping if needed)
* @param srcStrAtts an array containing the indices of string attributes
* in the source dataset.
* @param destDataset the dataset for which the current instance string
* references need to be inserted (after any position mapping if needed)
* @param destStrAtts an array containing the indices of string attributes
* in the destination dataset.
*/
protected void copyStringValues(Instance instance, boolean instSrcCompat,
Instances srcDataset, int []srcStrAtts,
Instances destDataset, int []destStrAtts) {
if (srcDataset == destDataset) {
return;
}
if (srcStrAtts.length != destStrAtts.length) {
throw new IllegalArgumentException("Src and Dest string indices differ in length!!");
}
for (int i = 0; i < srcStrAtts.length; i++) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -