📄 filter.java
字号:
int instIndex = instSrcCompat ? srcStrAtts[i] : destStrAtts[i];
Attribute src = srcDataset.attribute(srcStrAtts[i]);
Attribute dest = destDataset.attribute(destStrAtts[i]);
if (!instance.isMissing(instIndex)) {
//System.err.println(instance.value(srcIndex)
// + " " + src.numValues()
// + " " + dest.numValues());
int valIndex = dest.addStringValue(src, (int)instance.value(instIndex));
// setValue here shouldn't be too slow here unless your dataset has
// squillions of string attributes
instance.setValue(instIndex, (double)valIndex);
}
}
}
/**
 * Discards every instance currently buffered in the input format
 * dataset. Call this instead of <code>getInputFormat().delete()</code>.
 */
protected void flushInput() {
  if (m_InputStringAtts.length == 0) {
    // No string attributes: emptying the dataset in place is more
    // efficient than building a new Instances(m_InputFormat, 0).
    m_InputFormat.delete();
    return;
  }
  // String attributes are present: replace the dataset with the result
  // of stringFreeStructure().
  m_InputFormat = m_InputFormat.stringFreeStructure();
}
/**
 * Forwards to <code>setInputFormat(Instances)</code>.
 *
 * @param instanceInfo passed unchanged to
 * <code>setInputFormat(Instances)</code>
 * @return the value returned by <code>setInputFormat(Instances)</code>
 * @exception Exception if <code>setInputFormat(Instances)</code> throws
 * @deprecated use <code>setInputFormat(Instances)</code> instead.
 */
public boolean inputFormat(Instances instanceInfo) throws Exception {
  boolean outputFormatReady = setInputFormat(instanceInfo);
  return outputFormatReady;
}
/**
 * Sets the format of the input instances. A filter that can work out
 * its output format before seeing any input should do so in its own
 * override. This default implementation stores the string-free
 * structure of the input, records the indices of its string
 * attributes, clears the output format, installs an empty output
 * queue and sets the new batch flag. Overriders should call
 * <code>super.setInputFormat(Instances)</code>.
 *
 * @param instanceInfo an Instances object containing the input instance
 * structure (any instances contained in the object are ignored - only
 * the structure is required).
 * @return true if the outputFormat may be collected immediately; this
 * default implementation always returns false
 * @exception Exception if the inputFormat can't be set successfully
 */
public boolean setInputFormat(Instances instanceInfo) throws Exception {
  // Input side: keep only the structure and note the string attributes.
  this.m_InputFormat = instanceInfo.stringFreeStructure();
  this.m_InputStringAtts = getStringIndices(instanceInfo);

  // Output side: start from a clean slate with the new batch flag set.
  this.m_NewBatch = true;
  this.m_OutputQueue = new Queue();
  this.m_OutputFormat = null;

  return false;
}
/**
 * Forwards to <code>getOutputFormat()</code>.
 *
 * @return the value returned by <code>getOutputFormat()</code>
 * @deprecated use <code>getOutputFormat()</code> instead.
 */
public Instances outputFormat() {
  Instances format = getOutputFormat();
  return format;
}
/**
 * Gets the format of the output instances. Only call this after
 * input() or batchFinished() has returned true. The relation name of
 * the output instances should be changed to reflect the action of the
 * filter (eg: add the filter name and options).
 *
 * @return a new Instances object containing the output instance
 * structure only.
 * @exception NullPointerException if no input structure has been
 * defined (or the output format hasn't been determined yet)
 */
public Instances getOutputFormat() {
  if (m_OutputFormat != null) {
    // Build a fresh Instances from the stored format with capacity 0.
    return new Instances(m_OutputFormat, 0);
  }
  throw new NullPointerException("No output format defined.");
}
/**
 * Input an instance for filtering. Ordinarily the instance is
 * processed and made available for output immediately. Filters that
 * need to see every instance first make their output available only
 * after batchFinished() has been called. When the instance starts a
 * new batch the output queue is replaced with an empty one. This
 * default implementation only buffers the instance and assumes all
 * conversion happens when batchFinished() is called.
 *
 * @param instance the input instance
 * @return true if the filtered instance may now be collected with
 * output(); this default implementation always returns false
 * @exception NullPointerException if the input format has not been
 * defined.
 * @exception Exception if the input instance was not of the correct
 * format or if there was a problem with the filtering.
 */
public boolean input(Instance instance) throws Exception {
  final boolean formatMissing = (m_InputFormat == null);
  if (formatMissing) {
    throw new NullPointerException("No input instance format defined");
  }

  if (m_NewBatch) {
    // First instance of a batch: clear the flag and start a new queue.
    m_NewBatch = false;
    m_OutputQueue = new Queue();
  }

  bufferInput(instance);
  return false;
}
/**
 * Signify that this batch of input to the filter is finished. If the
 * filter requires all instances prior to filtering, output() may now
 * be called to retrieve the filtered instances. Any subsequent
 * instances filtered should be filtered based on settings obtained
 * from the first batch (unless the inputFormat has been re-assigned or
 * new options have been set). This default implementation flushes the
 * buffered input and sets the new batch flag; it assumes all instance
 * processing occurs during setInputFormat() and input().
 *
 * @return true if there are instances pending output
 * @exception NullPointerException if no input structure has been
 * defined (numPendingOutput() may also throw it when no output format
 * has been defined)
 * @exception Exception if there was a problem finishing the batch.
 */
public boolean batchFinished() throws Exception {
  if (m_InputFormat != null) {
    flushInput();
    m_NewBatch = true;
    int pending = numPendingOutput();
    return pending != 0;
  }
  throw new NullPointerException("No input instance format defined");
}
/**
 * Output an instance after filtering and remove it from the output
 * queue.
 *
 * @return the instance popped from the output queue (or null if the
 * queue is empty).
 * @exception NullPointerException if no output structure has been
 * defined
 */
public Instance output() {
  if (m_OutputFormat == null) {
    throw new NullPointerException("No output instance format defined");
  }

  Instance head = null;
  if (!m_OutputQueue.empty()) {
    head = (Instance) m_OutputQueue.pop();

    // Once the queue has been drained while the new batch flag is set,
    // swap in the string-free structure so references to old strings
    // are cleared out.
    boolean drained = m_OutputQueue.empty() && m_NewBatch;
    if (drained && m_OutputStringAtts.length > 0) {
      m_OutputFormat = m_OutputFormat.stringFreeStructure();
    }
  }
  return head;
}
/**
 * Output an instance after filtering but leave it in the output queue.
 *
 * @return the instance returned by peeking at the output queue (or
 * null if the queue is empty).
 * @exception NullPointerException if no output structure has been
 * defined
 */
public Instance outputPeek() {
  if (m_OutputFormat == null) {
    throw new NullPointerException("No output instance format defined");
  }
  return m_OutputQueue.empty() ? null : (Instance) m_OutputQueue.peek();
}
/**
 * Returns the number of instances pending output.
 *
 * @return the current size of the output queue
 * @exception NullPointerException if no output structure has been
 * defined
 */
public int numPendingOutput() {
  if (m_OutputFormat != null) {
    return m_OutputQueue.size();
  }
  throw new NullPointerException("No output instance format defined");
}
/**
 * Reports whether the output format is ready to be collected.
 *
 * @return true if the output format is set (non-null)
 */
public boolean isOutputFormatDefined() {
  boolean defined = (null != m_OutputFormat);
  return defined;
}
/**
 * Collects the indices of all string attributes, in ascending order.
 *
 * @param insts the Instances to scan for string attributes.
 * @return a newly allocated array holding the indices of the string
 * attributes in the given structure. Will be zero-length if there are
 * no string attributes
 */
protected int [] getStringIndices(Instances insts) {
  // Scratch array sized for the worst case (every attribute is a string).
  int [] scratch = new int [insts.numAttributes()];
  int found = 0;

  int pos = 0;
  while (pos < insts.numAttributes()) {
    if (insts.attribute(pos).type() == Attribute.STRING) {
      scratch[found] = pos;
      found++;
    }
    pos++;
  }

  // Trim the scratch array down to the entries actually filled in.
  int [] trimmed = new int [found];
  System.arraycopy(scratch, 0, trimmed, 0, found);
  return trimmed;
}
/**
 * Filters an entire set of instances through a filter and returns
 * the new set. Every instance of <code>data</code> is passed to
 * <code>filter.input()</code>, the batch is finished, and the
 * instances returned by <code>filter.output()</code> are added to the
 * dataset obtained from <code>filter.getOutputFormat()</code>.
 *
 * @param data the data to be filtered
 * @param filter the filter to be used
 * @return the filtered set of data
 * @exception Exception if the filter can't be used successfully
 */
public static Instances useFilter(Instances data,
                                  Filter filter) throws Exception {
  // Feed the whole dataset to the filter as a single batch.
  int pos = 0;
  while (pos < data.numInstances()) {
    filter.input(data.instance(pos));
    pos++;
  }
  filter.batchFinished();

  // Drain the filter's output until it reports nothing left (null).
  Instances filtered = filter.getOutputFormat();
  for (Instance next = filter.output();
       next != null;
       next = filter.output()) {
    filtered.add(next);
  }
  return filtered;
}
/**
* Method for testing filters.
*
* @param filter the filter to test
* @param options should contain the following arguments: <br>
* -i input_file <br>
* -o output_file <br>
* -c class_index <br>
* or -h for help on options
* @exception Exception if something goes wrong or the user requests help on
* command options
*/
public static void filterFile(Filter filter, String [] options)
throws Exception {
boolean debug = false;
Instances data = null;
Reader input = null;
PrintWriter output = null;
boolean helpRequest;
try {
helpRequest = Utils.getFlag('h', options);
if (Utils.getFlag('d', options)) {
debug = true;
}
String infileName = Utils.getOption('i', options);
String outfileName = Utils.getOption('o', options);
String classIndex = Utils.getOption('c', options);
if (filter instanceof OptionHandler) {
((OptionHandler)filter).setOptions(options);
}
Utils.checkForRemainingOptions(options);
if (helpRequest) {
throw new Exception("Help requested.\n");
}
if (infileName.length() != 0) {
input = new BufferedReader(new FileReader(infileName));
} else {
input = new BufferedReader(new InputStreamReader(System.in));
}
if (outfileName.length() != 0) {
output = new PrintWriter(new FileOutputStream(outfileName));
} else {
output = new PrintWriter(System.out);
}
data = new Instances(input, 1);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -