📄 removewithvalues.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* RemoveWithValues.java
* Copyright (C) 1999 Eibe Frank
*
*/
package weka.filters.unsupervised.instance;
import interbase.interclient.InvalidArgumentException;
import java.util.Enumeration;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.SingleIndex;
import weka.core.UnsupportedAttributeTypeException;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.StreamableFilter;
import weka.filters.UnsupervisedFilter;
/**
* Filters instances according to the value of an attribute.
* <p>
*
* Valid filter-specific options are:
* <p>
*
* -C num <br>
* Choose attribute to be used for selection (default last).
* <p>
*
* -S num <br>
* Numeric value to be used for selection on numeric attribute. Instances with
* values smaller than given value will be selected. (default 0)
* <p>
*
* -L index1,index2-index4,... <br>
* Range of label indices to be used for selection on nominal attribute. First
* and last are valid indexes. (default all values)
* <p>
*
* -M<br>
* Missing values count as a match. This setting is independent of the -V
* option. (default missing values don't match)
* <p>
*
* -V<br>
* Invert matching sense.
* <p>
*
* -H<br>
* When selecting on nominal attributes, removes header references to excluded
* values.
* <p>
*
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
* @version $Revision$
*/
public class RemoveWithValues extends Filter implements UnsupervisedFilter,
StreamableFilter, OptionHandler {
/** The attribute's index setting; initialised to "last". */
private SingleIndex m_AttIndex = new SingleIndex("last");
/**
* Stores which values of nominal attribute are to be used for filtering.
* Not initialised at declaration; the default constructor assigns it.
*/
protected Range m_Values;
/**
* Stores which value of a numeric attribute is to be used for filtering
* (initially 0).
*/
protected double m_Value = 0;
/** True if missing values should count as a match (initially false). */
protected boolean m_MatchMissingValues = false;
/**
* Modify header for nominal attributes? When this is true and the selected
* attribute is nominal, setInputFormat rebuilds that attribute in the output
* header from the labels in the m_Values selection only.
* NOTE(review): initialised to true here, whereas setOptions sets it from the
* presence of the -H flag (absent flag means false) - confirm which default
* is intended.
*/
protected boolean m_ModifyHeader = true;
/**
* If m_ModifyHeader, stores a mapping from old to new indexes. Filled by
* setInputFormat: entry i is the position of old label index i within the
* m_Values selection, or -1 if that label is not part of the selection.
*/
protected int[] m_NominalMapping;
/**
* Returns a string describing this filter
*
* @return a description of the filter suitable for displaying in the
* explorer/experimenter gui
*/
public String globalInfo() {
  // One-sentence summary of what this filter does, returned verbatim.
  final String description = "Filters instances according to the value of an attribute.";
  return description;
}
/** Default constructor */
public RemoveWithValues() {
  // Build the label range over "first-last" with its invert flag switched
  // on, then publish it as the filter's nominal value selection.
  Range labelRange = new Range("first-last");
  labelRange.setInvert(true);
  m_Values = labelRange;
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
public Enumeration listOptions() {
  // Help texts are spelled out first so that the registrations below read
  // as a plain table of (description, name, argument count, synopsis).
  String attributeHelp = "\tChoose attribute to be used for selection.";
  String splitPointHelp = "\tNumeric value to be used for selection on numeric\n"
      + "\tattribute.\n"
      + "\tInstances with values smaller than given value will\n"
      + "\tbe selected. (default 0)";
  String labelRangeHelp = "\tRange of label indices to be used for selection on\n"
      + "\tnominal attribute.\n"
      + "\tFirst and last are valid indexes. (default all values)";
  String missingHelp = "\tMissing values count as a match. This setting is\n"
      + "\tindependent of the -V option.\n"
      + "\t(default missing values don't match)";
  String invertHelp = "\tInvert matching sense.";
  String headerHelp = "\tWhen selecting on nominal attributes, removes header\n"
      + "\treferences to excluded values.";

  // Options are registered in the order C, S, L, M, V, H.
  Vector optionList = new Vector(5);
  optionList.addElement(new Option(attributeHelp, "C", 1, "-C <num>"));
  optionList.addElement(new Option(splitPointHelp, "S", 1, "-S <num>"));
  optionList.addElement(new Option(labelRangeHelp, "L", 1,
      "-L <index1,index2-index4,...>"));
  optionList.addElement(new Option(missingHelp, "M", 0, "-M"));
  optionList.addElement(new Option(invertHelp, "V", 0, "-V"));
  optionList.addElement(new Option(headerHelp, "H", 0, "-H"));
  return optionList.elements();
}
/**
* Parses a given list of options. Valid options are:
* <p>
*
* -C num <br>
* Choose attribute to be used for selection (default last).
* <p>
*
* -S num <br>
* Numeric value to be used for selection on numeric attribute. Instances
* with values smaller than given value will be selected. (default 0)
* <p>
*
* -L index1,index2-index4,... <br>
* Range of label indices to be used for selection on nominal attribute.
* First and last are valid indexes. (default all values)
* <p>
*
* -M<br>
* Missing values count as a match. This setting is independent of the -V
* option. (default missing values don't match)
* <p>
*
* -V<br>
* Invert matching sense.
* <p>
*
* -H<br>
* When selecting on nominal attributes, removes header references to
* excluded values.
* <p>
*
* @param options
* the list of options as an array of strings
* @exception Exception
* if an option is not supported
*/
public void setOptions(String[] options) throws Exception {
  // -C: attribute to select on; an absent option falls back to "last".
  String attributeSpec = Utils.getOption('C', options);
  setAttributeIndex(attributeSpec.length() == 0 ? "last" : attributeSpec);

  // -S: numeric split point; an absent option falls back to 0.
  String thresholdSpec = Utils.getOption('S', options);
  if (thresholdSpec.length() == 0) {
    setSplitPoint(0);
  } else {
    setSplitPoint(Double.parseDouble(thresholdSpec));
  }

  // -L: nominal label range; an absent option falls back to "first-last".
  String labelSpec = Utils.getOption('L', options);
  setNominalIndices(labelSpec.length() == 0 ? "first-last" : labelSpec);

  // Boolean switches, read in the order V, M, H.
  boolean invert = Utils.getFlag('V', options);
  setInvertSelection(invert);
  boolean matchMissing = Utils.getFlag('M', options);
  setMatchMissingValues(matchMissing);
  boolean modifyHeader = Utils.getFlag('H', options);
  setModifyHeader(modifyHeader);

  // If an input format is already present, run it through setInputFormat
  // again so the output format reflects the new option values.
  if (getInputFormat() == null) {
    return;
  }
  setInputFormat(getInputFormat());
}
/**
* Gets the current settings of the filter.
*
* @return an array of strings suitable for passing to setOptions
*/
public String[] getOptions() {
  // Worst case: "-S <v>", "-C <v>", "-L <v>" (six slots) plus the three
  // flags "-V", "-M" and "-H" -> nine slots in total.
  String[] options = new String[9];
  int current = 0;

  // The split point and the attribute index are always reported.
  options[current++] = "-S";
  options[current++] = "" + getSplitPoint();
  options[current++] = "-C";
  options[current++] = "" + (getAttributeIndex());

  // The label range is only reported when it is non-empty.
  if (!getNominalIndices().equals("")) {
    options[current++] = "-L";
    options[current++] = getNominalIndices();
  }
  if (getInvertSelection()) {
    options[current++] = "-V";
  }
  // Fix: "-M" is accepted by setOptions and advertised by listOptions but
  // was never written out here, so passing the result back to setOptions
  // silently reset "match missing values" to false. The field is read
  // directly because it is the state that the -M flag controls.
  if (m_MatchMissingValues) {
    options[current++] = "-M";
  }
  if (getModifyHeader()) {
    options[current++] = "-H";
  }

  // Unused trailing slots are padded with empty strings rather than left
  // null, so the array can be handed straight back to setOptions.
  while (current < options.length) {
    options[current++] = "";
  }
  return options;
}
/**
* Sets the format of the input instances.
*
* @param instanceInfo
* an Instances object containing the input instance structure
* (any instances contained in the object are ignored - only the
* structure is required).
* @return true; the output format has been set by the time this method
* returns
* @exception UnsupportedAttributeTypeException
* if the specified attribute is neither numeric nor nominal.
*/
public boolean setInputFormat(Instances instanceInfo) throws Exception {
  super.setInputFormat(instanceInfo);

  // Tell the index object the highest valid attribute position so that the
  // configured attribute setting can be resolved against this dataset.
  m_AttIndex.setUpper(instanceInfo.numAttributes() - 1);
  boolean supportedType = isNumeric() || isNominal();
  if (!supportedType) {
    throw new UnsupportedAttributeTypeException(
        "Can only handle numeric " + "or nominal attributes.");
  }

  int attPos = m_AttIndex.getIndex();
  // Bound the label range by the number of values of the chosen attribute.
  m_Values.setUpper(instanceInfo.attribute(attPos).numValues() - 1);

  Instances outputHeader = instanceInfo;
  if (isNominal() && m_ModifyHeader) {
    // Work on an empty structural copy so the caller's header is untouched.
    outputHeader = new Instances(instanceInfo, 0);
    Attribute sourceAtt = outputHeader.attribute(attPos);
    int[] keptIndices = m_Values.getSelection();

    // Collect the labels named by the selection, in selection order.
    FastVector keptLabels = new FastVector();
    for (int k = 0; k < keptIndices.length; k++) {
      keptLabels.addElement(sourceAtt.value(keptIndices[k]));
    }

    // Swap the attribute for one that declares only the collected labels.
    outputHeader.deleteAttributeAt(attPos);
    outputHeader.insertAttributeAt(
        new Attribute(sourceAtt.name(), keptLabels), attPos);

    // Old label index -> position in the selection, or -1 when the label
    // is not part of the selection.
    m_NominalMapping = new int[sourceAtt.numValues()];
    for (int old = 0; old < m_NominalMapping.length; old++) {
      m_NominalMapping[old] = -1;
    }
    // Walking backwards lets the lowest selection position win if an index
    // were ever listed twice, which is what a first-match search would give.
    for (int k = keptIndices.length - 1; k >= 0; k--) {
      int old = keptIndices[k];
      if (old >= 0 && old < m_NominalMapping.length) {
        m_NominalMapping[old] = k;
      }
    }
  }

  setOutputFormat(outputHeader);
  return true;
}
/**
* Input an instance for filtering. Ordinarily the instance is processed and
* made available for output immediately. Some filters require all instances
* be read before producing output.
*
* @param instance
* the input instance
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -