📄 uselessattributefilter.java
字号:
/**
*
* AgentAcademy - an open source Data Mining framework for
* training intelligent agents
*
* Copyright (C) 2001-2003 AA Consortium.
*
* This library is open source software; you can redistribute it
* and/or modify it under the terms of the GNU Lesser General
* Public License as published by the Free Software Foundation;
* either version 2.0 of the License, or (at your option) any later
* version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
*/
package org.agentacademy.modules.dataminer.filters;
/**
* <p>Title: The Data Miner prototype</p>
* <p>Description: A prototype for the DataMiner (DM), the Agent Academy (AA) module responsible for performing data mining on the contents of the Agent Use Repository (AUR). The extracted knowledge is to be sent back to the AUR in the form of a PMML document.</p>
* <p>Copyright: Copyright (c) 2002</p>
* <p>Company: CERTH</p>
* @author asymeon
* @version 0.3
*/
import org.agentacademy.modules.dataminer.core.*;
import java.util.Enumeration;
import java.util.Vector;
import org.apache.log4j.Logger;
/**
 * This filter removes attributes that do not vary at all or that vary too much.
 * All constant attributes are deleted automatically, along with any that exceed
 * the maximum percentage of variance parameter.<p>
 *
 * Valid filter-specific options are: <p>
 *
 * -M max variance % <br>
 * Maximum variance percentage allowed, computed per attribute as
 * (number_of_distinct_values / total_number_of_values * 100).
 * Attributes above this threshold are removed. (default 100)<p>
 *
 */
public class UselessAttributeFilter extends Filter implements OptionHandler {

  public static Logger log = Logger.getLogger(UselessAttributeFilter.class);

  /**
   * Maximum percentage of distinct values an attribute may have before it is
   * deleted (default 100).
   */
  protected double m_maxVariancePercentage = 100;

  /**
   * Sets the format of the input instances.
   *
   * @param instanceInfo an Instances object containing the input instance
   * structure (any instances contained in the object are ignored - only the
   * structure is required).
   * @return always false: the output format is only set in
   * {@link #batchFinished()}, once the attributes to delete are known
   * @exception Exception if the inputFormat can't be set successfully
   */
  public boolean setInputFormat(Instances instanceInfo) throws Exception {
    super.setInputFormat(instanceInfo);
    return false;
  }

  /**
   * Input an instance for filtering. The instance is only buffered here; the
   * actual filtering happens in {@link #batchFinished()}.
   *
   * @param instance the input instance
   * @return always false, since no output is produced before the batch is
   * finished
   * @exception IllegalStateException if no input format has been defined
   */
  public boolean input(Instance instance) {
    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }
    if (m_NewBatch) {
      // first instance of a new batch: reset the queue before buffering
      resetQueue();
      m_NewBatch = false;
    }
    bufferInput(instance);
    return false;
  }

  /**
   * Signify that this batch of input to the filter is finished. Determines
   * which attributes are constant or vary too much, removes them by delegating
   * to an {@link AttributeFilter}, and pushes the resulting instances to the
   * output queue.
   *
   * @return true if there are instances pending output
   * @exception IllegalStateException if no input format has been defined
   * @exception Exception if the underlying attribute filter fails
   */
  public boolean batchFinished() throws Exception {
    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }

    Instances toFilter = getInputFormat();
    int[] attsToDelete = findUselessAttributes(toFilter);

    // delegate the actual removal to an AttributeFilter
    AttributeFilter attributeFilter = new AttributeFilter();
    attributeFilter.setAttributeIndicesArray(attsToDelete);
    attributeFilter.setInvertSelection(false);
    attributeFilter.setInputFormat(toFilter);
    for (int i = 0; i < toFilter.numInstances(); i++) {
      attributeFilter.input(toFilter.instance(i));
    }
    attributeFilter.batchFinished();

    Instances outputDataset = attributeFilter.getOutputFormat();
    // restore old relation name to hide attribute filter stamp
    outputDataset.setRelationName(toFilter.relationName());
    setOutputFormat(outputDataset);

    Instance processed;
    while ((processed = attributeFilter.output()) != null) {
      processed.setDataset(outputDataset);
      push(processed);
    }

    flushInput();
    m_NewBatch = true;
    return (numPendingOutput() != 0);
  }

  /**
   * Collects the indices of all attributes that are either constant (fewer
   * than two distinct values) or whose percentage of distinct values exceeds
   * the configured maximum.
   *
   * @param data the dataset whose attributes are examined
   * @return the indices of the attributes to delete, in ascending order
   */
  private int[] findUselessAttributes(Instances data) {
    int numAttributes = data.numAttributes();
    int[] candidates = new int[numAttributes];
    int numToDelete = 0;

    for (int i = 0; i < numAttributes; i++) {
      AttributeStats stats = data.attributeStats(i);
      if (stats.distinctCount < 2) {
        // remove constant attributes
        candidates[numToDelete++] = i;
      } else {
        // remove attributes that vary too much
        double variancePercent = (double) stats.distinctCount
          / (double) stats.totalCount * 100.0;
        if (variancePercent > m_maxVariancePercentage) {
          candidates[numToDelete++] = i;
        }
      }
    }

    // trim the scratch array down to the indices actually selected
    int[] result = new int[numToDelete];
    System.arraycopy(candidates, 0, result, 0, numToDelete);
    return result;
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {
    Vector newVector = new Vector(1);
    newVector.addElement(new Option(
      "\tMaximum variance percentage allowed (default 100)",
      "M", 1, "-M <max variance %>"));
    return newVector.elements();
  }

  /**
   * Parses the options for this object. Valid options are: <p>
   *
   * -M max variance % <br>
   * Maximum variance percentage allowed; fractional values are accepted.
   * (default 100)<p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported or the value given
   * for -M is not a valid number
   */
  public void setOptions(String[] options) throws Exception {
    String mString = Utils.getOption('M', options);
    if (mString.length() != 0) {
      // Keep the full double precision: truncating to int would make
      // getOptions()/setOptions() round trips lossy (e.g. 99.5 -> 99).
      setMaximumVariancePercentageAllowed(Double.parseDouble(mString));
    } else {
      setMaximumVariancePercentageAllowed(100.0);
    }

    if (getInputFormat() != null) {
      setInputFormat(getInputFormat());
    }
  }

  /**
   * Gets the current settings of the filter.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {
    return new String [] {"-M", "" + getMaximumVariancePercentageAllowed()};
  }

  /**
   * Returns a string describing this filter
   *
   * @return a description of the filter suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "Removes constant attributes, along with attributes to vary too much.";
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String maximumVariancePercentageAllowedTipText() {
    return "Set the threshold for the highest variance allowed before an attribute will be deleted. "
      + "Specifically, if (number_of_distinct_values / total_number_of_values * 100)"
      + " is greater than this value then the attribute will be removed.";
  }

  /**
   * Sets the maximum variance attributes are allowed to have before they are
   * deleted by the filter.
   *
   * @param maxVariance the maximum variance allowed, specified as a percentage
   */
  public void setMaximumVariancePercentageAllowed(double maxVariance) {
    m_maxVariancePercentage = maxVariance;
  }

  /**
   * Gets the maximum variance attributes are allowed to have before they are
   * deleted by the filter.
   *
   * @return the maximum variance allowed, specified as a percentage
   */
  public double getMaximumVariancePercentageAllowed() {
    return m_maxVariancePercentage;
  }

  /**
   * Main method for testing this class. Runs the filter in batch mode when
   * the -b flag is present, otherwise in single-file mode.
   *
   * @param argv should contain arguments to the filter: use -h for help
   */
  public static void main(String [] argv) {
    try {
      if (Utils.getFlag('b', argv)) {
        Filter.batchFilterFile(new UselessAttributeFilter(), argv);
      } else {
        Filter.filterFile(new UselessAttributeFilter(), argv);
      }
    } catch (Exception ex) {
      // pass the throwable so the stack trace is not lost
      log.error(ex.getMessage(), ex);
    }
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -