📄 nominaltobinaryfilter.java
字号:
/**
*
* AgentAcademy - an open source Data Mining framework for
* training intelligent agents
*
* Copyright (C) 2001-2003 AA Consortium.
*
* This library is open source software; you can redistribute it
* and/or modify it under the terms of the GNU Lesser General
* Public License as published by the Free Software Foundation;
* either version 2.0 of the License, or (at your option) any later
* version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
*/
package org.agentacademy.modules.dataminer.filters;
/**
* <p>Title: The Data Miner prototype</p>
* <p>Description: A prototype for the DataMiner (DM), the Agent Academy (AA) module responsible for performing data mining on the contents of the Agent Use Repository (AUR). The extracted knowledge is to be sent back to the AUR in the form of a PMML document.</p>
* <p>Copyright: Copyright (c) 2002</p>
* <p>Company: CERTH</p>
* @author asymeon
* @version 0.3
*/
import java.util.Enumeration;
import java.util.Vector;
import org.agentacademy.modules.dataminer.core.Attribute;
import org.agentacademy.modules.dataminer.core.FastVector;
import org.agentacademy.modules.dataminer.core.Instance;
import org.agentacademy.modules.dataminer.core.Instances;
import org.agentacademy.modules.dataminer.core.Option;
import org.agentacademy.modules.dataminer.core.OptionHandler;
import org.agentacademy.modules.dataminer.core.SparseInstance;
import org.agentacademy.modules.dataminer.core.UnassignedClassException;
import org.agentacademy.modules.dataminer.core.Utils;
import org.apache.log4j.Logger;
/**
* Converts all nominal attributes into binary numeric
* attributes. An attribute with k values is transformed into
* k-1 new binary attributes (in a similar manner to CART if a
* numeric class is assigned). Currently requires that a class attribute
* be set (but this should be changed).<p>
*
* Valid filter-specific options are: <p>
*
* -N <br>
* If binary attributes are to be coded as nominal ones.<p>
*
*/
public class NominalToBinaryFilter extends Filter implements OptionHandler {
/** Log4j logger obtained for this class. */
public static Logger log = Logger.getLogger(NominalToBinaryFilter.class);
/**
* The sorted indices of the attribute values. Stays null until
* computeAverageClassValues() allocates one row per input attribute;
* setInputFormat() resets it to null.
*/
private int[][] m_Indices = null;
/**
* Are the new attributes going to be nominal or numeric ones?
* true means numeric; setBinaryAttributesNominal() stores the negation
* of its argument here.
*/
private boolean m_Numeric = true;
/**
* Sets the format of the input instances.
*
* @param instanceInfo an Instances object containing the input
* instance structure (any instances contained in the object are
* ignored - only the structure is required).
* @return true if the outputFormat may be collected immediately
* @exception Exception if the input format can't be set
* successfully
*/
public boolean setInputFormat(Instances instanceInfo) throws Exception {
  super.setInputFormat(instanceInfo);

  // This filter cannot work without an assigned class attribute.
  final boolean classAssigned = instanceInfo.classIndex() >= 0;
  if (!classAssigned) {
    throw new UnassignedClassException("No class has been assigned to the instances");
  }

  setOutputFormat();
  // Discard any previously computed sorted value indices.
  m_Indices = null;

  // Report "output format ready" exactly when the class attribute is nominal.
  return instanceInfo.classAttribute().isNominal();
}
/**
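* Input an instance for filtering. If the class attribute is not
* nominal and the value indices have not been computed yet, the
* instance is buffered until batchFinished() is called; otherwise
* it is converted immediately.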
*
* @param instance the input instance
* @return true if the filtered instance may now be
* collected with output().
* @exception IllegalStateException if no input format has been set
*/
public boolean input(Instance instance) throws Exception {
  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }

  // Start of a new batch: reset the queue and clear the flag.
  if (m_NewBatch) {
    resetQueue();
    m_NewBatch = false;
  }

  // Without value indices and without a nominal class the instance
  // cannot be converted yet, so it is only buffered.
  final boolean mustWait =
      (m_Indices == null) && !getInputFormat().classAttribute().isNominal();
  if (mustWait) {
    bufferInput(instance);
    return false;
  }

  convertInstance(instance);
  return true;
}
/**
* Signify that this batch of input to the filter is finished.
* If the filter requires all instances prior to filtering,
* output() may now be called to retrieve the filtered instances.
*
* @return true if there are instances pending output
* @exception IllegalStateException if no input structure has been defined
*/
public boolean batchFinished() throws Exception {
  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }

  // Indices are still missing and the class is numeric: compute them now
  // and convert everything held in the input format.
  final boolean indicesPending =
      (m_Indices == null) && getInputFormat().classAttribute().isNumeric();
  if (indicesPending) {
    computeAverageClassValues();
    setOutputFormat();

    int position = 0;
    while (position < getInputFormat().numInstances()) {
      convertInstance(getInputFormat().instance(position));
      position++;
    }
  }

  flushInput();
  m_NewBatch = true;

  final boolean outputWaiting = numPendingOutput() != 0;
  return outputWaiting;
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
public Enumeration listOptions() {
  // The only filter-specific option: the -N flag, which takes no arguments.
  final Option nominalFlag = new Option(
      "\tSets if binary attributes are to be coded as nominal ones.",
      "N", 0, "-N");

  final Vector available = new Vector(1);
  available.addElement(nominalFlag);
  return available.elements();
}
/**
* Parses the options for this object. Valid options are: <p>
*
* -N <br>
* If binary attributes are to be coded as nominal ones.<p>
*
* @param options the list of options as an array of strings
* @exception Exception if an option is not supported
*/
public void setOptions(String[] options) throws Exception {
  final boolean nominalRequested = Utils.getFlag('N', options);
  setBinaryAttributesNominal(nominalRequested);

  // Nothing more to do when no input format is known yet.
  if (getInputFormat() == null) {
    return;
  }
  // Re-run setInputFormat with the already-known format.
  setInputFormat(getInputFormat());
}
/**
* Gets the current settings of the filter.
*
* @return an array of strings suitable for passing to setOptions
*/
public String [] getOptions(){
  // One slot only: "-N" when nominal coding is on, otherwise an empty string.
  final String flag = getBinaryAttributesNominal() ? "-N" : "";
  return new String[] { flag };
}
/**
* Gets if binary attributes are to be treated as nominal ones.
*
* @return true if binary attributes are to be treated as nominal ones
*/
public boolean getBinaryAttributesNominal() {
  // The field records numeric coding; nominal coding is its negation.
  final boolean numericCoding = m_Numeric;
  return !numericCoding;
}
/**
* Sets if binary attributes are to be treated as nominal ones.
*
* @param bool true if binary attributes are to be treated as nominal ones
*/
public void setBinaryAttributesNominal(boolean bool) {
  // The field records numeric coding, so store the inverse of the request.
  final boolean numericCoding = !bool;
  m_Numeric = numericCoding;
}
/** Computes average class values for each attribute and value */
private void computeAverageClassValues() {
double totalCounts, sum;
Instance instance;
double [] counts;
double [][] avgClassValues = new double[getInputFormat().numAttributes()][0];
m_Indices = new int[getInputFormat().numAttributes()][0];
for (int j = 0; j < getInputFormat().numAttributes(); j++) {
Attribute att = getInputFormat().attribute(j);
if (att.isNominal()) {
avgClassValues[j] = new double [att.numValues()];
counts = new double [att.numValues()];
for (int i = 0; i < getInputFormat().numInstances(); i++) {
instance = getInputFormat().instance(i);
if (!instance.classIsMissing() &&
(!instance.isMissing(j))) {
counts[(int)instance.value(j)] += instance.weight();
avgClassValues[j][(int)instance.value(j)] +=
instance.weight() * instance.classValue();
}
}
sum = Utils.sum(avgClassValues[j]);
totalCounts = Utils.sum(counts);
if (Utils.gr(totalCounts, 0)) {
for (int k = 0; k < att.numValues(); k++) {
if (Utils.gr(counts[k], 0)) {
avgClassValues[j][k] /= (double)counts[k];
} else {
avgClassValues[j][k] = sum / (double)totalCounts;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -