⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 removewithvalues.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    RemoveWithValues.java
 *    Copyright (C) 1999 Eibe Frank
 *
 */

package weka.filters.unsupervised.instance;

import interbase.interclient.InvalidArgumentException;

import java.util.Enumeration;
import java.util.Vector;

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.SingleIndex;
import weka.core.UnsupportedAttributeTypeException;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.StreamableFilter;
import weka.filters.UnsupervisedFilter;

/**
 * Filters instances according to the value of an attribute.
 * <p>
 * 
 * Valid filter-specific options are:
 * <p>
 * 
 * -C num <br>
 * Choose attribute to be used for selection (default last).
 * <p>
 * 
 * -S num <br>
 * Numeric value to be used for selection on numeric attribute. Instances with
 * values smaller than given value will be selected. (default 0)
 * <p>
 * 
 * -L index1,index2-index4,... <br>
 * Range of label indices to be used for selection on nominal attribute. First
 * and last are valid indexes. (default all values)
 * <p>
 * 
 * -M<br>
 * Missing values count as a match. This setting is independent of the -V
 * option. (default missing values don't match)
 * <p>
 * 
 * -V<br>
 * Invert matching sense.
 * <p>
 * 
 * -H<br>
 * When selecting on nominal attributes, removes header references to excluded
 * values.
 * <p>
 * 
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @version $Revision$
 */
public class RemoveWithValues extends Filter implements UnsupervisedFilter,
        StreamableFilter, OptionHandler {

    /**
     * Index setting of the attribute whose value decides the selection;
     * initialised to "last".
     */
    private SingleIndex m_AttIndex = new SingleIndex("last");

    /**
     * Stores which values of nominal attribute are to be used for filtering.
     * Created in the constructor as the range "first-last" with its invert
     * flag set.
     */
    protected Range m_Values;

    /** Stores which value of a numeric attribute is to be used for filtering. */
    protected double m_Value = 0;

    /** True if missing values should count as a match */
    protected boolean m_MatchMissingValues = false;

    /**
     * Modify header for nominal attributes?
     * NOTE(review): the field defaults to true, while the -H command-line
     * flag is documented as opt-in and setOptions() overwrites this field
     * with the flag's presence -- confirm which default is intended.
     */
    protected boolean m_ModifyHeader = true;

    /**
     * If m_ModifyHeader, stores a mapping from old to new indexes: entry i
     * holds the new index of old nominal value i, or -1 when that value is
     * not part of the selection. Filled in by setInputFormat().
     */
    protected int[] m_NominalMapping;

    /**
     * Returns a string describing this filter.
     * 
     * @return a one-sentence description of the filter suitable for
     *         displaying in the explorer/experimenter gui
     */
    public String globalInfo() {
        return "Filters instances according to the value of an attribute.";
    }

    /**
     * Default constructor. Sets the nominal value range to "first-last" and
     * switches on the range's invert flag.
     */
    public RemoveWithValues() {

        Range initialRange = new Range("first-last");
        initialRange.setInvert(true);
        m_Values = initialRange;
    }

    /**
     * Lists the command-line options understood by this filter
     * (-C, -S, -L, -M, -V and -H), each with its help text.
     * 
     * @return an enumeration of Option objects, one per available option.
     */
    public Enumeration listOptions() {

        Option attributeOpt = new Option(
                "\tChoose attribute to be used for selection.",
                "C", 1, "-C <num>");

        Option splitPointOpt = new Option(
                "\tNumeric value to be used for selection on numeric\n"
                        + "\tattribute.\n"
                        + "\tInstances with values smaller than given value will\n"
                        + "\tbe selected. (default 0)",
                "S", 1, "-S <num>");

        Option labelRangeOpt = new Option(
                "\tRange of label indices to be used for selection on\n"
                        + "\tnominal attribute.\n"
                        + "\tFirst and last are valid indexes. (default all values)",
                "L", 1, "-L <index1,index2-index4,...>");

        Option matchMissingOpt = new Option(
                "\tMissing values count as a match. This setting is\n"
                        + "\tindependent of the -V option.\n"
                        + "\t(default missing values don't match)",
                "M", 0, "-M");

        Option invertOpt = new Option(
                "\tInvert matching sense.",
                "V", 0, "-V");

        Option modifyHeaderOpt = new Option(
                "\tWhen selecting on nominal attributes, removes header\n"
                        + "\treferences to excluded values.",
                "H", 0, "-H");

        // Same order as before: C, S, L, M, V, H.
        Vector optionList = new Vector(5);
        optionList.addElement(attributeOpt);
        optionList.addElement(splitPointOpt);
        optionList.addElement(labelRangeOpt);
        optionList.addElement(matchMissingOpt);
        optionList.addElement(invertOpt);
        optionList.addElement(modifyHeaderOpt);

        return optionList.elements();
    }

    /**
     * Parses a given list of options and applies them to this filter. Any
     * option that is absent is reset to its default. Valid options are:
     * <p>
     * 
     * -C num <br>
     * Choose attribute to be used for selection (default last).
     * <p>
     * 
     * -S num <br>
     * Numeric value to be used for selection on numeric attribute. Instances
     * with values smaller than given value will be selected. (default 0)
     * <p>
     * 
     * -L index1,index2-index4,... <br>
     * Range of label indices to be used for selection on nominal attribute.
     * First and last are valid indexes. (default all values)
     * <p>
     * 
     * -M<br>
     * Missing values count as a match. This setting is independent of the -V
     * option. (default missing values don't match)
     * <p>
     * 
     * -V<br>
     * Invert matching sense.
     * <p>
     * 
     * -H<br>
     * When selecting on nominal attributes, removes header references to
     * excluded values.
     * <p>
     * 
     * If an input format has already been set, it is re-applied at the end so
     * that the output format reflects the new options.
     * 
     * @param options
     *            the list of options as an array of strings
     * @exception Exception
     *                if an option is not supported; a malformed -S value
     *                surfaces as a NumberFormatException
     */
    public void setOptions(String[] options) throws Exception {

        String attIndex = Utils.getOption('C', options);
        setAttributeIndex(attIndex.length() != 0 ? attIndex : "last");

        String splitPoint = Utils.getOption('S', options);
        if (splitPoint.length() != 0) {
            // parseDouble avoids the deprecated boxing constructor
            // new Double(String) and yields the primitive directly.
            setSplitPoint(Double.parseDouble(splitPoint));
        } else {
            setSplitPoint(0);
        }

        String convertList = Utils.getOption('L', options);
        setNominalIndices(convertList.length() != 0 ? convertList
                : "first-last");

        setInvertSelection(Utils.getFlag('V', options));
        setMatchMissingValues(Utils.getFlag('M', options));
        setModifyHeader(Utils.getFlag('H', options));

        // Re-initialize output format according to new options
        if (getInputFormat() != null) {
            setInputFormat(getInputFormat());
        }
    }

    /**
     * Gets the current settings of the filter.
     * <p>
     * The result always contains -S and -C with their values, followed by
     * -L (with its range) when the nominal index string is non-empty, and
     * the flags -V, -M and -H when the corresponding setting is enabled.
     * Unused trailing slots are filled with empty strings.
     * 
     * @return an array of strings suitable for passing to setOptions
     */
    public String[] getOptions() {

        // Worst case: -S v -C v -L v -V -M -H = 9 entries.
        String[] options = new String[9];
        int current = 0;

        options[current++] = "-S";
        options[current++] = "" + getSplitPoint();
        options[current++] = "-C";
        options[current++] = "" + (getAttributeIndex());
        if (!getNominalIndices().equals("")) {
            options[current++] = "-L";
            options[current++] = getNominalIndices();
        }
        if (getInvertSelection()) {
            options[current++] = "-V";
        }
        // setOptions() parses -M, so it must be reported here as well;
        // otherwise the match-missing-values setting is lost whenever the
        // options are read back and re-applied.
        if (m_MatchMissingValues) {
            options[current++] = "-M";
        }
        if (getModifyHeader()) {
            options[current++] = "-H";
        }
        // Pad the remainder so callers never see null entries.
        while (current < options.length) {
            options[current++] = "";
        }
        return options;
    }

    /**
     * Sets the format of the input instances and determines the output
     * format. When the selected attribute is nominal and header modification
     * is enabled, the output header is a copy of the input header in which
     * that attribute only lists the selected values, and the old-to-new
     * value index mapping is recorded in m_NominalMapping.
     * 
     * @param instanceInfo
     *            an Instances object containing the input instance structure
     *            (any instances contained in the object are ignored - only the
     *            structure is required).
     * @return true in every case where no exception is thrown
     * @exception UnsupportedAttributeTypeException
     *                if the specified attribute is neither numeric or nominal.
     */
    public boolean setInputFormat(Instances instanceInfo) throws Exception {

        super.setInputFormat(instanceInfo);

        m_AttIndex.setUpper(instanceInfo.numAttributes() - 1);
        if (!(isNumeric() || isNominal())) {
            throw new UnsupportedAttributeTypeException(
                    "Can only handle numeric or nominal attributes.");
        }

        final int attPos = m_AttIndex.getIndex();
        m_Values.setUpper(instanceInfo.attribute(attPos).numValues() - 1);

        Instances outputHeader = instanceInfo;
        if (isNominal() && m_ModifyHeader) {
            // Work on a header-only copy so the caller's structure is left
            // untouched.
            outputHeader = new Instances(instanceInfo, 0);
            Attribute originalAtt = outputHeader.attribute(attPos);
            int[] keptValues = m_Values.getSelection();

            // Build the reduced label list in selection order.
            FastVector keptLabels = new FastVector();
            for (int k = 0; k < keptValues.length; k++) {
                keptLabels.addElement(originalAtt.value(keptValues[k]));
            }

            // Swap the attribute for one that only declares the kept labels.
            outputHeader.deleteAttributeAt(attPos);
            Attribute reducedAtt = new Attribute(originalAtt.name(),
                    keptLabels);
            outputHeader.insertAttributeAt(reducedAtt, attPos);

            // For every old value index, record the position of its first
            // occurrence in the selection, or -1 if it was not selected.
            m_NominalMapping = new int[originalAtt.numValues()];
            for (int oldIdx = 0; oldIdx < m_NominalMapping.length; oldIdx++) {
                int newIdx = -1;
                for (int k = 0; k < keptValues.length && newIdx < 0; k++) {
                    if (keptValues[k] == oldIdx) {
                        newIdx = k;
                    }
                }
                m_NominalMapping[oldIdx] = newIdx;
            }
        }

        setOutputFormat(outputHeader);
        return true;
    }

    /**
     * Input an instance for filtering. Ordinarily the instance is processed and
     * made available for output immediately. Some filters require all instances
     * be read before producing output.
     * 
     * @param instance
     *            the input instance

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -