⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 removewithvalues.java

📁 MacroWeka扩展了著名数据挖掘工具weka
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    RemoveWithValues.java
 *    Copyright (C) 1999 Eibe Frank
 *
 */


package weka.filters.unsupervised.instance;

import weka.filters.*;
import java.util.Enumeration;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.SparseInstance;
import weka.core.Utils;
import weka.core.UnsupportedAttributeTypeException;
import weka.core.SingleIndex;

/** 
 * Filters instances according to the value of an attribute.<p>
 *
 * Valid filter-specific options are:<p>
 *
 * -C num<br>
 * Choose attribute to be used for selection (default last).<p>
 *
 * -S num<br>
 * Numeric value to be used for selection on numeric attribute.
 * Instances with values smaller than given value will be selected.
 * (default 0) <p>
 *
 * -L index1,index2-index4,...<br>
 * Range of label indices to be used for selection on nominal attribute.
 * First and last are valid indexes. (default all values)<p>
 *
 * -M <br>
 * Missing values count as a match. This setting is independent of
 * the -V option. (default missing values don't match)<p>
 *
 * -V<br>
 * Invert matching sense.<p>
 *
 * -H<br>
 * When selecting on nominal attributes, removes header references to
 * excluded values. <p>
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @version $Revision: 1.1 $
 */
public class RemoveWithValues extends Filter
  implements UnsupervisedFilter, StreamableFilter, OptionHandler {

  /**
   * The attribute's index setting. Initialised to "last", so the last
   * attribute is used unless a different index is set.
   */
  private SingleIndex m_AttIndex = new SingleIndex("last"); 
  
  /**
   * Stores which values (label indices) of a nominal attribute are to be
   * used for filtering.
   */
  protected Range m_Values;

  /**
   * Stores which value of a numeric attribute is to be used for filtering
   * (0 unless changed).
   */
  protected double m_Value = 0;

  /** True if missing values should count as a match (off by default). */
  protected boolean m_MatchMissingValues = false;

  /** Modify header for nominal attributes? (off by default) */
  protected boolean m_ModifyHeader = false;

  /**
   * If m_ModifyHeader is set, stores a mapping from old to new label
   * indexes of the selected nominal attribute. It is rebuilt whenever the
   * input format is set; old labels that are not part of the selection
   * map to -1.
   */
  protected int [] m_NominalMapping;

  /**
   * Returns a string describing this filter
   * @return a description of the filter suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    // One-sentence summary of what this filter does.
    final String summary =
      "Filters instances according to the value of an attribute.";
    return summary;
  }

  /** Default constructor */
  public RemoveWithValues() {

    // Build the label range completely before publishing it to the field:
    // it spans "first-last" and has its invert flag switched on.
    Range allLabels = new Range("first-last");
    allLabels.setInvert(true);
    m_Values = allLabels;
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {

    // Help texts are assembled first so the Option wiring below stays compact.
    String attributeHelp =
      "\tChoose attribute to be used for selection.";
    String splitPointHelp =
      "\tNumeric value to be used for selection on numeric\n"
      + "\tattribute.\n"
      + "\tInstances with values smaller than given value will\n"
      + "\tbe selected. (default 0)";
    String labelRangeHelp =
      "\tRange of label indices to be used for selection on\n"
      + "\tnominal attribute.\n"
      + "\tFirst and last are valid indexes. (default all values)";
    String missingHelp =
      "\tMissing values count as a match. This setting is\n"
      + "\tindependent of the -V option.\n"
      + "\t(default missing values don't match)";
    String invertHelp =
      "\tInvert matching sense.";
    String headerHelp =
      "\tWhen selecting on nominal attributes, removes header\n"
      + "\treferences to excluded values.";

    // One entry per supported command-line switch, in the documented order.
    Vector optionList = new Vector(6);
    optionList.addElement(new Option(attributeHelp, "C", 1, "-C <num>"));
    optionList.addElement(new Option(splitPointHelp, "S", 1, "-S <num>"));
    optionList.addElement(new Option(labelRangeHelp, "L", 1,
                                     "-L <index1,index2-index4,...>"));
    optionList.addElement(new Option(missingHelp, "M", 0, "-M"));
    optionList.addElement(new Option(invertHelp, "V", 0, "-V"));
    optionList.addElement(new Option(headerHelp, "H", 0, "-H"));

    return optionList.elements();
  }


  /**
   * Parses a given list of options.
   * Valid options are:<p>
   *
   * -C num<br>
   * Choose attribute to be used for selection (default last).<p>
   *
   * -S num<br>
   * Numeric value to be used for selection on numeric attribute.
   * Instances with values smaller than given value will be selected.
   * (default 0) <p>
   *
   * -L index1,index2-index4,...<br>
   * Range of label indices to be used for selection on nominal attribute.
   * First and last are valid indexes. (default all values)<p>
   *
   * -M <br>
   * Missing values count as a match. This setting is independent of
   * the -V option. (default missing values don't match)<p>
   *
   * -V<br>
   * Invert matching sense.<p>
   *
   * -H<br>
   * When selecting on nominal attributes, removes header references to
   * excluded values. <p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    // -C: attribute to select on; falls back to "last" when absent.
    String attributeSpec = Utils.getOption('C', options);
    if (attributeSpec.length() == 0) {
      attributeSpec = "last";
    }
    setAttributeIndex(attributeSpec);

    // -S: numeric split point; falls back to 0 when absent.
    String splitSpec = Utils.getOption('S', options);
    if (splitSpec.length() == 0) {
      setSplitPoint(0);
    } else {
      setSplitPoint(Double.parseDouble(splitSpec));
    }

    // -L: label index range; falls back to "first-last" when absent.
    String labelSpec = Utils.getOption('L', options);
    if (labelSpec.length() == 0) {
      labelSpec = "first-last";
    }
    setNominalIndices(labelSpec);

    // Plain on/off switches.
    boolean invert = Utils.getFlag('V', options);
    setInvertSelection(invert);
    boolean matchMissing = Utils.getFlag('M', options);
    setMatchMissingValues(matchMissing);
    boolean modifyHeader = Utils.getFlag('H', options);
    setModifyHeader(modifyHeader);

    // If an input format is already in place, run it through
    // setInputFormat again so the output format reflects the new options.
    Instances currentFormat = getInputFormat();
    if (currentFormat != null) {
      setInputFormat(currentFormat);
    }
  }

  /**
   * Gets the current settings of the filter.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {

    // Fixed-size result; slots that are not used below keep the "" filler.
    String [] result = new String [9];
    java.util.Arrays.fill(result, "");
    int next = 0;

    // Split point and attribute index are always reported.
    result[next++] = "-S";
    result[next++] = "" + getSplitPoint();
    result[next++] = "-C";
    result[next++] = "" + getAttributeIndex();

    // The label range is only reported when it is non-empty.
    String labelRange = getNominalIndices();
    if (labelRange.length() > 0) {
      result[next++] = "-L";
      result[next++] = labelRange;
    }

    // Flags appear only when switched on.
    if (getInvertSelection()) {
      result[next++] = "-V";
    }
    if (getMatchMissingValues()) {
      result[next++] = "-M";
    }
    if (getModifyHeader()) {
      result[next++] = "-H";
    }
    return result;
  }

  /**
   * Sets the format of the input instances.
   *
   * @param instanceInfo an Instances object containing the input instance
   * structure (any instances contained in the object are ignored - only the
   * structure is required).
   * @return true, as the output format is set before this method returns
   * @exception UnsupportedAttributeTypeException if the specified attribute
   * is neither numeric nor nominal.
   */
  public boolean setInputFormat(Instances instanceInfo) throws Exception {

    // Registers the input format, resolves the attribute and label ranges
    // against it and derives the output format. With the modify-header
    // setting on a nominal attribute, the output header only keeps the
    // selected labels and m_NominalMapping records old -> new label index.
    super.setInputFormat(instanceInfo);

    m_AttIndex.setUpper(instanceInfo.numAttributes() - 1);
    if (!isNumeric() && !isNominal()) {
      throw new UnsupportedAttributeTypeException("Can only handle numeric " +
                                                  "or nominal attributes.");
    }
    int attIndex = m_AttIndex.getIndex();
    m_Values.setUpper(instanceInfo.attribute(attIndex).numValues() - 1);
    if (isNominal() && m_ModifyHeader) {
      instanceInfo = new Instances(instanceInfo, 0); // copy before modifying
      Attribute oldAtt = instanceInfo.attribute(attIndex);
      int [] selection = m_Values.getSelection();

      // New label list: the selected labels, in selection order.
      FastVector newVals = new FastVector();
      for (int i = 0; i < selection.length; i++) {
        newVals.addElement(oldAtt.value(selection[i]));
      }

      // Per the Weka Instances API, deleteAttributeAt rejects the class
      // attribute. Detach the class index while the attribute is swapped
      // (no index shifting happens while it is unset) and restore it
      // afterwards, so selecting on the class attribute works as well.
      int classIndex = instanceInfo.classIndex();
      instanceInfo.setClassIndex(-1);
      instanceInfo.deleteAttributeAt(attIndex);
      instanceInfo.insertAttributeAt(new Attribute(oldAtt.name(), newVals),
                                     attIndex);
      instanceInfo.setClassIndex(classIndex);

      // Old label index -> position in the new label list, -1 if dropped.
      m_NominalMapping = new int [oldAtt.numValues()];
      for (int i = 0; i < m_NominalMapping.length; i++) {
        m_NominalMapping[i] = -1;
      }
      // Walk backwards so that the lowest position wins should an index
      // ever occur twice, matching a first-match search.
      for (int j = selection.length - 1; j >= 0; j--) {
        m_NominalMapping[selection[j]] = j;
      }
    }
    setOutputFormat(instanceInfo);
    return true;
  }

  /**
   * Input an instance for filtering. Ordinarily the instance is processed
   * and made available for output immediately. Some filters require all
   * instances be read before producing output.
   *
   * @param instance the input instance
   * @return true if the filtered instance may now be
   * collected with output().
   * @exception IllegalStateException if no input format has been set.
   */
  public boolean input(Instance instance) {

    if (getInputFormat() == null) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -