⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 filter.java

📁 wekaUT是 university texas austin 开发的基于weka的半指导学习(semi supervised learning)的分类器
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    Filter.java *    Copyright (C) 1999 Len Trigg * */package weka.filters;import java.io.BufferedReader;import java.io.FileOutputStream;import java.io.FileReader;import java.io.InputStreamReader;import java.io.PrintWriter;import java.io.Reader;import java.io.Serializable;import java.util.Enumeration;import weka.core.Attribute;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.Queue;import weka.core.Utils;/**  * An abstract class for instance filters: objects that take instances * as input, carry out some transformation on the instance and then * output the instance. The method implementations in this class * assume that most of the work will be done in the methods overridden * by subclasses.<p> * * A simple example of filter use. This example doesn't remove * instances from the output queue until all instances have been * input, so has higher memory consumption than an approach that * uses output instances as they are made available:<p> * * <code> <pre> *  Filter filter = ..some type of filter.. *  Instances data = ..some instances.. *  filter.setInputFormat(data); 
*  for (int i = 0; i < data.numInstances(); i++) { *    filter.input(data.instance(i)); *  } *  filter.batchFinished(); *  Instances newData = filter.outputFormat(); *  Instance processed; *  while ((processed = filter.output()) != null) { *    newData.add(processed); *  } *  ..do something with newData.. * </pre> </code> * * @author Len Trigg (trigg@cs.waikato.ac.nz) * @version $Revision: 1.1.1.1 $ */public abstract class Filter implements Serializable {  /*   * Filter refactoring TODO:   *   * - Update all filters to use getOutputFormat and setInputFormat   * instead of outputFormat, outputFormatPeek and inputFormat.   * - Update users of filters to use getOutputFormat and setInputFormat   * - remove outputFormat, outputFormatPeek and inputFormat   *   */  /** Debugging mode */  private boolean m_Debug = false;  /** The output format for instances */  private Instances m_OutputFormat = null;  /** The output instance queue */  private Queue m_OutputQueue = null;  /** Indices of string attributes in the output format */  private int [] m_OutputStringAtts = null;  /** Indices of string attributes in the input format */  private int [] m_InputStringAtts = null;  /** The input format for instances */  private Instances m_InputFormat = null;  /** Record whether the filter is at the start of a batch */  protected boolean m_NewBatch = true;  /**   * Sets the format of output instances. The derived class should use this   * method once it has determined the outputformat. The    * output queue is cleared.   
*   * @param outputFormat the new output format   */  protected void setOutputFormat(Instances outputFormat) {    if (outputFormat != null) {      m_OutputFormat = outputFormat.stringFreeStructure();      m_OutputStringAtts = getStringIndices(m_OutputFormat);      // Rename the attribute      String relationName = outputFormat.relationName()         + "-" + this.getClass().getName();      if (this instanceof OptionHandler) {        String [] options = ((OptionHandler)this).getOptions();        for (int i = 0; i < options.length; i++) {          relationName += options[i].trim();        }      }      m_OutputFormat.setRelationName(relationName);    } else {      m_OutputFormat = null;    }    m_OutputQueue = new Queue();  }  /**   * Gets the currently set inputformat instances. This dataset may contain   * buffered instances.   *   * @return the input Instances.   */  protected Instances getInputFormat() {    return m_InputFormat;  }  /**   * Returns a reference to the current output format without   * copying it.   *   * @return a reference to the current output format   */  protected Instances outputFormatPeek() {    return m_OutputFormat;  }  /**   * Adds an output instance to the queue. The derived class should use this   * method for each output instance it makes available.    *   * @param instance the instance to be added to the queue   */  protected void push(Instance instance) {    if (instance != null) {      copyStringValues(instance, m_OutputFormat, m_OutputStringAtts);      instance.setDataset(m_OutputFormat);      m_OutputQueue.push(instance);    }  }  /**   * Clears the output queue.   */  protected void resetQueue() {    m_OutputQueue = new Queue();  }  /**   * Adds the supplied input instance to the inputformat dataset for   * later processing.  Use this method rather than   * getInputFormat().add(instance). Or else.   *   * @param instance the <code>Instance</code> to buffer.     
*/  protected void bufferInput(Instance instance) {    if (instance != null) {      copyStringValues(instance, m_InputFormat, m_InputStringAtts);      instance.setDataset(m_InputFormat);      m_InputFormat.add(instance);    }  }  /**   * Returns an array containing the indices of all string attributes in the   * input format. This index is created during setInputFormat()   *   * @return an array containing the indices of string attributes in the    * input dataset.   */  protected int [] getInputStringIndex() {    return m_InputStringAtts;  }  /**   * Returns an array containing the indices of all string attributes in the   * output format. This index is created during setOutputFormat()   *   * @return an array containing the indices of string attributes in the    * output dataset.   */  protected int [] getOutputStringIndex() {    return m_OutputStringAtts;  }  /**   * Copies string values contained in the instance copied to a new   * dataset. The Instance must already be assigned to a dataset. This   * dataset and the destination dataset must have the same structure.   *   * @param instance the Instance containing the string values to copy.   * @param destDataset the destination set of Instances   * @param strAtts an array containing the indices of any string attributes   * in the dataset.     */  private void copyStringValues(Instance inst, Instances destDataset,                                 int []strAtts) {    if (strAtts.length == 0) {      return;    }    if (inst.dataset() == null) {      throw new IllegalArgumentException("Instance has no dataset assigned!!");    } else if (inst.dataset().numAttributes() != destDataset.numAttributes()) {      throw new IllegalArgumentException("Src and Dest differ in # of attributes!!");    }     copyStringValues(inst, true, inst.dataset(), strAtts,                     destDataset, strAtts);  }  /**   * Takes string values referenced by an Instance and copies them from a   * source dataset to a destination dataset. 
The instance references are   * updated to be valid for the destination dataset. The instance may have the    * structure (i.e. number and attribute position) of either dataset (this   * affects where references are obtained from). The source dataset must   * have the same structure as the filter input format and the destination   * must have the same structure as the filter output format.   *   * @param instance the instance containing references to strings in the source   * dataset that will have references updated to be valid for the destination   * dataset.   * @param instSrcCompat true if the instance structure is the same as the   * source, or false if it is the same as the destination   * @param srcDataset the dataset for which the current instance string   * references are valid (after any position mapping if needed)   * @param destDataset the dataset for which the current instance string   * references need to be inserted (after any position mapping if needed)   */  protected void copyStringValues(Instance instance, boolean instSrcCompat,                                  Instances srcDataset, Instances destDataset) {    copyStringValues(instance, instSrcCompat, srcDataset, m_InputStringAtts,                     destDataset, m_OutputStringAtts);  }  /**   * Takes string values referenced by an Instance and copies them from a   * source dataset to a destination dataset. The instance references are   * updated to be valid for the destination dataset. The instance may have the    * structure (i.e. number and attribute position) of either dataset (this   * affects where references are obtained from). Only works if the number   * of string attributes is the same in both indices (implicitly these string   * attributes should be semantically same but just with shifted positions).   *   * @param instance the instance containing references to strings in the source   * dataset that will have references updated to be valid for the destination   * dataset.   
* @param instSrcCompat true if the instance structure is the same as the   * source, or false if it is the same as the destination (i.e. which of the   * string attribute indices contains the correct locations for this instance).   * @param srcDataset the dataset for which the current instance string   * references are valid (after any position mapping if needed)   * @param srcStrAtts an array containing the indices of string attributes   * in the source datset.   * @param destDataset the dataset for which the current instance string   * references need to be inserted (after any position mapping if needed)   * @param destStrAtts an array containing the indices of string attributes   * in the destination datset.   */  protected void copyStringValues(Instance instance, boolean instSrcCompat,                                  Instances srcDataset, int []srcStrAtts,                                  Instances destDataset, int []destStrAtts) {    if (srcDataset == destDataset) {      return;    }    if (srcStrAtts.length != destStrAtts.length) {      throw new IllegalArgumentException("Src and Dest string indices differ in length!!");    }    for (int i = 0; i < srcStrAtts.length; i++) {      int instIndex = instSrcCompat ? srcStrAtts[i] : destStrAtts[i];      Attribute src = srcDataset.attribute(srcStrAtts[i]);      Attribute dest = destDataset.attribute(destStrAtts[i]);      if (!instance.isMissing(instIndex)) {        //System.err.println(instance.value(srcIndex)         //                   + " " + src.numValues()        //                   + " " + dest.numValues());        int valIndex = dest.addStringValue(src, (int)instance.value(instIndex));        // setValue here shouldn't be too slow here unless your dataset has        // squillions of string attributes        instance.setValue(instIndex, (double)valIndex);      }    }  }  /**   * This will remove all buffered instances from the inputformat dataset.   
* Use this method rather than getInputFormat().delete();   */  protected void flushInput() {    if (m_InputStringAtts.length > 0) {      m_InputFormat = m_InputFormat.stringFreeStructure();    } else {      // This more efficient than new Instances(m_InputFormat, 0);      m_InputFormat.delete();    }  }  /**   * @deprecated use <code>setInputFormat(Instances)</code> instead.   */  public boolean inputFormat(Instances instanceInfo) throws Exception {    return setInputFormat(instanceInfo);  }  /**   * Sets the format of the input instances. If the filter is able to   * determine the output format before seeing any input instances, it   * does so here. This default implementation clears the output format   * and output queue, and the new batch flag is set. Overriders should   * call <code>super.setInputFormat(Instances)</code>   *   * @param instanceInfo an Instances object containing the input instance   * structure (any instances contained in the object are ignored - only the   * structure is required).   * @return true if the outputFormat may be collected immediately   * @exception Exception if the inputFormat can't be set successfully    */  public boolean setInputFormat(Instances instanceInfo) throws Exception {    m_InputFormat = instanceInfo.stringFreeStructure();    m_InputStringAtts = getStringIndices(instanceInfo);    m_OutputFormat = null;    m_OutputQueue = new Queue();    m_NewBatch = true;    return false;  }  /**   * @deprecated use <code>getOutputFormat()</code> instead.   */  public final Instances outputFormat() {    return getOutputFormat();  }  /**   * Gets the format of the output instances. This should only be called   * after input() or batchFinished() has returned true. The relation   * name of the output instances should be changed to reflect the   * action of the filter (eg: add the filter name and options).   *   * @return an Instances object containing the output instance   * structure only.   
* @exception NullPointerException if no input structure has been   * defined (or the output format hasn't been determined yet)    */  public final Instances getOutputFormat() {    if (m_OutputFormat == null) {      throw new NullPointerException("No output format defined.");    }    return new Instances(m_OutputFormat, 0);  }  /**   * Input an instance for filtering. Ordinarily the instance is   * processed and made available for output immediately. Some filters   * require all instances be read before producing output, in which   * case output instances should be collected after calling   * batchFinished(). If the input marks the start of a new batch, the   * output queue is cleared. This default implementation assumes all   * instance conversion will occur when batchFinished() is called.   *   * @param instance the input instance   * @return true if the filtered instance may now be   * collected with output().   * @exception NullPointerException if the input format has not been   * defined.   * @exception Exception if the input instance was not of the correct    * format or if there was a problem with the filtering.     */  public boolean input(Instance instance) throws Exception {    if (m_InputFormat == null) {      throw new NullPointerException("No input instance format defined");    }    if (m_NewBatch) {      m_OutputQueue = new Queue();      m_NewBatch = false;    }    bufferInput(instance);    return false;  }  /**   * Signify that this batch of input to the filter is finished. If   * the filter requires all instances prior to filtering, output()   * may now be called to retrieve the filtered instances. Any   * subsequent instances filtered should be filtered based on setting   * obtained from the first batch (unless the inputFormat has been   * re-assigned or new options have been set). This default   * implementation assumes all instance processing occurs during   * inputFormat() and input().   
*   * @return true if there are instances pending output   * @exception NullPointerException if no input structure has been defined,   * @exception Exception if there was a problem finishing the batch.   */  public boolean batchFinished() throws Exception {    if (m_InputFormat == null) {      throw new NullPointerException("No input instance format defined");    }    flushInput();    m_NewBatch = true;    return (numPendingOutput() != 0);  }  /**   * Output an instance after filtering and remove from the output queue.   *   * @return the instance that has most recently been filtered (or null if   * the queue is empty).   * @exception NullPointerException if no output structure has been defined   */  public Instance output() {    if (m_OutputFormat == null) {      throw new NullPointerException("No output instance format defined");    }    if (m_OutputQueue.empty()) {      return null;    }    Instance result = (Instance)m_OutputQueue.pop();    // Clear out references to old strings occasionally    if (m_OutputQueue.empty() && m_NewBatch) {      if (m_OutputStringAtts.length > 0) {        m_OutputFormat = m_OutputFormat.stringFreeStructure();      }    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -