⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 filter.java

📁 一个数据挖掘系统的源码
💻 JAVA
📖 第 1 页 / 共 3 页
字号:

/**
 *
 *   AgentAcademy - an open source Data Mining framework for
 *   training intelligent agents
 *
 *   Copyright (C)   2001-2003 AA Consortium.
 *
 *   This library is open source software; you can redistribute it
 *   and/or modify it under the terms of the GNU Lesser General
 *   Public License as published by the Free Software Foundation;
 *   either version 2.0 of the License, or (at your option) any later
 *   version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public
 *   License along with this library; if not, write to the Free
 *   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *   MA  02111-1307 USA
 *
 */

package org.agentacademy.modules.dataminer.filters;

/**
 * <p>Title: The Data Miner prototype</p>
 * <p>Description: A prototype for the DataMiner (DM), the Agent Academy (AA) module responsible for performing data mining on the contents of the Agent Use Repository (AUR). The extracted knowledge is to be sent back to the AUR in the form of a PMML document.</p>
 * <p>Copyright: Copyright (c) 2002</p>
 * <p>Company: CERTH</p>
 * @author asymeon
 * @version 0.3
 */

import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.Serializable;
import java.util.Enumeration;
import org.agentacademy.modules.dataminer.core.*;
import org.apache.log4j.Logger;

/**
 * An abstract class for instance filters: objects that take instances
 * as input, carry out some transformation on the instance and then
 * output the instance. The method implementations in this class
 * assume that most of the work will be done in the methods overridden
 * by subclasses.<p>
 *
 * A simple example of filter use. This example doesn't remove
 * instances from the output queue until all instances have been
 * input, so has higher memory consumption than an approach that
 * uses output instances as they are made available:<p>
 *
 * <code> <pre>
 *  Filter filter = ..some type of filter..
 *  Instances data = ..some instances..
 *  for (int i = 0; i < data.numInstances(); i++) {
 *    filter.input(data.instance(i));
 *  }
 *  filter.batchFinished();
 *  Instances newData = filter.outputFormat();
 *  Instance processed;
 *  while ((processed = filter.output()) != null) {
 *    newData.add(processed);
 *  }
 *  ..do something with newData..
 * </pre> </code>
 *
 */
public abstract class Filter implements Serializable {

  /*
   * Filter refactoring TODO:
   *
   * - Update all filters to use getOutputFormat and setInputFormat
   * instead of outputFormat, outputFormatPeek and inputFormat.
   * - Update users of filters to use getOutputFormat and setInputFormat
   * - remove outputFormat, outputFormatPeek and inputFormat
   *
   */

 /** Log4j logger created for <code>Filter.class</code>; public and static. */
 public static Logger                log = Logger.getLogger(Filter.class);
  /** Debugging mode flag; off by default. */
  private boolean m_Debug = false;

  /**
   * The output format for instances. Assigned by setOutputFormat() and
   * <code>null</code> until then (or after it is called with
   * <code>null</code>).
   */
  private Instances m_OutputFormat = null;

  /**
   * The output instance queue. Replaced by a new, empty Queue in
   * setOutputFormat() and resetQueue(); filled by push().
   */
  private Queue m_OutputQueue = null;

  /**
   * Indices of string attributes in the output format. Computed with
   * getStringIndices() each time setOutputFormat() receives a non-null format.
   */
  private int [] m_OutputStringAtts = null;

  /** Indices of string attributes in the input format */
  private int [] m_InputStringAtts = null;

  /**
   * The input format for instances. bufferInput() also adds instances to
   * this dataset, so it can hold buffered data as well as structure.
   */
  private Instances m_InputFormat = null;

  /** Record whether the filter is at the start of a batch */
  protected boolean m_NewBatch = true;

  /**
   * Installs the format that output instances will follow and starts a
   * new, empty output queue. A derived class calls this once it knows what
   * its output looks like.
   *
   * <p>For a non-null argument the stored format is the result of
   * <code>outputFormat.stringFreeStructure()</code>, and the output string
   * attribute indices are recomputed from it. The stored format's relation
   * name is set to the supplied relation name, followed by <code>"-"</code>
   * and this filter's class name; if the filter is an
   * <code>OptionHandler</code>, each of its options is trimmed and appended
   * with no separator. For a <code>null</code> argument the stored format is
   * cleared (the string attribute indices are left as they were).
   *
   * @param outputFormat the new output format, or <code>null</code> to clear it
   */
  protected void setOutputFormat(Instances outputFormat) {

    if (outputFormat == null) {
      m_OutputFormat = null;
    } else {
      m_OutputFormat = outputFormat.stringFreeStructure();
      m_OutputStringAtts = getStringIndices(m_OutputFormat);

      // Rename the relation so it records which filter (and which option
      // settings) produced the data.
      StringBuffer name = new StringBuffer();
      name.append(outputFormat.relationName());
      name.append("-");
      name.append(this.getClass().getName());
      if (this instanceof OptionHandler) {
        String [] opts = ((OptionHandler)this).getOptions();
        for (int k = 0; k < opts.length; k++) {
          name.append(opts[k].trim());
        }
      }
      m_OutputFormat.setRelationName(name.toString());
    }

    // The queue is replaced in both cases.
    m_OutputQueue = new Queue();
  }

  /**
   * Gives access to the dataset currently held as the input format.
   * Instances buffered through bufferInput() are added to this same
   * dataset, so it may contain instances as well as structure.
   *
   * @return the stored input Instances, without copying; <code>null</code>
   * is the field's initial value
   */
  protected Instances getInputFormat() {
    return this.m_InputFormat;
  }

  /**
   * Peeks at the current output format. The stored object itself is handed
   * back; no copy is made.
   *
   * @return a reference to the current output format, or <code>null</code>
   * if none is stored
   */
  protected Instances outputFormatPeek() {
    return this.m_OutputFormat;
  }

  /**
   * Makes one processed instance available on the output queue. A derived
   * class calls this for every instance it produces. Before queueing, the
   * instance's string values are copied into the output format and the
   * instance is attached to the output format as its dataset.
   * A <code>null</code> argument is ignored.
   *
   * @param instance the instance to be added to the queue
   */
  protected void push(Instance instance) {

    if (instance == null) {
      return;
    }
    copyStringValues(instance, m_OutputFormat, m_OutputStringAtts);
    instance.setDataset(m_OutputFormat);
    m_OutputQueue.push(instance);
  }

  /**
   * Discards the current output queue by replacing it with a new, empty one.
   */
  protected void resetQueue() {
    this.m_OutputQueue = new Queue();
  }

  /**
   * Buffers an input instance in the input format dataset for later
   * processing. Always use this method instead of calling
   * getInputFormat().add(instance) directly: it first copies the instance's
   * string values into the input format and attaches the instance to that
   * dataset, which a direct add would skip. A <code>null</code> argument is
   * ignored.
   *
   * @param instance the <code>Instance</code> to buffer.
   */
  protected void bufferInput(Instance instance) {

    if (instance == null) {
      return;
    }
    copyStringValues(instance, m_InputFormat, m_InputStringAtts);
    instance.setDataset(m_InputFormat);
    m_InputFormat.add(instance);
  }

  /**
   * Gives the positions of all string attributes in the input format. The
   * index is built during setInputFormat(). The stored array itself is
   * returned, not a copy.
   *
   * @return an array containing the indices of string attributes in the
   * input dataset.
   */
  protected int [] getInputStringIndex() {
    return this.m_InputStringAtts;
  }

  /**
   * Gives the positions of all string attributes in the output format. The
   * index is built during setOutputFormat(). The stored array itself is
   * returned, not a copy.
   *
   * @return an array containing the indices of string attributes in the
   * output dataset.
   */
  protected int [] getOutputStringIndex() {
    return this.m_OutputStringAtts;
  }

  /**
   * Copies the string values referenced by an instance from the dataset it
   * currently belongs to into another dataset. The instance must already be
   * assigned to a dataset, and that dataset must have the same number of
   * attributes as the destination. Nothing happens when there are no string
   * attributes.
   *
   * @param inst the Instance containing the string values to copy.
   * @param destDataset the destination set of Instances
   * @param strAtts an array containing the indices of any string attributes
   * in the dataset; the same indices are used for source and destination.
   * @throws IllegalArgumentException if the instance has no dataset, or if
   * source and destination differ in their number of attributes
   */
  private void copyStringValues(Instance inst, Instances destDataset,
                                int []strAtts) {

    // No string attributes means there is nothing to copy.
    if (strAtts.length == 0) {
      return;
    }

    Instances srcDataset = inst.dataset();
    if (srcDataset == null) {
      throw new IllegalArgumentException("Instance has no dataset assigned!!");
    }
    if (srcDataset.numAttributes() != destDataset.numAttributes()) {
      throw new IllegalArgumentException("Src and Dest differ in # of attributes!!");
    }

    // The instance is laid out like its own (source) dataset, hence "true".
    copyStringValues(inst, true, srcDataset, strAtts, destDataset, strAtts);
  }

  /**
   * Copies the string values referenced by an Instance from a source dataset
   * to a destination dataset, updating the instance's references so they are
   * valid for the destination. This overload supplies the filter's own
   * string attribute indices: the input format's indices for the source and
   * the output format's indices for the destination. The source dataset must
   * therefore be structured like the filter input format and the destination
   * like the filter output format. The instance itself may follow either
   * structure, as indicated by <code>instSrcCompat</code>.
   *
   * @param instance the instance containing references to strings in the source
   * dataset that will have references updated to be valid for the destination
   * dataset.
   * @param instSrcCompat true if the instance structure is the same as the
   * source, or false if it is the same as the destination
   * @param srcDataset the dataset for which the current instance string
   * references are valid (after any position mapping if needed)
   * @param destDataset the dataset for which the current instance string
   * references need to be inserted (after any position mapping if needed)
   */
  protected void copyStringValues(Instance instance, boolean instSrcCompat,
                                  Instances srcDataset, Instances destDataset) {

    int [] srcStringAtts = m_InputStringAtts;
    int [] destStringAtts = m_OutputStringAtts;
    copyStringValues(instance, instSrcCompat,
                     srcDataset, srcStringAtts,
                     destDataset, destStringAtts);
  }

  /**
   * Takes string values referenced by an Instance and copies them from a
   * source dataset to a destination dataset. The instance references are
   * updated to be valid for the destination dataset. The instance may have the
   * structure (i.e. number and attribute position) of either dataset (this
   * affects where references are obtained from). Only works if the number
   * of string attributes is the same in both indices (implicitly these string
   * attributes should be semantically same but just with shifted positions).
   *
   * @param instance the instance containing references to strings in the source
   * dataset that will have references updated to be valid for the destination
   * dataset.
   * @param instSrcCompat true if the instance structure is the same as the
   * source, or false if it is the same as the destination (i.e. which of the
   * string attribute indices contains the correct locations for this instance).
   * @param srcDataset the dataset for which the current instance string
   * references are valid (after any position mapping if needed)
   * @param srcStrAtts an array containing the indices of string attributes
   * in the source dataset.
   * @param destDataset the dataset for which the current instance string
   * references need to be inserted (after any position mapping if needed)
   * @param destStrAtts an array containing the indices of string attributes
   * in the destination dataset.
   */
  protected void copyStringValues(Instance instance, boolean instSrcCompat,
                                  Instances srcDataset, int []srcStrAtts,
                                  Instances destDataset, int []destStrAtts) {
    if (srcDataset == destDataset) {
      return;
    }
    if (srcStrAtts.length != destStrAtts.length) {
      throw new IllegalArgumentException("Src and Dest string indices differ in length!!");
    }
    for (int i = 0; i < srcStrAtts.length; i++) {
      int instIndex = instSrcCompat ? srcStrAtts[i] : destStrAtts[i];
      Attribute src = srcDataset.attribute(srcStrAtts[i]);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -