⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 instances.java

📁 MacroWeka扩展了著名数据挖掘工具weka
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    Instances.java
 *    Copyright (C) 1999 Eibe Frank
 *
 */

package weka.core;

import java.io.*;
import java.text.ParseException;
import java.util.*;

/**
 * Class for handling an ordered set of weighted instances. <p>
 *
 * Typical usage (code from the main() method of this class): <p>
 *
 * <code>
 * ... <br>
 * 
 * // Read all the instances in the file <br>
 * reader = new FileReader(filename); <br>
 * instances = new Instances(reader); <br><br>
 *
 * // Make the last attribute be the class <br>
 * instances.setClassIndex(instances.numAttributes() - 1); <br><br>
 * 
 * // Print header and instances. <br>
 * System.out.println("\nDataset:\n"); <br> 
 * System.out.println(instances); <br><br>
 *
 * ... <br>
 * </code><p>
 *
 * All methods that change a set of instances are safe, ie. a change
 * of a set of instances does not affect any other sets of
 * instances. All methods that change a datasets's attribute
 * information clone the dataset before it is changed.
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @version $Revision: 1.1 $ 
 */
public class Instances implements Serializable {
 
  /** The filename extension that should be used for arff files */
  public static String FILE_EXTENSION = ".arff";

  /** The filename extension that should be used for bin. serialized instances files */
  public static String SERIALIZED_OBJ_FILE_EXTENSION = ".bsi";

  /** The keyword used to denote the start of an arff header */
  static String ARFF_RELATION = "@relation";

  /** The keyword used to denote the start of the arff data section */
  static String ARFF_DATA = "@data";

  /** The dataset's name. */
  protected /*@spec_public non_null@*/ String m_RelationName;         

  /** The attribute information. */
  protected /*@spec_public non_null@*/ FastVector m_Attributes;
  /*  public invariant (\forall int i; 0 <= i && i < m_Attributes.size(); 
                    m_Attributes.elementAt(i) != null);
  */

  /** The instances. */
  protected /*@spec_public non_null@*/ FastVector m_Instances;

  /** The class attribute's index */
  protected int m_ClassIndex;
  //@ protected invariant classIndex() == m_ClassIndex;

  /** Buffer of values for sparse instance */
  protected double[] m_ValueBuffer;

  /** Buffer of indices for sparse instance */
  protected int[] m_IndicesBuffer;

  /**
   * Reads an ARFF file from a reader, and assigns a weight of
   * one to each instance. Lets the index of the class 
   * attribute be undefined (negative).
   *
   * @param reader the reader
   * @exception IOException if the ARFF file is not read 
   * successfully
   */
  public Instances(/*@non_null@*/Reader reader) throws IOException {

    StreamTokenizer tokenizer;

    tokenizer = new StreamTokenizer(reader);
    initTokenizer(tokenizer);
    readHeader(tokenizer);
    m_ClassIndex = -1;
    m_Instances = new FastVector(1000);
    while (getInstance(tokenizer, true)) {};
    compactify();
  }
 
  /**
   * Reads the header of an ARFF file from a reader and 
   * reserves space for the given number of instances. Lets
   * the class index be undefined (negative).
   *
   * @param reader the reader
   * @param capacity the capacity
   * @exception IllegalArgumentException if the header is not read successfully
   * or the capacity is negative.
   * @exception IOException if there is a problem with the reader.
   */
  //@ requires capacity >= 0;
  //@ ensures classIndex() == -1;
  public Instances(/*@non_null@*/Reader reader, int capacity)
    throws IOException {

    StreamTokenizer tokenizer;

    if (capacity < 0) {
      throw new IllegalArgumentException("Capacity has to be positive!");
    }
    tokenizer = new StreamTokenizer(reader); 
    initTokenizer(tokenizer);
    readHeader(tokenizer);
    m_ClassIndex = -1;
    m_Instances = new FastVector(capacity);
  }

  /**
   * Constructor copying all instances and references to
   * the header information from the given set of instances.
   *
   * @param instances the set to be copied
   */
  public Instances(/*@non_null@*/Instances dataset) {

    this(dataset, dataset.numInstances());

    dataset.copyInstances(0, this, dataset.numInstances());
  }

  /**
   * Constructor creating an empty set of instances. Copies references
   * to the header information from the given set of instances. Sets
   * the capacity of the set of instances to 0 if its negative.
   *
   * @param instances the instances from which the header 
   * information is to be taken
   * @param capacity the capacity of the new dataset 
   */
  public Instances(/*@non_null@*/Instances dataset, int capacity) {
    
    if (capacity < 0) {
      capacity = 0;
    }
    
    // Strings only have to be "shallow" copied because
    // they can't be modified.
    m_ClassIndex = dataset.m_ClassIndex;
    m_RelationName = dataset.m_RelationName;
    m_Attributes = dataset.m_Attributes;
    m_Instances = new FastVector(capacity);
  }

  /**
   * Creates a new set of instances by copying a 
   * subset of another set.
   *
   * @param source the set of instances from which a subset 
   * is to be created
   * @param first the index of the first instance to be copied
   * @param toCopy the number of instances to be copied
   * @exception IllegalArgumentException if first and toCopy are out of range
   */
  //@ requires 0 <= first;
  //@ requires 0 <= toCopy;
  //@ requires first + toCopy <= source.numInstances();
  public Instances(/*@non_null@*/Instances source, int first, int toCopy) {
    
    this(source, toCopy);

    if ((first < 0) || ((first + toCopy) > source.numInstances())) {
      throw new IllegalArgumentException("Parameters first and/or toCopy out "+
                                         "of range");
    }
    source.copyInstances(first, this, toCopy);
  }

  /**
   * Creates an empty set of instances. Uses the given
   * attribute information. Sets the capacity of the set of 
   * instances to 0 if its negative. Given attribute information
   * must not be changed after this constructor has been used.
   *
   * @param name the name of the relation
   * @param attInfo the attribute information
   * @param capacity the capacity of the set
   */
  public Instances(/*@non_null@*/String name, 
		   /*@non_null@*/FastVector attInfo, int capacity) {

    m_RelationName = name;
    m_ClassIndex = -1;
    m_Attributes = attInfo;
    for (int i = 0; i < numAttributes(); i++) {
      attribute(i).setIndex(i);
    }
    m_Instances = new FastVector(capacity);
  }
 
  /**
   * Create a copy of the structure, but "cleanse" string types (i.e.
   * doesn't contain references to the strings seen in the past).
   *
   * @return a copy of the instance structure.
   */
  public Instances stringFreeStructure() {

    FastVector atts = (FastVector)m_Attributes.copy();
    for (int i = 0 ; i < atts.size(); i++) {
      Attribute att = (Attribute)atts.elementAt(i);
      if (att.type() == Attribute.STRING) {
        atts.setElementAt(new Attribute(att.name(), (FastVector)null), i);
      }
    }
    Instances result = new Instances(relationName(), atts, 0);
    result.m_ClassIndex = m_ClassIndex;
    return result;
  }

  /**
   * Adds one instance to the end of the set. 
   * Shallow copies instance before it is added. Increases the
   * size of the dataset if it is not large enough. Does not
   * check if the instance is compatible with the dataset.
   *
   * @param instance the instance to be added
   */
  public void add(/*@non_null@*/ Instance instance) {

    Instance newInstance = (Instance)instance.copy();

    newInstance.setDataset(this);
    m_Instances.addElement(newInstance);
  }

  /**
   * Returns an attribute.
   *
   * @param index the attribute's index
   * @return the attribute at the given position
   */
  //@ requires 0 <= index;
  //@ requires index < m_Attributes.size();
  //@ ensures \result != null;
  public /*@pure@*/ Attribute attribute(int index) {
    
    return (Attribute) m_Attributes.elementAt(index);
  }

  /**
   * Returns an attribute given its name. If there is more than
   * one attribute with the same name, it returns the first one.
   * Returns null if the attribute can't be found.
   *
   * @param name the attribute's name
   * @return the attribute with the given name, null if the
   * attribute can't be found
   */ 
  public /*@pure@*/ Attribute attribute(String name) {
    
    for (int i = 0; i < numAttributes(); i++) {
      if (attribute(i).name().equals(name)) {
	return attribute(i);
      }
    }
    return null;
  }

  /**
   * Checks for string attributes in the dataset
   *
   * @return true if string attributes are present, false otherwise
   */
  public /*@pure@*/ boolean checkForStringAttributes() {

    int i = 0;
   
    while (i < m_Attributes.size()) {
      if (attribute(i++).isString()) {
	return true;
      }
    }
    return false;
  }

  /**
   * Checks if the given instance is compatible
   * with this dataset. Only looks at the size of
   * the instance and the ranges of the values for 
   * nominal and string attributes.
   *
   * @return true if the instance is compatible with the dataset 
   */
  public /*@pure@*/ boolean checkInstance(Instance instance) {

    if (instance.numAttributes() != numAttributes()) {
      return false;
    }
    for (int i = 0; i < numAttributes(); i++) {
      if (instance.isMissing(i)) {
	continue;
      } else if (attribute(i).isNominal() ||
		 attribute(i).isString()) {
	if (!(Utils.eq(instance.value(i),
		       (double)(int)instance.value(i)))) {
	  return false;
	} else if (Utils.sm(instance.value(i), 0) ||
		   Utils.gr(instance.value(i),
			    attribute(i).numValues())) {
	  return false;
	}
      }
    }
    return true;
  }
	
  /**
   * Returns the class attribute.
   *
   * @return the class attribute
   * @exception UnassignedClassException if the class is not set
   */
  //@ requires classIndex() >= 0;
  public /*@pure@*/ Attribute classAttribute() {

    if (m_ClassIndex < 0) {
      throw new UnassignedClassException("Class index is negative (not set)!");
    }
    return attribute(m_ClassIndex);
  }

  /**
   * Returns the class attribute's index. Returns negative number
   * if it's undefined.
   *
   * @return the class index as an integer
   */
  // ensures \result == m_ClassIndex;
  public /*@pure@*/ int classIndex() {
    
    return m_ClassIndex;
  }
 
  /**
   * Compactifies the set of instances. Decreases the capacity of
   * the set so that it matches the number of instances in the set.
   */
  public void compactify() {

    m_Instances.trimToSize();
  }

  /**
   * Removes all instances from the set.
   */
  public void delete() {
    
    m_Instances = new FastVector();
  }

  /**
   * Removes an instance at the given position from the set.
   *
   * @param index the instance's position
   */
  //@ requires 0 <= index && index < numInstances();
  public void delete(int index) {
    
    m_Instances.removeElementAt(index);
  }

  /**
   * Deletes an attribute at the given position 
   * (0 to numAttributes() - 1). A deep copy of the attribute
   * information is performed before the attribute is deleted.
   *
   * @param pos the attribute's position
   * @exception IllegalArgumentException if the given index is out of range 
   *            or the class attribute is being deleted
   */
  //@ requires 0 <= position && position < numAttributes();
  //@ requires position != classIndex();
  public void deleteAttributeAt(int position) {
	 
    if ((position < 0) || (position >= m_Attributes.size())) {
      throw new IllegalArgumentException("Index out of range");
    }
    if (position == m_ClassIndex) {
      throw new IllegalArgumentException("Can't delete class attribute");
    }
    freshAttributeInfo();
    if (m_ClassIndex > position) {
      m_ClassIndex--;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -