⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 instances.java

📁 :<<数据挖掘--实用机器学习技术及java实现>>一书的配套源程序
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    Instances.java *    Copyright (C) 1999 Eibe Frank * */package weka.core;import java.io.*;import java.util.*;/** * Class for handling an ordered set of weighted instances. <p> * * Typical usage (code from the main() method of this class): <p> * * <code> * ... <br> *  * // Read all the instances in the file <br> * reader = new FileReader(filename); <br> * instances = new Instances(reader); <br><br> * * // Make the last attribute be the class <br> * instances.setClassIndex(instances.numAttributes() - 1); <br><br> *  * // Print header and instances. <br> * System.out.println("\nDataset:\n"); <br>  * System.out.println(instances); <br><br> * * ... <br> * </code><p> * * All methods that change a set of instances are safe, ie. a change * of a set of instances does not affect any other sets of * instances. All methods that change a datasets's attribute * information clone the dataset before it is changed. * * @author Eibe Frank (eibe@cs.waikato.ac.nz) * @author Len Trigg (trigg@cs.waikato.ac.nz) * @version $Revision: 1.32.2.1 $  */public class Instances implements Serializable {   /** The filename extension that should be used for arff files */  public static String FILE_EXTENSION = ".arff";  /** The dataset's name. 
*/  protected String m_RelationName;           /** The attribute information. */  protected FastVector m_Attributes;  /** The instances. */  protected FastVector m_Instances;  /** The class attribute's index */  protected int m_ClassIndex;  /** Buffer of values for sparse instance */  protected double[] m_ValueBuffer;  /** Buffer of indices for sparse instance */  protected int[] m_IndicesBuffer;  /**   * Reads an ARFF file from a reader, and assigns a weight of   * one to each instance. Lets the index of the class    * attribute be undefined (negative).   *   * @param reader the reader   * @exception IOException if the ARFF file is not read    * successfully   */  public Instances(Reader reader) throws IOException {    StreamTokenizer tokenizer;    tokenizer = new StreamTokenizer(reader);    initTokenizer(tokenizer);    readHeader(tokenizer);    m_ClassIndex = -1;    m_Instances = new FastVector(1000);    while (getInstance(tokenizer, true)) {};    compactify();  }   /**   * Reads the header of an ARFF file from a reader and    * reserves space for the given number of instances. Lets   * the class index be undefined (negative).   *   * @param reader the reader   * @param capacity the capacity   * @exception IllegalArgumentException if the header is not read successfully   * or the capacity is negative.   * @exception IOException if there is a problem with the reader.   */   public Instances(Reader reader, int capacity) throws IOException {    StreamTokenizer tokenizer;    if (capacity < 0) {      throw new IllegalArgumentException("Capacity has to be positive!");    }    tokenizer = new StreamTokenizer(reader);     initTokenizer(tokenizer);    readHeader(tokenizer);    m_ClassIndex = -1;    m_Instances = new FastVector(capacity);  }  /**   * Constructor copying all instances and references to   * the header information from the given set of instances.   
*   * @param instances the set to be copied   */  public Instances(Instances dataset) {    this(dataset, dataset.numInstances());    dataset.copyInstances(0, this, dataset.numInstances());  }  /**   * Constructor creating an empty set of instances. Copies references   * to the header information from the given set of instances. Sets   * the capacity of the set of instances to 0 if its negative.   *   * @param instances the instances from which the header    * information is to be taken   * @param capacity the capacity of the new dataset    */  public Instances(Instances dataset, int capacity) {        if (capacity < 0) {      capacity = 0;    }        // Strings only have to be "shallow" copied because    // they can't be modified.    m_ClassIndex = dataset.m_ClassIndex;    m_RelationName = dataset.m_RelationName;    m_Attributes = dataset.m_Attributes;    m_Instances = new FastVector(capacity);  }  /**   * Creates a new set of instances by copying a    * subset of another set.   *   * @param source the set of instances from which a subset    * is to be created   * @param first the index of the first instance to be copied   * @param toCopy the number of instances to be copied   * @exception IllegalArgumentException if first and toCopy are out of range   */  public Instances(Instances source, int first, int toCopy) {        this(source, toCopy);    if ((first < 0) || ((first + toCopy) > source.numInstances())) {      throw new IllegalArgumentException("Parameters first and/or toCopy out "+                                         "of range");    }    source.copyInstances(first, this, toCopy);  }  /**   * Creates an empty set of instances. Uses the given   * attribute information. Sets the capacity of the set of    * instances to 0 if its negative. Given attribute information   * must not be changed after this constructor has been used.   
*   * @param name the name of the relation   * @param attInfo the attribute information   * @param capacity the capacity of the set   */  public Instances(String name, FastVector attInfo, int capacity) {    m_RelationName = name;    m_ClassIndex = -1;    m_Attributes = attInfo;    for (int i = 0; i < numAttributes(); i++) {      attribute(i).setIndex(i);    }    m_Instances = new FastVector(capacity);  }   /**   * Create a copy of the structure, but "cleanse" string types (i.e.   * doesn't contain references to the strings seen in the past).   *   * @return a copy of the instance structure.   */  public Instances stringFreeStructure() {    FastVector atts = (FastVector)m_Attributes.copy();    for (int i = 0 ; i < atts.size(); i++) {      Attribute att = (Attribute)atts.elementAt(i);      if (att.type() == Attribute.STRING) {        atts.setElementAt(new Attribute(att.name(), null), i);      }    }    Instances result = new Instances(relationName(), atts, 0);    result.m_ClassIndex = m_ClassIndex;    return result;  }  /**   * Adds one instance to the end of the set.    * Shallow copies instance before it is added. Increases the   * size of the dataset if it is not large enough. Does not   * check if the instance is compatible with the dataset.   *   * @param instance the instance to be added   */  public final void add(Instance instance) {    Instance newInstance = (Instance)instance.copy();    newInstance.setDataset(this);    m_Instances.addElement(newInstance);  }  /**   * Returns an attribute.   *   * @param index the attribute's index   * @return the attribute at the given position   */   public final Attribute attribute(int index) {        return (Attribute) m_Attributes.elementAt(index);  }  /**   * Returns an attribute given its name. If there is more than   * one attribute with the same name, it returns the first one.   * Returns null if the attribute can't be found.   
*   * @param name the attribute's name   * @return the attribute with the given name, null if the   * attribute can't be found   */   public final Attribute attribute(String name) {        for (int i = 0; i < numAttributes(); i++) {      if (attribute(i).name().equals(name)) {	return attribute(i);      }    }    return null;  }  /**   * Checks for string attributes in the dataset   *   * @return true if string attributes are present, false otherwise   */  public boolean checkForStringAttributes() {    int i = 0;       while (i < m_Attributes.size()) {      if (attribute(i++).isString()) {	return true;      }    }    return false;  }  /**   * Checks if the given instance is compatible   * with this dataset. Only looks at the size of   * the instance and the ranges of the values for    * nominal and string attributes.   *   * @return true if the instance is compatible with the dataset    */  public final boolean checkInstance(Instance instance) {    if (instance.numAttributes() != numAttributes()) {      return false;    }    for (int i = 0; i < numAttributes(); i++) {      if (instance.isMissing(i)) {	continue;      } else if (attribute(i).isNominal() ||		 attribute(i).isString()) {	if (!(Utils.eq(instance.value(i),		       (double)(int)instance.value(i)))) {	  return false;	} else if (Utils.sm(instance.value(i), 0) ||		   Utils.gr(instance.value(i),			    attribute(i).numValues())) {	  return false;	}      }    }    return true;  }	  /**   * Returns the class attribute.   *   * @return the class attribute   * @exception UnassignedClassException if the class is not set   */  public final Attribute classAttribute() {    if (m_ClassIndex < 0) {      throw new UnassignedClassException("Class index is negative (not set)!");    }    return attribute(m_ClassIndex);  }  /**   * Returns the class attribute's index. Returns negative number   * if it's undefined.   
*   * @return the class index as an integer   */  public final int classIndex() {        return m_ClassIndex;  }   /**   * Compactifies the set of instances. Decreases the capacity of   * the set so that it matches the number of instances in the set.   */  public final void compactify() {    m_Instances.trimToSize();  }  /**   * Removes all instances from the set.   */  public final void delete() {        m_Instances = new FastVector();  }  /**   * Removes an instance at the given position from the set.   *   * @param index the instance's position   */  public final void delete(int index) {        m_Instances.removeElementAt(index);  }  /**   * Deletes an attribute at the given position    * (0 to numAttributes() - 1). A deep copy of the attribute   * information is performed before the attribute is deleted.   *   * @param pos the attribute's position   * @exception IllegalArgumentException if the given index is out of range or the   * class attribute is being deleted   */  public void deleteAttributeAt(int position) {	     if ((position < 0) || (position >= m_Attributes.size())) {      throw new IllegalArgumentException("Index out of range");    }    if (position == m_ClassIndex) {      throw new IllegalArgumentException("Can't delete class attribute");    }    freshAttributeInfo();    if (m_ClassIndex > position) {      m_ClassIndex--;    }    m_Attributes.removeElementAt(position);    for (int i = position; i < m_Attributes.size(); i++) {      Attribute current = (Attribute)m_Attributes.elementAt(i);      current.setIndex(current.index() - 1);    }    for (int i = 0; i < numInstances(); i++) {      instance(i).forceDeleteAttributeAt(position);     }  }  /**   * Deletes all string attributes in the dataset. A deep copy of the attribute   * information is performed before an attribute is deleted.   *   * @exception IllegalArgumentException if string attribute couldn't be    * successfully deleted (probably because it is the class attribute).   
*/
  public void deleteStringAttributes() {

    int i = 0;
    while (i < m_Attributes.size()) {
      if (!attribute(i).isString()) {
        i++;
        continue;
      }
      // Do not advance after a deletion: the next attribute has moved
      // into position i.
      deleteAttributeAt(i);
    }
  }

  /**
   * Removes all instances with missing values for a particular
   * attribute from the dataset.
   *
   * @param attIndex the attribute's index
   */
  public final void deleteWithMissing(int attIndex) {
    FastVector newInstances = new FastVector(numInstances());

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -