📄 instances.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * Instances.java * Copyright (C) 1999 Eibe Frank * */package weka.core;import java.io.*;import java.text.ParseException;import java.util.*;/** * Class for handling an ordered set of weighted instances. <p> * * Typical usage (code from the main() method of this class): <p> * * <code> * ... <br> * * // Read all the instances in the file <br> * reader = new FileReader(filename); <br> * instances = new Instances(reader); <br><br> * * // Make the last attribute be the class <br> * instances.setClassIndex(instances.numAttributes() - 1); <br><br> * * // Print header and instances. <br> * System.out.println("\nDataset:\n"); <br> * System.out.println(instances); <br><br> * * ... <br> * </code><p> * * All methods that change a set of instances are safe, i.e. a change * of a set of instances does not affect any other sets of * instances. All methods that change a dataset's attribute * information clone the dataset before it is changed. * * @author Eibe Frank (eibe@cs.waikato.ac.nz) * @author Len Trigg (trigg@cs.waikato.ac.nz) * @version $Revision: 1.2 $ */public class Instances implements Serializable { /** The filename extension that should be used for arff files */ public static String FILE_EXTENSION = ".arff"; /** The dataset's name. 
*/ protected String m_RelationName; /** The attribute information. */ protected FastVector m_Attributes; /** The instances. */ protected FastVector m_Instances; /** Index in ranges for MIN and MAX and WIDTH */ public static int R_MIN = 0; public static int R_MAX = 1; public static int R_WIDTH = 2; /** The class attribute's index */ protected int m_ClassIndex; /** Buffer of values for sparse instance */ protected double[] m_ValueBuffer; /** Buffer of indices for sparse instance */ protected int[] m_IndicesBuffer; /** Ranges of instances */ protected double[][] m_Ranges; /** * Reads an ARFF file from a reader, and assigns a weight of * one to each instance. Lets the index of the class * attribute be undefined (negative). * * @param reader the reader * @exception IOException if the ARFF file is not read * successfully */ public Instances(Reader reader) throws IOException { StreamTokenizer tokenizer; tokenizer = new StreamTokenizer(reader); initTokenizer(tokenizer); readHeader(tokenizer); m_ClassIndex = -1; m_Instances = new FastVector(1000); while (getInstance(tokenizer, true)) {}; compactify(); } /** * Reads the header of an ARFF file from a reader and * reserves space for the given number of instances. Lets * the class index be undefined (negative). * * @param reader the reader * @param capacity the capacity * @exception IllegalArgumentException if the header is not read successfully * or the capacity is negative. * @exception IOException if there is a problem with the reader. */ public Instances(Reader reader, int capacity) throws IOException { StreamTokenizer tokenizer; if (capacity < 0) { throw new IllegalArgumentException("Capacity has to be positive!"); } tokenizer = new StreamTokenizer(reader); initTokenizer(tokenizer); readHeader(tokenizer); m_ClassIndex = -1; m_Instances = new FastVector(capacity); } /** * Constructor copying all instances and references to * the header information from the given set of instances. 
* * @param instances the set to be copied */ public Instances(Instances dataset) { this(dataset, dataset.numInstances()); dataset.copyInstances(0, this, dataset.numInstances()); } /** * Constructor creating an empty set of instances. Copies references * to the header information from the given set of instances. Sets * the capacity of the set of instances to 0 if its negative. * * @param instances the instances from which the header * information is to be taken * @param capacity the capacity of the new dataset */ public Instances(Instances dataset, int capacity) { if (capacity < 0) { capacity = 0; } // Strings only have to be "shallow" copied because // they can't be modified. m_ClassIndex = dataset.m_ClassIndex; m_RelationName = dataset.m_RelationName; m_Attributes = dataset.m_Attributes; m_Instances = new FastVector(capacity); } /** * Creates a new set of instances by copying a * subset of another set. * * @param source the set of instances from which a subset * is to be created * @param first the index of the first instance to be copied * @param toCopy the number of instances to be copied * @exception IllegalArgumentException if first and toCopy are out of range */ public Instances(Instances source, int first, int toCopy) { this(source, toCopy); if ((first < 0) || ((first + toCopy) > source.numInstances())) { throw new IllegalArgumentException("Parameters first and/or toCopy out "+ "of range"); } source.copyInstances(first, this, toCopy); } /** * Creates an empty set of instances. Uses the given * attribute information. Sets the capacity of the set of * instances to 0 if its negative. Given attribute information * must not be changed after this constructor has been used. 
* * @param name the name of the relation * @param attInfo the attribute information * @param capacity the capacity of the set */ public Instances(String name, FastVector attInfo, int capacity) { m_RelationName = name; m_ClassIndex = -1; m_Attributes = attInfo; for (int i = 0; i < numAttributes(); i++) { attribute(i).setIndex(i); } m_Instances = new FastVector(capacity); } /** * Create a copy of the structure, but "cleanse" string types (i.e. * doesn't contain references to the strings seen in the past). * * @return a copy of the instance structure. */ public Instances stringFreeStructure() { FastVector atts = (FastVector)m_Attributes.copy(); for (int i = 0 ; i < atts.size(); i++) { Attribute att = (Attribute)atts.elementAt(i); if (att.type() == Attribute.STRING) { atts.setElementAt(new Attribute(att.name(), (FastVector)null), i); } } Instances result = new Instances(relationName(), atts, 0); result.m_ClassIndex = m_ClassIndex; return result; } /** * Adds one instance to the end of the set. * Shallow copies instance before it is added. Increases the * size of the dataset if it is not large enough. Does not * check if the instance is compatible with the dataset. * * @param instance the instance to be added */ public final void add(Instance instance) { Instance newInstance = (Instance)instance.copy(); newInstance.setDataset(this); m_Instances.addElement(newInstance); } /** * Adds one instance to the end of the set, with given weight. * Shallow copies instance before it is added. Increases the size of * the dataset if it is not large enough. Does not check if the * instance is compatible with the dataset. * * @param instance the instance to be added */ public final void add(Instance instance, double weight) { Instance newInstance = (Instance)instance.copy(); newInstance.setDataset(this); newInstance.setWeight(weight); m_Instances.addElement(newInstance); } /** * Returns an attribute. 
* * @param index the attribute's index * @return the attribute at the given position */ public final Attribute attribute(int index) { return (Attribute) m_Attributes.elementAt(index); } /** * Returns an attribute given its name. If there is more than * one attribute with the same name, it returns the first one. * Returns null if the attribute can't be found. * * @param name the attribute's name * @return the attribute with the given name, null if the * attribute can't be found */ public final Attribute attribute(String name) { for (int i = 0; i < numAttributes(); i++) { if (attribute(i).name().equals(name)) { return attribute(i); } } return null; } /** * Checks for string attributes in the dataset * * @return true if string attributes are present, false otherwise */ public boolean checkForStringAttributes() { int i = 0; while (i < m_Attributes.size()) { if (attribute(i++).isString()) { return true; } } return false; } //=============================BEGIN EDIT mbilenko========================== /** * Checks for nominal attributes in the dataset * * @return true if nominal attributes are present, false otherwise. Class attribute * is not checked (it may be nominal). * */ public boolean checkForNominalAttributes() { int i = 0; while (i < m_Attributes.size()) { if (i != m_ClassIndex && attribute(i).isNominal()) { return true; } i++; } return false; } //=============================END EDIT mbilenko========================== /** * Checks if the given instance is compatible * with this dataset. Only looks at the size of * the instance and the ranges of the values for * nominal and string attributes. 
* * @return true if the instance is compatible with the dataset */ public final boolean checkInstance(Instance instance) { if (instance.numAttributes() != numAttributes()) { return false; } for (int i = 0; i < numAttributes(); i++) { if (instance.isMissing(i)) { continue; } else if (attribute(i).isNominal() || attribute(i).isString()) { if (!(Utils.eq(instance.value(i), (double)(int)instance.value(i)))) { return false; } else if (Utils.sm(instance.value(i), 0) || Utils.gr(instance.value(i), attribute(i).numValues())) { return false; } } } return true; } /** * Returns the class attribute. * * @return the class attribute * @exception UnassignedClassException if the class is not set */ public final Attribute classAttribute() { if (m_ClassIndex < 0) { throw new UnassignedClassException("Class index is negative (not set)!"); } return attribute(m_ClassIndex); } /** * Returns the class attribute's index. Returns negative number * if it's undefined. * * @return the class index as an integer */ public final int classIndex() { return m_ClassIndex; } /** * Compactifies the set of instances. Decreases the capacity of * the set so that it matches the number of instances in the set. */ public final void compactify() { m_Instances.trimToSize(); } /** * Removes all instances from the set. */ public final void delete() { m_Instances = new FastVector(); } /** * Removes an instance at the given position from the set. * * @param index the instance's position */ public final void delete(int index) { m_Instances.removeElementAt(index); } /** * Deletes an attribute at the given position * (0 to numAttributes() - 1). A deep copy of the attribute * information is performed before the attribute is deleted. 
* * @param pos the attribute's position * @exception IllegalArgumentException if the given index is out of range or the * class attribute is being deleted */ public void deleteAttributeAt(int position) { if ((position < 0) || (position >= m_Attributes.size())) { throw new IllegalArgumentException("Index out of range"); } if (position == m_ClassIndex) { throw new IllegalArgumentException("Can't delete class attribute"); } freshAttributeInfo(); if (m_ClassIndex > position) { m_ClassIndex--; } m_Attributes.removeElementAt(position);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -