📄 instances.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* Instances.java
* Copyright (C) 1999 Eibe Frank
*
*/
package weka.core;
import java.io.*;
import java.text.ParseException;
import java.util.*;
/**
* Class for handling an ordered set of weighted instances. <p>
*
* Typical usage (code from the main() method of this class): <p>
*
* <code>
* ... <br>
*
* // Read all the instances in the file <br>
* reader = new FileReader(filename); <br>
* instances = new Instances(reader); <br><br>
*
* // Make the last attribute be the class <br>
* instances.setClassIndex(instances.numAttributes() - 1); <br><br>
*
* // Print header and instances. <br>
* System.out.println("\nDataset:\n"); <br>
* System.out.println(instances); <br><br>
*
* ... <br>
* </code><p>
*
* All methods that change a set of instances are safe, i.e. a change
* to one set of instances does not affect any other set of
* instances. All methods that change a dataset's attribute
* information clone the dataset before it is changed.
*
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
* @author Len Trigg (trigg@cs.waikato.ac.nz)
* @version $Revision: 1.1 $
*/
public class Instances implements Serializable {
/**
* The filename extension that should be used for ARFF files.
* Declared non-final, so the value can be reassigned at runtime.
*/
public static String FILE_EXTENSION = ".arff";
/**
* The filename extension that should be used for binary serialized
* instances files. Declared non-final, like FILE_EXTENSION.
*/
public static String SERIALIZED_OBJ_FILE_EXTENSION = ".bsi";
/** The keyword used to denote the start of an ARFF header (package-private). */
static String ARFF_RELATION = "@relation";
/** The keyword used to denote the start of the ARFF data section (package-private). */
static String ARFF_DATA = "@data";
/** The dataset's name. */
protected /*@spec_public non_null@*/ String m_RelationName;
/**
* The attribute information. Elements are read back as Attribute
* objects by attribute(int).
*/
protected /*@spec_public non_null@*/ FastVector m_Attributes;
/* public invariant (\forall int i; 0 <= i && i < m_Attributes.size();
m_Attributes.elementAt(i) != null);
*/
/** The instances held by this dataset. */
protected /*@spec_public non_null@*/ FastVector m_Instances;
/**
* The class attribute's index. A negative value means that no class
* attribute is set; the constructors that build a fresh header
* initialise it to -1.
*/
protected int m_ClassIndex;
//@ protected invariant classIndex() == m_ClassIndex;
/**
* Buffer of values for sparse instance.
* NOTE(review): presumably scratch space used while reading sparse data
* rows; its use is not visible in this part of the file -- confirm.
*/
protected double[] m_ValueBuffer;
/**
* Buffer of indices for sparse instance.
* NOTE(review): presumably paired with m_ValueBuffer -- confirm.
*/
protected int[] m_IndicesBuffer;
/**
* Reads an ARFF file from a reader, and assigns a weight of
* one to each instance. Lets the index of the class
* attribute be undefined (negative).
*
* @param reader the reader
* @exception IOException if the ARFF file is not read
* successfully
*/
public Instances(/*@non_null@*/Reader reader) throws IOException {
StreamTokenizer tokenizer;
tokenizer = new StreamTokenizer(reader);
initTokenizer(tokenizer);
readHeader(tokenizer);
m_ClassIndex = -1;
m_Instances = new FastVector(1000);
while (getInstance(tokenizer, true)) {};
compactify();
}
/**
 * Reads the header of an ARFF file from a reader and
 * reserves space for the given number of instances. No data rows
 * are read. Lets the class index be undefined (negative).
 *
 * @param reader the reader
 * @param capacity the initial capacity; zero is allowed
 * @exception IllegalArgumentException if the header is not read successfully
 * or the capacity is negative.
 * @exception IOException if there is a problem with the reader.
 */
//@ requires capacity >= 0;
//@ ensures classIndex() == -1;
public Instances(/*@non_null@*/Reader reader, int capacity)
  throws IOException {
  // Only negative capacities are rejected, so the message must say
  // "non-negative": zero is a valid request.
  if (capacity < 0) {
    throw new IllegalArgumentException(
      "Capacity has to be non-negative: " + capacity);
  }
  StreamTokenizer tokenizer = new StreamTokenizer(reader);
  initTokenizer(tokenizer);
  readHeader(tokenizer);
  m_ClassIndex = -1;
  m_Instances = new FastVector(capacity);
}
/**
* Constructor copying all instances and references to
* the header information from the given set of instances.
*
* @param instances the set to be copied
*/
public Instances(/*@non_null@*/Instances dataset) {
this(dataset, dataset.numInstances());
dataset.copyInstances(0, this, dataset.numInstances());
}
/**
* Constructor creating an empty set of instances. Copies references
* to the header information from the given set of instances. Sets
* the capacity of the set of instances to 0 if its negative.
*
* @param instances the instances from which the header
* information is to be taken
* @param capacity the capacity of the new dataset
*/
public Instances(/*@non_null@*/Instances dataset, int capacity) {
if (capacity < 0) {
capacity = 0;
}
// Strings only have to be "shallow" copied because
// they can't be modified.
m_ClassIndex = dataset.m_ClassIndex;
m_RelationName = dataset.m_RelationName;
m_Attributes = dataset.m_Attributes;
m_Instances = new FastVector(capacity);
}
/**
 * Creates a new set of instances by copying a
 * subset of another set. The header information is shared with the
 * source set.
 *
 * @param source the set of instances from which a subset
 * is to be created
 * @param first the index of the first instance to be copied
 * @param toCopy the number of instances to be copied
 * @exception IllegalArgumentException if first is negative, toCopy is
 * negative, or the requested range extends past the end of the source
 */
//@ requires 0 <= first;
//@ requires 0 <= toCopy;
//@ requires first + toCopy <= source.numInstances();
public Instances(/*@non_null@*/Instances source, int first, int toCopy) {
  this(source, toCopy);
  // Enforce the documented precondition on toCopy as well as on first.
  // The upper bound is written as a subtraction: with first >= 0 the
  // difference cannot overflow, whereas "first + toCopy" can wrap to a
  // negative number and slip past the check.
  if ((first < 0) || (toCopy < 0)
      || (toCopy > source.numInstances() - first)) {
    throw new IllegalArgumentException("Parameters first and/or toCopy out "+
                                       "of range");
  }
  source.copyInstances(first, this, toCopy);
}
/**
 * Creates an empty set of instances. Uses the given
 * attribute information. Sets the capacity of the set of
 * instances to 0 if it is negative. Given attribute information
 * must not be changed after this constructor has been used.
 * Each attribute in attInfo has its index set to its position in
 * the vector, and the class index is left undefined (-1).
 *
 * @param name the name of the relation
 * @param attInfo the attribute information
 * @param capacity the capacity of the set; negative values are treated as 0
 */
public Instances(/*@non_null@*/String name,
                 /*@non_null@*/FastVector attInfo, int capacity) {
  m_RelationName = name;
  m_ClassIndex = -1;
  m_Attributes = attInfo;
  for (int i = 0; i < numAttributes(); i++) {
    attribute(i).setIndex(i);
  }
  // Honour the documented contract (and match the
  // Instances(Instances, int) constructor): a negative capacity
  // means "no reserved space" instead of being passed on unchecked.
  m_Instances = new FastVector(Math.max(capacity, 0));
}
/**
 * Returns an empty copy of this dataset's structure in which every
 * string attribute has been replaced by a fresh string attribute of
 * the same name, so that the copy holds no references to strings
 * seen in the past. The class index is carried over.
 *
 * @return a copy of the instance structure.
 */
public Instances stringFreeStructure() {
  final FastVector cleaned = (FastVector) m_Attributes.copy();
  for (int pos = 0; pos < cleaned.size(); pos++) {
    final Attribute current = (Attribute) cleaned.elementAt(pos);
    if (current.type() != Attribute.STRING) {
      continue;
    }
    // Swap in a new, empty string attribute under the same name.
    final Attribute blank = new Attribute(current.name(), (FastVector) null);
    cleaned.setElementAt(blank, pos);
  }

  final Instances structure = new Instances(relationName(), cleaned, 0);
  structure.m_ClassIndex = m_ClassIndex;
  return structure;
}
/**
 * Appends a copy of the given instance to the end of this set. The
 * copy (not the argument) is attached to this dataset. The set grows
 * if it is not large enough. No compatibility check is performed.
 *
 * @param instance the instance to be added
 */
public void add(/*@non_null@*/ Instance instance) {
  final Instance owned = (Instance) instance.copy();
  owned.setDataset(this);
  this.m_Instances.addElement(owned);
}
/**
 * Looks up an attribute by position.
 *
 * @param index the attribute's index
 * @return the attribute at the given position
 */
//@ requires 0 <= index;
//@ requires index < m_Attributes.size();
//@ ensures \result != null;
public /*@pure@*/ Attribute attribute(int index) {
  final Object stored = this.m_Attributes.elementAt(index);
  return (Attribute) stored;
}
/**
 * Looks up an attribute by name. Attributes are scanned in index
 * order, so if several share the name the first one wins.
 *
 * @param name the attribute's name
 * @return the attribute with the given name, null if the
 * attribute can't be found
 */
public /*@pure@*/ Attribute attribute(String name) {
  final int total = numAttributes();
  for (int pos = 0; pos < total; pos++) {
    final Attribute candidate = attribute(pos);
    if (candidate.name().equals(name)) {
      return candidate;
    }
  }
  // No attribute carries the requested name.
  return null;
}
/**
 * Tells whether the dataset contains at least one string attribute.
 *
 * @return true if string attributes are present, false otherwise
 */
public /*@pure@*/ boolean checkForStringAttributes() {
  for (int pos = 0; pos < m_Attributes.size(); pos++) {
    if (attribute(pos).isString()) {
      return true;
    }
  }
  return false;
}
/**
 * Checks if the given instance is compatible with this dataset.
 * Only two things are examined: the number of attributes, and, for
 * every non-missing nominal or string attribute value, that the value
 * is a whole number inside the accepted range.
 *
 * @param instance the instance to check
 * @return true if the instance is compatible with the dataset
 */
public /*@pure@*/ boolean checkInstance(Instance instance) {
  final int total = numAttributes();
  if (instance.numAttributes() != total) {
    return false;
  }
  for (int pos = 0; pos < total; pos++) {
    if (instance.isMissing(pos)) {
      continue;
    }
    final Attribute att = attribute(pos);
    if (!att.isNominal() && !att.isString()) {
      // Other attribute types are not range-checked.
      continue;
    }
    final double value = instance.value(pos);
    // The value must be integral (it is used as an index).
    if (!Utils.eq(value, (double)(int) value)) {
      return false;
    }
    // NOTE(review): the upper bound is a strict "greater than" against
    // numValues(), so a value equal to numValues() is accepted --
    // confirm whether that is intended.
    if (Utils.sm(value, 0) || Utils.gr(value, att.numValues())) {
      return false;
    }
  }
  return true;
}
/**
 * Gives access to the attribute currently designated as the class.
 *
 * @return the class attribute
 * @exception UnassignedClassException if the class is not set
 */
//@ requires classIndex() >= 0;
public /*@pure@*/ Attribute classAttribute() {
  if (m_ClassIndex >= 0) {
    return attribute(m_ClassIndex);
  }
  throw new UnassignedClassException("Class index is negative (not set)!");
}
/**
 * Reports the position of the class attribute. The result is
 * negative when no class attribute has been set.
 *
 * @return the class index as an integer
 */
// ensures \result == m_ClassIndex;
public /*@pure@*/ int classIndex() {
  return this.m_ClassIndex;
}
/**
 * Shrinks the capacity of the set so that it matches the number of
 * instances currently stored in it.
 */
public void compactify() {
  this.m_Instances.trimToSize();
}
/**
 * Empties the set by replacing the instance container with a new,
 * empty one.
 */
public void delete() {
  this.m_Instances = new FastVector();
}
/**
 * Drops the instance stored at the given position.
 *
 * @param index the instance's position
 */
//@ requires 0 <= index && index < numInstances();
public void delete(int index) {
  this.m_Instances.removeElementAt(index);
}
/**
* Deletes an attribute at the given position
* (0 to numAttributes() - 1). A deep copy of the attribute
* information is performed before the attribute is deleted.
*
* @param position the attribute's position
* @exception IllegalArgumentException if the given index is out of range
* or the class attribute is being deleted
*/
//@ requires 0 <= position && position < numAttributes();
//@ requires position != classIndex();
public void deleteAttributeAt(int position) {
if ((position < 0) || (position >= m_Attributes.size())) {
throw new IllegalArgumentException("Index out of range");
}
if (position == m_ClassIndex) {
throw new IllegalArgumentException("Can't delete class attribute");
}
freshAttributeInfo();
if (m_ClassIndex > position) {
m_ClassIndex--;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -