📄 instances.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* Instances.java
* Copyright (C) 1999 Eibe Frank
*
*/
package weka.core;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.io.StreamTokenizer;
import java.text.ParseException;
import java.util.Enumeration;
import java.util.Random;
/**
* Class for handling an ordered set of weighted instances. <p>
*
* Typical usage (code from the main() method of this class): <p>
*
* <code>
* ... <br>
*
* // Read all the instances in the file <br>
* reader = new FileReader(filename); <br>
* instances = new Instances(reader); <br><br>
*
* // Make the last attribute be the class <br>
* instances.setClassIndex(instances.numAttributes() - 1); <br><br>
*
* // Print header and instances. <br>
* System.out.println("\nDataset:\n"); <br>
* System.out.println(instances); <br><br>
*
* ... <br>
* </code><p>
*
* All methods that change a set of instances are safe, i.e. a change
* to one set of instances does not affect any other set of
* instances. All methods that change a dataset's attribute
* information clone the dataset before it is changed.
*
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
* @author Len Trigg (trigg@cs.waikato.ac.nz)
* @version $Revision$
*/
public class Instances implements Serializable {
/**
 * Version identifier used by Java serialization for this class.
 */
private static final long serialVersionUID = -2653844491123990234L;
/** The filename extension that should be used for ARFF files. */
public static String FILE_EXTENSION = ".arff";
/** The filename extension that should be used for binary serialized instances files. */
public static String SERIALIZED_OBJ_FILE_EXTENSION = ".bsi";
/** The keyword used to denote the start of an ARFF header. */
static String ARFF_RELATION = "@relation";
/** The keyword used to denote the start of the ARFF data section. */
static String ARFF_DATA = "@data";
/** The dataset's name. */
protected String m_RelationName;
/** The attribute information; elements are read back as Attribute objects. */
protected FastVector m_Attributes;
/** The instances. */
protected FastVector m_Instances;
/** The class attribute's index; a negative value means the class is not set. */
protected int m_ClassIndex;
/** Buffer of values for a sparse instance. */
protected double[] m_ValueBuffer;
/** Buffer of indices for a sparse instance. */
protected int[] m_IndicesBuffer;
/**
* Reads an ARFF file from a reader, and assigns a weight of
* one to each instance. Lets the index of the class
* attribute be undefined (negative).
*
* @param reader the reader
* @exception IOException if the ARFF file is not read
* successfully
*/
public Instances(Reader reader) throws IOException {
StreamTokenizer tokenizer;
tokenizer = new StreamTokenizer(reader);
initTokenizer(tokenizer);
readHeader(tokenizer);
m_ClassIndex = -1;
m_Instances = new FastVector(1000);
while (getInstance(tokenizer, true)) {};
compactify();
}
/**
 * Reads only the header of an ARFF file from a reader and
 * reserves space for the given number of instances. Lets
 * the class index be undefined (negative).
 *
 * @param reader the reader
 * @param capacity the number of instances to reserve space for;
 * zero is allowed, negative values are rejected
 * @exception IllegalArgumentException if the capacity is negative
 * (the header parsing done by readHeader may also reject a malformed
 * header; that code is not part of this constructor)
 * @exception IOException if there is a problem with the reader.
 */
public Instances(Reader reader, int capacity) throws IOException {
  // Validate before touching the reader so that a bad capacity consumes no input.
  if (capacity < 0) {
    // The check accepts 0, so the message says "non-negative" rather than "positive".
    throw new IllegalArgumentException("Capacity has to be non-negative!");
  }
  StreamTokenizer tokenizer = new StreamTokenizer(reader);
  initTokenizer(tokenizer);
  readHeader(tokenizer);
  m_ClassIndex = -1;
  m_Instances = new FastVector(capacity);
}
/**
 * Constructor copying all instances and references to
 * the header information from the given set of instances.
 * The new set is created with a capacity equal to the number of
 * instances in the given set, then filled via copyInstances.
 *
 * @param dataset the set to be copied
 */
public Instances(Instances dataset) {
this(dataset, dataset.numInstances());
dataset.copyInstances(0, this, dataset.numInstances());
}
/**
 * Creates an empty set of instances that shares its header information
 * (relation name, attribute information and class index) with the given
 * set. The header is copied by reference, not cloned. A negative
 * capacity is treated as 0.
 *
 * @param dataset the instances from which the header
 * information is to be taken
 * @param capacity the capacity of the new dataset
 */
public Instances(Instances dataset, int capacity) {
  // Sharing references is enough here: strings cannot be modified,
  // so a "shallow" copy of the header is safe.
  m_RelationName = dataset.m_RelationName;
  m_Attributes = dataset.m_Attributes;
  m_ClassIndex = dataset.m_ClassIndex;
  m_Instances = new FastVector(Math.max(capacity, 0));
}
/**
 * Creates a new set of instances by copying a
 * subset of another set.
 *
 * @param source the set of instances from which a subset
 * is to be created
 * @param first the index of the first instance to be copied
 * @param toCopy the number of instances to be copied
 * @exception IllegalArgumentException if first is negative or if
 * first plus toCopy exceeds the number of instances in source
 */
public Instances(Instances source, int first, int toCopy) {
  this(source, toCopy);
  // Compare toCopy against the number of instances remaining after 'first'
  // instead of computing first + toCopy: that sum can overflow int, wrap to a
  // negative number and slip past the range check.
  if ((first < 0) || (toCopy > source.numInstances() - first)) {
    throw new IllegalArgumentException("Parameters first and/or toCopy out "+
                                       "of range");
  }
  source.copyInstances(first, this, toCopy);
}
/**
 * Creates an empty set of instances. Uses the given
 * attribute information. Sets the capacity of the set of
 * instances to 0 if it is negative. Given attribute information
 * must not be changed after this constructor has been used.
 * Each attribute in attInfo has its index set to its position
 * in the list, and the class index is left undefined (negative).
 *
 * @param name the name of the relation
 * @param attInfo the attribute information
 * @param capacity the capacity of the set
 */
public Instances(String name, FastVector attInfo, int capacity) {
  m_RelationName = name;
  m_ClassIndex = -1;
  m_Attributes = attInfo;
  for (int i = 0; i < numAttributes(); i++) {
    attribute(i).setIndex(i);
  }
  // Clamp as documented: a negative capacity means "start empty" instead of
  // being handed to FastVector unchanged.
  m_Instances = new FastVector(Math.max(capacity, 0));
}
/**
 * Creates an empty dataset with the same relation name, attributes and
 * class index as this one, except that every string attribute is replaced
 * by a newly constructed attribute of the same name, so the result does
 * not contain references to the strings seen in the past.
 *
 * @return a copy of the instance structure.
 */
public Instances stringFreeStructure() {
  FastVector cleanedAtts = (FastVector) m_Attributes.copy();
  for (int pos = 0; pos < cleanedAtts.size(); pos++) {
    Attribute current = (Attribute) cleanedAtts.elementAt(pos);
    if (current.type() != Attribute.STRING) {
      continue;
    }
    // Swap the string attribute for a fresh one carrying only the name.
    Attribute replacement = new Attribute(current.name(), (FastVector) null);
    cleanedAtts.setElementAt(replacement, pos);
  }

  Instances structure = new Instances(relationName(), cleanedAtts, 0);
  structure.m_ClassIndex = m_ClassIndex;
  return structure;
}
/**
 * Appends a copy of the given instance to the end of this set. The copy
 * (made with Instance.copy()) is attached to this dataset before it is
 * stored; the caller's instance is left untouched. No check is made that
 * the instance is compatible with the dataset.
 *
 * @param instance the instance to be added
 */
public final void add(Instance instance) {
  Instance owned = (Instance) instance.copy();
  owned.setDataset(this);
  this.m_Instances.addElement(owned);
}
/**
 * Looks up an attribute by its position in the attribute information.
 *
 * @param index the attribute's index
 * @return the attribute at the given position
 */
public final Attribute attribute(int index) {
  Object entry = m_Attributes.elementAt(index);
  return (Attribute) entry;
}
/**
 * Looks up an attribute by name, scanning the attributes in index order.
 * When several attributes share the name, the first one is returned.
 *
 * @param name the attribute's name
 * @return the attribute with the given name, null if the
 * attribute can't be found
 */
public final Attribute attribute(String name) {
  int pos = 0;
  while (pos < numAttributes()) {
    Attribute candidate = attribute(pos);
    if (candidate.name().equals(name)) {
      return candidate;
    }
    pos++;
  }
  return null;
}
/**
 * Reports whether at least one attribute of this dataset is a string
 * attribute.
 *
 * @return true if string attributes are present, false otherwise
 */
public boolean checkForStringAttributes() {
  for (int pos = 0; pos < m_Attributes.size(); pos++) {
    if (attribute(pos).isString()) {
      return true;
    }
  }
  return false;
}
/**
 * Returns the name of the first String type (unbounded categorical)
 * attribute, so that callers can give more information in an exception
 * message.
 *
 * @return the name of the first string attribute, or the literal text
 * "NOT FIND" if the dataset has no string attribute
 * By Twang, Aug 16, 2005.
 */
public String getUnboundedStringAttributeName() {
  for (int pos = 0; pos < m_Attributes.size(); pos++) {
    Attribute candidate = attribute(pos);
    if (candidate.isString()) {
      return candidate.name();
    }
  }
  return "NOT FIND";
}
/**
 * Checks if the given instance is compatible with this dataset. Only looks at
 * the size of the instance and the ranges of the values for nominal and
 * string attributes. Missing values are skipped. For a nominal or
 * string attribute the value must be a whole number between 0 and
 * numValues() - 1 inclusive.
 *
 * @param instance the instance to check
 * @return true if the instance is compatible with the dataset
 */
public final boolean checkInstance(Instance instance) {
  if (instance.numAttributes() != numAttributes()) {
    return false;
  }
  for (int i = 0; i < numAttributes(); i++) {
    if (instance.isMissing(i)) {
      continue;
    }
    Attribute att = attribute(i);
    if (att.isNominal() || att.isString()) {
      double value = instance.value(i);
      // The value has to be a whole number.
      if (!Utils.eq(value, (double) (int) value)) {
        return false;
      }
      // The upper bound is numValues() - 1, not numValues(): comparing against
      // numValues() itself let a value one past the last entry through.
      if (Utils.sm(value, 0) || Utils.gr(value, att.numValues() - 1)) {
        return false;
      }
    }
  }
  return true;
}
/**
 * Returns the attribute currently designated as the class.
 *
 * @return the class attribute
 * @exception UnassignedClassException if the class index is negative,
 * i.e. the class is not set
 */
public final Attribute classAttribute() {
  if (m_ClassIndex >= 0) {
    return attribute(m_ClassIndex);
  }
  throw new UnassignedClassException("Class index is negative (not set)!");
}
/**
 * Gives the index of the class attribute. The value is negative
 * when no class attribute has been set.
 *
 * @return the class index as an integer
 */
public final int classIndex() {
  return this.m_ClassIndex;
}
/**
 * Trims the storage that holds the instances by calling trimToSize()
 * on it, so that the capacity matches the number of instances in the set.
 */
public final void compactify() {
  this.m_Instances.trimToSize();
}
/**
 * Removes all instances from the set by replacing the instance list
 * with a new, empty one. Header information is not touched.
 */
public final void delete() {
  this.m_Instances = new FastVector();
}
/**
 * Removes the instance stored at the given position.
 *
 * @param index the instance's position
 */
public final void delete(int index) {
  this.m_Instances.removeElementAt(index);
}
/**
* Deletes an attribute at the given position
* (0 to numAttributes() - 1). A deep copy of the attribute
* information is performed before the attribute is deleted.
*
* @param position the attribute's position
* @exception IllegalArgumentException if the given index is out of range or the
* class attribute is being deleted
*/
public void deleteAttributeAt(int position) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -