📄 attribute.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* Attribute.java
* Copyright (C) 1999 Eibe Frank
*
*/
package weka.core;
import java.io.IOException;
import java.io.Serializable;
import java.io.StreamTokenizer;
import java.io.StringReader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Properties;
/**
* Class for handling an attribute. Once an attribute has been created,
* it can't be changed. <p>
*
* Four attribute types are supported:
* <ul>
* <li> numeric: <ul>
* This type of attribute represents a floating-point number.
* </ul>
* <li> nominal: <ul>
* This type of attribute represents a fixed set of nominal values.
* </ul>
* <li> string: <ul>
* This type of attribute represents a dynamically expanding set of
* nominal values. String attributes are not used by the learning
* schemes in Weka. They can be used, for example, to store an
* identifier with each instance in a dataset.
* </ul>
* <li> date: <ul>
* This type of attribute represents a date, described by a date
* format pattern suitable for <code>java.text.SimpleDateFormat</code>.
* </ul>
* </ul>
* Typical usage (code from the main() method of this class): <p>
*
* <code>
* ... <br>
*
* // Create numeric attributes "length" and "weight" <br>
* Attribute length = new Attribute("length"); <br>
* Attribute weight = new Attribute("weight"); <br><br>
*
* // Create vector to hold nominal values "first", "second", "third" <br>
* FastVector my_nominal_values = new FastVector(3); <br>
* my_nominal_values.addElement("first"); <br>
* my_nominal_values.addElement("second"); <br>
* my_nominal_values.addElement("third"); <br><br>
*
* // Create nominal attribute "position" <br>
* Attribute position = new Attribute("position", my_nominal_values);<br>
*
* ... <br>
* </code><p>
*
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
* @version $Revision$
*/
public class Attribute implements Copyable, Serializable {
/** Type constant for numeric attributes. */
public static final int NUMERIC = 0;
/** Type constant for nominal attributes. */
public static final int NOMINAL = 1;
/** Type constant for attributes with string values. */
public static final int STRING = 2;
/** Type constant for attributes with date values. */
public static final int DATE = 3;
/**
* Ordering constant for symbolic attributes. The ordering constants are
* numbered independently of the type constants above (this one has the
* same value, 0, as NUMERIC), so the two families must not be mixed up.
*/
public static final int ORDERING_SYMBOLIC = 0;
/** Ordering constant for ordered attributes. */
public static final int ORDERING_ORDERED = 1;
/** Ordering constant for modulo-ordered attributes. */
public static final int ORDERING_MODULO = 2;
/**
* The keyword used to denote the start of an arff attribute declaration.
* NOTE(review): this and the other ARFF keyword fields below are
* package-private and not final, so they can be reassigned from within
* the package.
*/
static String ARFF_ATTRIBUTE = "@attribute";
/** A keyword used to denote a numeric attribute */
static String ARFF_ATTRIBUTE_INTEGER = "integer";
/** A keyword used to denote a numeric attribute */
static String ARFF_ATTRIBUTE_REAL = "real";
/** A keyword used to denote a numeric attribute */
static String ARFF_ATTRIBUTE_NUMERIC = "numeric";
/** The keyword used to denote a string attribute */
static String ARFF_ATTRIBUTE_STRING = "string";
/** The keyword used to denote a date attribute */
static String ARFF_ATTRIBUTE_DATE = "date";
/**
* Strings longer than this many characters will be stored compressed
* (wrapped in a SerializedObject) instead of as plain Strings.
*/
private static final int STRING_COMPRESS_THRESHOLD = 200;
/** The attribute's name. */
private String m_Name;
/** The attribute's type (one of NUMERIC, NOMINAL, STRING or DATE). */
private int m_Type;
/**
* The attribute's values (if nominal or string); null for numeric and
* date attributes. Elements are either Strings or, for values longer
* than STRING_COMPRESS_THRESHOLD, SerializedObject wrappers.
*/
private FastVector m_Values;
/**
* Mapping of stored values to their Integer indices (if nominal or
* string); null for numeric and date attributes.
*/
private Hashtable m_Hashtable;
/** Date format specification for date attributes */
private SimpleDateFormat m_DateFormat;
/** The attribute's index; the constructors initialise it to -1. */
private int m_Index;
/** The attribute's metadata. */
private ProtectedProperties m_Metadata;
/** The attribute's ordering. */
private int m_Ordering;
/** Whether the attribute is regular. */
private boolean m_IsRegular;
/** Whether the attribute is averagable. */
private boolean m_IsAveragable;
/** Whether the attribute has a zeropoint. */
private boolean m_HasZeropoint;
/** The attribute's weight. */
private double m_Weight;
/** The attribute's lower numeric bound. */
private double m_LowerBound;
/** Whether the lower bound is open. */
private boolean m_LowerBoundIsOpen;
/** The attribute's upper numeric bound. */
private double m_UpperBound;
/** Whether the upper bound is open */
private boolean m_UpperBoundIsOpen;
/**
* Constructor for a numeric attribute. Delegates to
* {@link #Attribute(String, ProtectedProperties)}, supplying a
* ProtectedProperties wrapper around an empty Properties object as
* the metadata.
*
* @param attributeName the name for the attribute
*/
public Attribute(String attributeName) {
this(attributeName, new ProtectedProperties(new Properties()));
}
/**
 * Creates a numeric attribute with caller-supplied metadata.
 * <p>
 * A numeric attribute has neither a value list nor a value-to-index
 * table; both are left null. The attribute's index starts out as -1.
 *
 * @param attributeName the name for the attribute
 * @param metadata the attribute's properties
 */
public Attribute(String attributeName, ProtectedProperties metadata) {
  // Numeric attributes carry no label list and no lookup table.
  this.m_Type = NUMERIC;
  this.m_Values = null;
  this.m_Hashtable = null;

  this.m_Index = -1;
  this.m_Name = attributeName;

  // Apply the metadata last, once the basic fields are in place.
  setMetadata(metadata);
}
/**
* Constructor for a date attribute. Delegates to
* {@link #Attribute(String, String, ProtectedProperties)}, supplying a
* ProtectedProperties wrapper around an empty Properties object as
* the metadata.
*
* @param attributeName the name for the attribute
* @param dateFormat a pattern string suitable for use with
* {@link java.text.SimpleDateFormat} for parsing dates; if null, the
* three-argument constructor falls back to the pattern
* <code>yyyy-MM-dd'T'HH:mm:ss</code>
*/
public Attribute(String attributeName, String dateFormat) {
this(attributeName, dateFormat,
new ProtectedProperties(new Properties()));
}
/**
* Constructor for a date attribute, where metadata is supplied.
*
* @param attributeName the name for the attribute
* @param dateFormat a string suitable for use with
* SimpleDateFormatter for parsing dates.
* @param metadata the attribute's properties
*/
public Attribute(String attributeName, String dateFormat,
ProtectedProperties metadata) {
m_Name = attributeName;
m_Index = -1;
m_Values = null;
m_Hashtable = null;
m_Type = DATE;
if (dateFormat != null) {
m_DateFormat = new SimpleDateFormat(dateFormat);
} else {
m_DateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
}
m_DateFormat.setLenient(false);
setMetadata(metadata);
}
/**
* Constructor for nominal attributes and string attributes.
* If a null vector of attribute values is passed to the method,
* the attribute is assumed to be a string. Delegates to
* {@link #Attribute(String, FastVector, ProtectedProperties)}, supplying
* a ProtectedProperties wrapper around an empty Properties object as
* the metadata.
*
* @param attributeName the name for the attribute
* @param attributeValues a vector of strings denoting the
* attribute values. Null if the attribute is a string attribute.
*/
public Attribute(String attributeName,
FastVector attributeValues) {
this(attributeName, attributeValues,
new ProtectedProperties(new Properties()));
}
/**
 * Constructor for nominal attributes and string attributes, where
 * metadata is supplied. If a null vector of attribute values is passed
 * to the method, the attribute is assumed to be a string attribute and
 * starts with an empty value list.
 * <p>
 * For a nominal attribute every label is copied into the attribute's own
 * value list and registered in the value-to-index table under its
 * position. Labels longer than STRING_COMPRESS_THRESHOLD characters are
 * stored wrapped in a SerializedObject; if that wrapping fails, a message
 * is printed to System.err and the label is stored as a plain String.
 *
 * @param attributeName the name for the attribute
 * @param attributeValues a vector of strings denoting the
 * attribute values. Null if the attribute is a string attribute.
 * @param metadata the attribute's properties
 * @throws IllegalArgumentException if the same label occurs more than once
 * @throws ClassCastException if an element of attributeValues is not a String
 * @throws NullPointerException if an element of attributeValues is null
 */
public Attribute(String attributeName,
                 FastVector attributeValues,
                 ProtectedProperties metadata) {
  m_Name = attributeName;
  m_Index = -1;
  if (attributeValues == null) {
    m_Values = new FastVector();
    m_Hashtable = new Hashtable();
    m_Type = STRING;
  } else {
    int numValues = attributeValues.size();
    m_Values = new FastVector(numValues);
    m_Hashtable = new Hashtable(numValues);
    for (int i = 0; i < numValues; i++) {
      Object store = attributeValues.elementAt(i);
      if (((String) store).length() > STRING_COMPRESS_THRESHOLD) {
        try {
          store = new SerializedObject(store, true);
        } catch (Exception ex) {
          // Best effort: keep the plain String if it cannot be compressed.
          System.err.println("Couldn't compress nominal attribute value -"
                             + " storing uncompressed.");
        }
      }
      // The hashtable is filled in lockstep with the value list, so a
      // constant-time key lookup replaces a linear scan of m_Values and
      // keeps construction linear in the number of labels.
      if (m_Hashtable.containsKey(store)) {
        throw new IllegalArgumentException("A nominal attribute (" +
                                           attributeName + ") cannot"
                                           + " have duplicate labels.");
      }
      m_Values.addElement(store);
      m_Hashtable.put(store, new Integer(i));
    }
    m_Type = NOMINAL;
  }
  setMetadata(metadata);
}
/**
 * Produces a shallow copy of this attribute.
 * <p>
 * The copy is created under the same name and then given this
 * attribute's index and type. The value list, the value-to-index table
 * and the date format are shared with the original rather than
 * duplicated. Finally this attribute's metadata is applied to the copy
 * via setMetadata.
 *
 * @return a copy of this attribute with the same index
 */
public Object copy() {
  final Attribute duplicate = new Attribute(m_Name);

  // Shared references: nothing below is cloned.
  duplicate.m_DateFormat = m_DateFormat;
  duplicate.m_Hashtable = m_Hashtable;
  duplicate.m_Values = m_Values;

  duplicate.m_Type = m_Type;
  duplicate.m_Index = m_Index;

  // Applied last, after the type has been overwritten.
  duplicate.setMetadata(m_Metadata);
  return duplicate;
}
/**
 * Returns an enumeration over all of the attribute's values if the
 * attribute is nominal or a string attribute, and null otherwise.
 * <p>
 * Values that are stored wrapped in a SerializedObject are unwrapped
 * (via getObject()) before being handed out; all other values are
 * returned as stored.
 * <p>
 * NOTE(review): the method name looks like a misspelling of
 * "enumerateValues"; it is left unchanged here because it is part of
 * the public interface.
 *
 * @return enumeration of all the attribute's values, or null if the
 * attribute is neither nominal nor a string attribute
 */
public final Enumeration emerateValues() {
  if (!isNominal() && !isString()) {
    return null;
  }
  final Enumeration stored = m_Values.elements();
  return new Enumeration() {
    public boolean hasMoreElements() {
      return stored.hasMoreElements();
    }

    public Object nextElement() {
      Object next = stored.nextElement();
      // Long values are kept wrapped; hand out the original object.
      return (next instanceof SerializedObject)
          ? ((SerializedObject) next).getObject()
          : next;
    }
  };
}
/**
 * Tests if given attribute is equal to this attribute.
 * <p>
 * Two attributes are equal when they are of exactly the same class, have
 * equal names and, if both are nominal, hold equal stored values in the
 * same order. In every other case they are equal when their types match.
 * The date format is not part of the comparison.
 *
 * @param other the Object to be compared to this attribute
 * @return true if the given attribute is equal to this attribute
 */
public final boolean equals(Object other) {
  if (other == null || !other.getClass().equals(getClass())) {
    return false;
  }
  Attribute that = (Attribute) other;
  if (!m_Name.equals(that.m_Name)) {
    return false;
  }
  // Unless both sides are nominal, only the type decides.
  if (!(isNominal() && that.isNominal())) {
    return type() == that.type();
  }
  // Both nominal: compare the label lists element by element.
  if (m_Values.size() != that.m_Values.size()) {
    return false;
  }
  for (int pos = 0; pos < m_Values.size(); pos++) {
    Object mine = m_Values.elementAt(pos);
    Object theirs = that.m_Values.elementAt(pos);
    if (!mine.equals(theirs)) {
      return false;
    }
  }
  return true;
}
/**
 * Gets the index stored for this attribute. The constructors set it
 * to -1.
 *
 * @return the index of this attribute
 */
public final int index() {
  return this.m_Index;
}
/**
 * Returns the index of a given attribute value. (The index of
 * the first occurrence of this value.)
 * <p>
 * Values longer than STRING_COMPRESS_THRESHOLD characters are wrapped in
 * a SerializedObject before the lookup, mirroring how such values are
 * stored. If the wrapping fails, a message is printed to System.err and
 * the lookup is done with the plain String.
 *
 * @param value the value for which the index is to be returned
 * @return the index of the given attribute value if attribute
 * is nominal or a string, -1 if it is numeric, if the value is null,
 * or if the value can't be found
 */
public final int indexOfValue(String value) {
  if (!isNominal() && !isString()) {
    return -1;
  }
  // A null value can never be stored (Hashtable rejects null keys), so
  // report "not found" instead of failing with a NullPointerException.
  if (value == null) {
    return -1;
  }
  Object store = value;
  if (value.length() > STRING_COMPRESS_THRESHOLD) {
    try {
      store = new SerializedObject(value, true);
    } catch (Exception ex) {
      // Best effort: fall back to looking up the plain String.
      System.err.println("Couldn't compress string attribute value -"
                         + " searching uncompressed.");
    }
  }
  Integer val = (Integer) m_Hashtable.get(store);
  if (val == null) {
    return -1;
  }
  return val.intValue();
}
/**
* Test if the attribute is nominal.
*
* @return true if the attribute is nominal
*/
public final boolean isNominal() {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -