📄 sparseinstance.java
字号:
/**
*
* AgentAcademy - an open source Data Mining framework for
* training intelligent agents
*
* Copyright (C) 2001-2003 AA Consortium.
*
* This library is open source software; you can redistribute it
* and/or modify it under the terms of the GNU Lesser General
* Public License as published by the Free Software Foundation;
* either version 2.0 of the License, or (at your option) any later
* version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
*/
package org.agentacademy.modules.dataminer.core;
/**
* <p>Title: The Data Miner prototype</p>
* <p>Description: A prototype for the DataMiner (DM), the Agent Academy (AA) module responsible for performing data mining on the contents of the Agent Use Repository (AUR). The extracted knowledge is to be sent back to the AUR in the form of a PMML document.</p>
* <p>Copyright: Copyright (c) 2002</p>
* <p>Company: CERTH</p>
* @author asymeon
* @version 0.3
*/
import java.util.Enumeration;
/**
* Class for storing an instance as a sparse vector. A sparse instance
* only requires storage for those attribute values that are non-zero.
* Since the objective is to reduce storage requirements for datasets
* with large numbers of default values, this also includes nominal
* attributes -- the first nominal value (i.e. that which has index 0)
* will not require explicit storage, so rearrange your nominal attribute
* value orderings if necessary. Missing values will be stored
* explicitly.
*/
public class SparseInstance extends Instance {
/**
 * The index of the attribute associated with each stored value.
 * Entry <code>i</code> is the attribute index of the value held at
 * position <code>i</code> of the stored-values array. locateIndex()
 * runs a binary search over this array, so it is expected to be in
 * ascending order.
 */
protected int[] m_Indices;
/**
 * The maximum number of values that can be stored. This is the number
 * reported by numAttributes(); the count of values actually stored is
 * reported by numValues().
 */
protected int m_NumAttributes;
/**
 * No-argument constructor that does not assign any of the fields
 * declared in this class. Only visible to subclasses and to code in
 * the same package.
 */
protected SparseInstance() {
}
/**
 * Builds a sparse instance holding the weight and attribute values of
 * the given instance. The dataset reference of the new instance is
 * null, so it has no access to attribute type information.
 * <p>
 * If the given instance is itself sparse, its value and index arrays
 * are shared by reference rather than copied. Otherwise only the values
 * that are not equal to zero are stored (missing values compare unequal
 * to zero and are therefore kept).
 *
 * @param instance the instance from which the attribute values
 * and the weight are to be copied
 */
public SparseInstance(Instance instance) {
    m_Weight = instance.m_Weight;
    m_Dataset = null;
    m_NumAttributes = instance.numAttributes();

    // Sparse source: reuse its storage as-is.
    if (instance instanceof SparseInstance) {
        SparseInstance sparseSource = (SparseInstance) instance;
        m_AttValues = sparseSource.m_AttValues;
        m_Indices = sparseSource.m_Indices;
        return;
    }

    // Dense source: collect the non-zero entries into worst-case sized
    // buffers, then trim them to the number actually kept.
    final int total = instance.numAttributes();
    double[] valueBuffer = new double[total];
    int[] indexBuffer = new int[total];
    int kept = 0;
    for (int att = 0; att < total; att++) {
        double current = instance.value(att);
        if (current == 0) {
            continue;
        }
        valueBuffer[kept] = current;
        indexBuffer[kept] = att;
        kept++;
    }
    m_AttValues = new double[kept];
    m_Indices = new int[kept];
    System.arraycopy(valueBuffer, 0, m_AttValues, 0, kept);
    System.arraycopy(indexBuffer, 0, m_Indices, 0, kept);
}
/**
 * Copy constructor. The new instance takes over the weight and the
 * attribute count of the given instance and shares its value and index
 * arrays by reference. The dataset reference is set to null, so the new
 * instance has no access to attribute type information.
 *
 * @param instance the instance from which the attribute
 * info is to be copied
 */
public SparseInstance(SparseInstance instance) {
    // Storage is shared, not duplicated.
    m_AttValues = instance.m_AttValues;
    m_Indices = instance.m_Indices;

    m_NumAttributes = instance.m_NumAttributes;
    m_Weight = instance.m_Weight;

    m_Dataset = null;
}
/**
 * Builds a sparse instance from a weight and a full (dense) vector of
 * attribute values. Only the entries that are not equal to zero are
 * stored, together with their positions in the given vector. The
 * dataset reference is set to null, so the instance has no access to
 * attribute type information.
 *
 * @param weight the instance's weight
 * @param attValues a vector of attribute values
 */
public SparseInstance(double weight, double[] attValues) {
    m_Weight = weight;
    m_Dataset = null;
    m_NumAttributes = attValues.length;

    // First pass: how many entries need explicit storage?
    int stored = 0;
    for (int att = 0; att < m_NumAttributes; att++) {
        if (attValues[att] != 0) {
            stored++;
        }
    }

    // Second pass: copy those entries and remember where they came from.
    m_AttValues = new double[stored];
    m_Indices = new int[stored];
    int slot = 0;
    for (int att = 0; att < m_NumAttributes; att++) {
        if (attValues[att] != 0) {
            m_AttValues[slot] = attValues[att];
            m_Indices[slot] = att;
            slot++;
        }
    }
}
/**
 * Builds a sparse instance from values that are already in sparse
 * form. Entries of <code>attValues</code> that are equal to zero are
 * dropped together with their index; everything else is kept in the
 * given order. The dataset reference is set to null, so the instance
 * has no access to attribute type information.
 *
 * @param weight the instance's weight
 * @param attValues a vector of attribute values (just the ones to be stored)
 * @param indices the indices of the given values in the full vector
 * @param maxNumValues the maximum number of values that can be stored
 */
public SparseInstance(double weight, double[] attValues,
        int[] indices, int maxNumValues) {
    double[] keptValues = new double[attValues.length];
    int[] keptIndices = new int[indices.length];
    int kept = 0;
    for (int pos = 0; pos < attValues.length; pos++) {
        if (attValues[pos] == 0) {
            continue;
        }
        keptValues[kept] = attValues[pos];
        keptIndices[kept] = indices[pos];
        kept++;
    }

    if (kept == attValues.length) {
        // Nothing was dropped, the buffers can be used directly.
        m_AttValues = keptValues;
        m_Indices = keptIndices;
    } else {
        // At least one zero was skipped: shrink both arrays to fit.
        m_AttValues = new double[kept];
        System.arraycopy(keptValues, 0, m_AttValues, 0, kept);
        m_Indices = new int[kept];
        System.arraycopy(keptIndices, 0, m_Indices, 0, kept);
    }

    m_Weight = weight;
    m_NumAttributes = maxNumValues;
    m_Dataset = null;
}
/**
* Constructor of an instance that sets weight to one, all values to
* be missing, and the reference to the dataset to null. (ie. the instance
* doesn't have access to information about the attribute types)
*
* @param numAttributes the size of the instance
*/
public SparseInstance(int numAttributes) {
m_AttValues = new double[numAttributes];
m_NumAttributes = numAttributes;
m_Indices = new int[numAttributes];
for (int i = 0; i < m_AttValues.length; i++) {
m_AttValues[i] = MISSING_VALUE;
m_Indices[i] = i;
}
m_Weight = 1;
m_Dataset = null;
}
/**
 * Looks up the attribute that belongs to a position in the sparse
 * storage. The position is first translated into an attribute index,
 * which is then resolved against the dataset.
 *
 * @param indexOfIndex the index of the attribute's index
 * @return the attribute at the given position
 * @exception UnassignedDatasetException if instance doesn't have access to a
 * dataset
 */
public Attribute attributeSparse(int indexOfIndex) {
    if (m_Dataset != null) {
        return m_Dataset.attribute(m_Indices[indexOfIndex]);
    }
    throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
}
/**
 * Produces a shallow copy of this instance. The copy has access to the
 * same dataset. (If you want a copy that doesn't have access to the
 * dataset, use <code>new SparseInstance(instance)</code> instead.)
 *
 * @return the shallow copy
 */
public Object copy() {
    SparseInstance duplicate = new SparseInstance(this);
    // The copy constructor clears the dataset reference; restore it.
    duplicate.m_Dataset = m_Dataset;
    return duplicate;
}
/**
 * Translates a position in the sparse storage into the index of the
 * attribute whose value is stored there.
 *
 * @param position the position
 * @return the index of the attribute stored at the given position
 */
public int index(int position) {
    final int attributeIndex = m_Indices[position];
    return attributeIndex;
}
/**
 * Tests if a specific value is "missing". A value counts as missing
 * when <code>value(attIndex)</code> is NaN.
 *
 * @param attIndex the attribute's index
 * @return true if the value of the given attribute is missing
 */
public boolean isMissing(int attIndex) {
    return Double.isNaN(value(attIndex));
}
/**
 * Locates the greatest stored attribute index that is not greater than
 * the given index, using a binary search over the stored indices.
 *
 * @param index the attribute index to look for
 * @return the internal (storage) position of the given attribute index
 * if it is stored; otherwise the position of the greatest stored index
 * smaller than it, or -1 if every stored index is greater than the
 * given index (or nothing is stored)
 */
public int locateIndex(int index) {
    int min = 0;
    int max = m_Indices.length - 1;

    // Binary search
    while (max >= min) {
        // Unsigned shift instead of (max + min) / 2: the sum of two
        // non-negative ints may overflow, and >>> still yields the
        // correct midpoint in that case.
        int current = (min + max) >>> 1;
        if (m_Indices[current] > index) {
            max = current - 1;
        } else if (m_Indices[current] < index) {
            min = current + 1;
        } else {
            return current;
        }
    }

    // No exact match: max ended up just below the insertion point.
    return max;
}
/**
 * Merges this instance with the given instance and returns the result.
 * The merged instance lists the attributes of this instance first,
 * followed by the attributes of <code>inst</code>. Its weight is 1.0
 * and its dataset reference is null.
 *
 * @param inst the instance to be merged with this one
 * @return the merged instance
 */
public Instance mergeInstance(Instance inst) {
    final int ownValues = numValues();
    final int otherValues = inst.numValues();
    // Attributes of inst are appended after this instance's attributes,
    // so their indices move up by the number of attributes of THIS
    // instance (not of inst).
    final int offset = numAttributes();

    double[] values = new double[ownValues + otherValues];
    int[] indices = new int[ownValues + otherValues];
    int m = 0;
    for (int j = 0; j < ownValues; j++, m++) {
        values[m] = valueSparse(j);
        indices[m] = index(j);
    }
    for (int j = 0; j < otherValues; j++, m++) {
        values[m] = inst.valueSparse(j);
        indices[m] = offset + inst.index(j);
    }
    return new SparseInstance(1.0, values, indices,
            offset + inst.numAttributes());
}
/**
 * Reports how many attributes this instance has. This is the size of
 * the full vector, not the number of values that are stored explicitly.
 *
 * @return the number of attributes as an integer
 */
public int numAttributes() {
    final int attributeCount = m_NumAttributes;
    return attributeCount;
}
/**
 * Reports how many values are stored explicitly in the sparse vector,
 * i.e. the length of the array of stored attribute indices.
 *
 * @return the number of values
 */
public int numValues() {
    final int storedCount = m_Indices.length;
    return storedCount;
}
/**
 * Replaces every missing value in the instance with the entry of the
 * given array that belongs to the same attribute. The result is written
 * to freshly allocated value and index arrays; the arrays held before
 * the call are not modified. A missing value whose replacement is zero
 * is dropped from the sparse storage altogether.
 *
 * @param array containing the means and modes
 * @exception IllegalArgumentException if numbers of attributes are unequal
 */
public void replaceMissingValues(double[] array) {
    if (array == null || array.length != m_NumAttributes) {
        throw new IllegalArgumentException("Unequal number of attributes!");
    }

    final int capacity = m_AttValues.length;
    double[] valueBuffer = new double[capacity];
    int[] indexBuffer = new int[capacity];
    int kept = 0;

    for (int pos = 0; pos < capacity; pos++) {
        final int attIndex = m_Indices[pos];
        double current = m_AttValues[pos];
        if (isMissingValue(current)) {
            // Substitute the replacement; a zero needs no storage.
            current = array[attIndex];
            if (current == 0) {
                continue;
            }
        }
        valueBuffer[kept] = current;
        indexBuffer[kept] = attIndex;
        kept++;
    }

    // Trim the buffers to the number of entries that survived.
    m_AttValues = new double[kept];
    m_Indices = new int[kept];
    System.arraycopy(valueBuffer, 0, m_AttValues, 0, kept);
    System.arraycopy(indexBuffer, 0, m_Indices, 0, kept);
}
/**
* Sets a specific value in the instance to the given value
* (internal floating-point format). Performs a deep copy
* of the vector of attribute values before the value is set.
*
* @param attIndex the attribute's index
* @param value the new attribute value (If the corresponding
* attribute is nominal (or a string) then this is the new value's
* index as a double).
*/
public void setValue(int attIndex, double value) {
int index = locateIndex(attIndex);
if ((index >= 0) && (m_Indices[index] == attIndex)) {
if (value != 0) {
double[] tempValues = new double[m_AttValues.length];
System.arraycopy(m_AttValues, 0, tempValues, 0, m_AttValues.length);
tempValues[index] = value;
m_AttValues = tempValues;
} else {
double[] tempValues = new double[m_AttValues.length - 1];
int[] tempIndices = new int[m_Indices.length - 1];
System.arraycopy(m_AttValues, 0, tempValues, 0, index);
System.arraycopy(m_Indices, 0, tempIndices, 0, index);
System.arraycopy(m_AttValues, index + 1, tempValues, index,
m_AttValues.length - index - 1);
System.arraycopy(m_Indices, index + 1, tempIndices, index,
m_Indices.length - index - 1);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -