📄 sparseinstance.java
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    SparseInstance.java
 *    Copyright (C) 2000 Eibe Frank
 *
 */

package weka.core;

import java.util.*;
import java.io.*;

/**
 * Class for storing an instance as a sparse vector. A sparse instance
 * only requires storage for those attribute values that are non-zero.
 * Since the objective is to reduce storage requirements for datasets
 * with large numbers of default values, this also includes nominal
 * attributes -- the first nominal value (i.e. that which has index 0)
 * will not require explicit storage, so rearrange your nominal attribute
 * value orderings if necessary. Missing values will be stored
 * explicitly.
 */
public class SparseInstance extends Instance {

  /**
   * The index of the attribute associated with each stored value.
   * NOTE(review): locateIndex binary-searches this array, so it is
   * presumably expected to be in ascending order -- confirm that all
   * callers supply sorted indices.
   */
  protected int[] m_Indices;

  /** The maximum number of values that can be stored. */
  protected int m_NumAttributes;

  /**
   * No-argument constructor with an empty body: it assigns none of
   * the fields declared in this class. Being protected, it is only
   * reachable from subclasses and from classes in this package.
   */
  protected SparseInstance() {
  }

  /**
   * Constructor that generates a sparse instance from the given
   * instance. Reference to the dataset is set to null.
   * (ie.
the instance doesn't have access to information about the * attribute types) * * @param instance the instance from which the attribute values * and the weight are to be copied */ public SparseInstance(Instance instance) { m_Weight = instance.m_Weight; m_Dataset = null; m_NumAttributes = instance.numAttributes(); if (instance instanceof SparseInstance) { m_AttValues = ((SparseInstance)instance).m_AttValues; m_Indices = ((SparseInstance)instance).m_Indices; } else { double[] tempValues = new double[instance.numAttributes()]; int[] tempIndices = new int[instance.numAttributes()]; int vals = 0; for (int i = 0; i < instance.numAttributes(); i++) { if (instance.value(i) != 0) { tempValues[vals] = instance.value(i); tempIndices[vals] = i; vals++; } } m_AttValues = new double[vals]; m_Indices = new int[vals]; System.arraycopy(tempValues, 0, m_AttValues, 0, vals); System.arraycopy(tempIndices, 0, m_Indices, 0, vals); } } /** * Constructor that copies the info from the given instance. * Reference to the dataset is set to null. * (ie. the instance doesn't have access to information about the * attribute types) * * @param instance the instance from which the attribute * info is to be copied */ public SparseInstance(SparseInstance instance) { m_AttValues = instance.m_AttValues; m_Indices = instance.m_Indices; m_Weight = instance.m_Weight; m_NumAttributes = instance.m_NumAttributes; m_Dataset = null; } /** * Constructor that generates a sparse instance from the given * parameters. Reference to the dataset is set to null. * (ie. 
the instance doesn't have access to information about the * attribute types) * * @param weight the instance's weight * @param attValues a vector of attribute values */ public SparseInstance(double weight, double[] attValues) { m_Weight = weight; m_Dataset = null; m_NumAttributes = attValues.length; double[] tempValues = new double[m_NumAttributes]; int[] tempIndices = new int[m_NumAttributes]; int vals = 0; for (int i = 0; i < m_NumAttributes; i++) { if (attValues[i] != 0) { tempValues[vals] = attValues[i]; tempIndices[vals] = i; vals++; } } m_AttValues = new double[vals]; m_Indices = new int[vals]; System.arraycopy(tempValues, 0, m_AttValues, 0, vals); System.arraycopy(tempIndices, 0, m_Indices, 0, vals); } /** * Constructor that inititalizes instance variable with given * values. Reference to the dataset is set to null. (ie. the instance * doesn't have access to information about the attribute types) * * @param weight the instance's weight * @param attValues a vector of attribute values (just the ones to be stored) * @param indices the indices of the given values in the full vector * @param maxNumValues the maximium number of values that can be stored */ public SparseInstance(double weight, double[] attValues, int[] indices, int maxNumValues){ int vals = 0; m_AttValues = new double [attValues.length]; m_Indices = new int [indices.length]; for (int i = 0; i < attValues.length; i++) { if (attValues[i] != 0) { m_AttValues[vals] = attValues[i]; m_Indices[vals] = indices[i]; vals++; } } if (vals != attValues.length) { // Need to truncate. double [] newVals = new double[vals]; System.arraycopy(m_AttValues, 0, newVals, 0, vals); m_AttValues = newVals; int [] newIndices = new int[vals]; System.arraycopy(m_Indices, 0, newIndices, 0, vals); m_Indices = newIndices; } m_Weight = weight; m_NumAttributes = maxNumValues; m_Dataset = null; } /** * Constructor of an instance that sets weight to one, all values to * be missing, and the reference to the dataset to null. (ie. 
the instance * doesn't have access to information about the attribute types) * * @param numAttributes the size of the instance */ public SparseInstance(int numAttributes) { m_AttValues = new double[numAttributes]; m_NumAttributes = numAttributes; m_Indices = new int[numAttributes]; for (int i = 0; i < m_AttValues.length; i++) { m_AttValues[i] = MISSING_VALUE; m_Indices[i] = i; } m_Weight = 1; m_Dataset = null; } /** * Returns the attribute associated with the internal index. * * @param indexOfIndex the index of the attribute's index * @return the attribute at the given position * @exception UnassignedDatasetException if instance doesn't have access to a * dataset */ public Attribute attributeSparse(int indexOfIndex) { if (m_Dataset == null) { throw new UnassignedDatasetException("Instance doesn't have access to a dataset!"); } return m_Dataset.attribute(m_Indices[indexOfIndex]); } /** * Produces a shallow copy of this instance. The copy has * access to the same dataset. (if you want to make a copy * that doesn't have access to the dataset, use * <code>new SparseInstance(instance)</code> * * @return the shallow copy */ public Object copy() { Instance result = new SparseInstance(this); result.m_Dataset = m_Dataset; return result; } /** * Returns the index of the attribute stored at the given position. * * @param position the position * @return the index of the attribute stored at the given position */ public int index(int position) { return m_Indices[position]; } /** * Tests if a specific value is "missing". * * @param attIndex the attribute's index */ public boolean isMissing(int attIndex) { if (Double.isNaN(value(attIndex))) { return true; } return false; } /** * Locates the greatest index that is not greater than the * given index. * * @return the internal index of the attribute index. 
Returns * -1 if no index with this property couldn't be found */ public int locateIndex(int index) { int min = 0, max = m_Indices.length - 1; // Binary search while (max >= min) { int current = (max + min) / 2; if (m_Indices[current] > index) { max = current - 1; } else if (m_Indices[current] < index) { min = current + 1; } else { return current; } } return max; } /** * Merges this instance with the given instance and returns * the result. Dataset is set to null. * * @param inst the instance to be merged with this one * @return the merged instances */ public Instance mergeInstance(Instance inst) { double[] values = new double[numValues() + inst.numValues()]; int[] indices = new int[numValues() + inst.numValues()]; int m = 0; for (int j = 0; j < numValues(); j++, m++) { values[m] = valueSparse(j); indices[m] = index(j); } for (int j = 0; j < inst.numValues(); j++, m++) { values[m] = inst.valueSparse(j); indices[m] = inst.index(j) + inst.numAttributes(); } return new SparseInstance(1.0, values, indices, numAttributes() + inst.numAttributes()); } /** * Returns the number of attributes. * * @return the number of attributes as an integer */ public int numAttributes() { return m_NumAttributes; } /** * Returns the number of values in the sparse vector. * * @return the number of values */ public int numValues() { return m_Indices.length; } /** * Replaces all missing values in the instance with the * values contained in the given array. A deep copy of * the vector of attribute values is performed before the * values are replaced. 
* * @param array containing the means and modes * @exception IllegalArgumentException if numbers of attributes are unequal */ public void replaceMissingValues(double[] array) { if ((array == null) || (array.length != m_NumAttributes)) { throw new IllegalArgumentException("Unequal number of attributes!"); } double[] tempValues = new double[m_AttValues.length]; int[] tempIndices = new int[m_AttValues.length]; int vals = 0; for (int i = 0; i < m_AttValues.length; i++) { if (isMissingValue(m_AttValues[i])) { if (array[m_Indices[i]] != 0) { tempValues[vals] = array[m_Indices[i]]; tempIndices[vals] = m_Indices[i]; vals++; } } else { tempValues[vals] = m_AttValues[i]; tempIndices[vals] = m_Indices[i]; vals++; } } m_AttValues = new double[vals]; m_Indices = new int[vals]; System.arraycopy(tempValues, 0, m_AttValues, 0, vals); System.arraycopy(tempIndices, 0, m_Indices, 0, vals); } /** * Sets a specific value in the instance to the given value * (internal floating-point format). Performs a deep copy * of the vector of attribute values before the value is set. * * @param attIndex the attribute's index * @param value the new attribute value (If the corresponding * attribute is nominal (or a string) then this is the new value's * index as a double). 
*/ public void setValue(int attIndex, double value) { int index = locateIndex(attIndex); if ((index >= 0) && (m_Indices[index] == attIndex)) { if (value != 0) { double[] tempValues = new double[m_AttValues.length]; System.arraycopy(m_AttValues, 0, tempValues, 0, m_AttValues.length); tempValues[index] = value; m_AttValues = tempValues; } else { double[] tempValues = new double[m_AttValues.length - 1]; int[] tempIndices = new int[m_Indices.length - 1]; System.arraycopy(m_AttValues, 0, tempValues, 0, index); System.arraycopy(m_Indices, 0, tempIndices, 0, index); System.arraycopy(m_AttValues, index + 1, tempValues, index, m_AttValues.length - index - 1); System.arraycopy(m_Indices, index + 1, tempIndices, index, m_Indices.length - index - 1); m_AttValues = tempValues; m_Indices = tempIndices; } } else { if (value != 0) { double[] tempValues = new double[m_AttValues.length + 1]; int[] tempIndices = new int[m_Indices.length + 1];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -