⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sparseinstance.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    SparseInstance.java
 *    Copyright (C) 2000 Eibe Frank
 *
 */

package weka.core;

import java.util.Enumeration;

/**
 * Class for storing an instance as a sparse vector. A sparse instance
 * only requires storage for those attribute values that are non-zero.
 * Since the objective is to reduce storage requirements for datasets
 * with large numbers of default values, this also includes nominal
 * attributes -- the first nominal value (i.e. that which has index 0)
 * will not require explicit storage, so rearrange your nominal attribute
 * value orderings if necessary. Missing values will be stored
 * explicitly.
 */
public class SparseInstance extends Instance {

  /**
   * The index of the attribute associated with each stored value.
   * Entry i is the attribute index of the value stored at position i.
   */
  protected int[] m_Indices;

  /**
   * The maximum number of values that can be stored. This is the
   * count reported by numAttributes().
   */
  protected int m_NumAttributes;

  /**
   * Constructor that does not assign any of the fields itself.
   * NOTE(review): presumably meant for subclasses or internal code
   * that fills in the fields afterwards -- confirm against callers.
   */
  protected SparseInstance() {

  }

  /**
   * Builds a sparse instance from the weight and the attribute values
   * of the given instance. The reference to the dataset is set to null,
   * so the new instance has no access to information about the
   * attribute types.
   *
   * @param instance the instance whose attribute values and weight
   * are taken over
   */
  public SparseInstance(Instance instance) {

    m_Weight = instance.m_Weight;
    m_Dataset = null;
    m_NumAttributes = instance.numAttributes();

    if (instance instanceof SparseInstance) {
      // Source is already sparse: reuse its value and index arrays as-is.
      SparseInstance sparse = (SparseInstance) instance;
      m_AttValues = sparse.m_AttValues;
      m_Indices = sparse.m_Indices;
      return;
    }

    // Dense source: gather the non-zero entries into worst-case sized
    // buffers, then trim them to the number of entries actually kept.
    double[] bufferedValues = new double[m_NumAttributes];
    int[] bufferedIndices = new int[m_NumAttributes];
    int stored = 0;
    for (int att = 0; att < m_NumAttributes; att++) {
      double current = instance.value(att);
      if (current == 0) {
        continue;
      }
      bufferedValues[stored] = current;
      bufferedIndices[stored] = att;
      stored++;
    }
    m_AttValues = new double[stored];
    m_Indices = new int[stored];
    System.arraycopy(bufferedValues, 0, m_AttValues, 0, stored);
    System.arraycopy(bufferedIndices, 0, m_Indices, 0, stored);
  }

  /**
   * Copy constructor. Takes over the weight, the number of attributes
   * and the references to the value and index arrays of the given
   * sparse instance (the arrays themselves are not duplicated).
   * The reference to the dataset is set to null, so the new instance
   * has no access to information about the attribute types.
   *
   * @param instance the sparse instance whose information is copied
   */
  public SparseInstance(SparseInstance instance) {

    this.m_Weight = instance.m_Weight;
    this.m_NumAttributes = instance.m_NumAttributes;
    this.m_Dataset = null;

    // The arrays are shared with the source instance, not cloned.
    this.m_Indices = instance.m_Indices;
    this.m_AttValues = instance.m_AttValues;
  }

  /**
   * Builds a sparse instance from a weight and a full (dense) vector
   * of attribute values. Only the non-zero entries are stored. The
   * reference to the dataset is set to null, so the instance has no
   * access to information about the attribute types.
   *
   * @param weight the instance's weight
   * @param attValues a vector of attribute values
   */
  public SparseInstance(double weight, double[] attValues) {

    m_Weight = weight;
    m_Dataset = null;
    m_NumAttributes = attValues.length;

    // First pass: find out how many entries have to be stored.
    int stored = 0;
    for (int att = 0; att < attValues.length; att++) {
      if (attValues[att] != 0) {
        stored++;
      }
    }

    // Second pass: write the non-zero entries into exactly sized arrays.
    m_AttValues = new double[stored];
    m_Indices = new int[stored];
    int next = 0;
    for (int att = 0; att < attValues.length; att++) {
      double current = attValues[att];
      if (current != 0) {
        m_AttValues[next] = current;
        m_Indices[next] = att;
        next++;
      }
    }
  }
  
  /**
   * Constructor that initializes the instance variables with the given
   * values. Entries whose value is zero are dropped. The reference to
   * the dataset is set to null, so the instance has no access to
   * information about the attribute types. Note that the indices need
   * to be sorted in ascending order. Otherwise things won't work
   * properly.
   *
   * @param weight the instance's weight
   * @param attValues a vector of attribute values (just the ones to be stored)
   * @param indices the indices of the given values in the full vector (need to
   * be sorted in ascending order)
   * @param maxNumValues the maximum number of values that can be stored
   */
  public SparseInstance(double weight, double[] attValues,
                        int[] indices, int maxNumValues) {

    double[] keptValues = new double[attValues.length];
    int[] keptIndices = new int[indices.length];
    int kept = 0;
    for (int pos = 0; pos < attValues.length; pos++) {
      if (attValues[pos] == 0) {
        continue;
      }
      keptValues[kept] = attValues[pos];
      keptIndices[kept] = indices[pos];
      kept++;
    }

    if (kept == attValues.length) {
      // Nothing was dropped, the buffers can be used directly.
      m_AttValues = keptValues;
      m_Indices = keptIndices;
    } else {
      // Some zero entries were skipped: trim both arrays.
      m_AttValues = new double[kept];
      System.arraycopy(keptValues, 0, m_AttValues, 0, kept);
      m_Indices = new int[kept];
      System.arraycopy(keptIndices, 0, m_Indices, 0, kept);
    }

    m_Weight = weight;
    m_NumAttributes = maxNumValues;
    m_Dataset = null;
  }

  /**
   * Constructor of an instance that sets weight to one, all values to
   * be missing, and the reference to the dataset to null. (ie. the instance
   * doesn't have access to information about the attribute types)
   *
   * @param numAttributes the size of the instance 
   */
  public SparseInstance(int numAttributes) {
    
    m_AttValues = new double[numAttributes];
    m_NumAttributes = numAttributes;
    m_Indices = new int[numAttributes];
    for (int i = 0; i < m_AttValues.length; i++) {
      m_AttValues[i] = MISSING_VALUE;
      m_Indices[i] = i;
    }
    m_Weight = 1;
    m_Dataset = null;
  }

  /**
   * Looks up the attribute that belongs to the value stored at the
   * given internal position.
   *
   * @param indexOfIndex the position in the internal index array
   * @return the attribute of the dataset for that position
   * @exception UnassignedDatasetException if instance doesn't have access to a
   * dataset
   */
  public Attribute attributeSparse(int indexOfIndex) {

    if (m_Dataset != null) {
      // Translate the internal position into the real attribute index.
      int attIndex = m_Indices[indexOfIndex];
      return m_Dataset.attribute(attIndex);
    }
    throw new UnassignedDatasetException("Instance doesn't have access to a dataset!");
  }

  /**
   * Creates a shallow copy of this instance that refers to the same
   * dataset as this one. (To get a copy without access to the dataset,
   * use <code>new SparseInstance(instance)</code> instead.)
   *
   * @return the shallow copy
   */
  public Object copy() {

    SparseInstance duplicate = new SparseInstance(this);
    // The copy constructor clears the dataset reference; restore it.
    duplicate.m_Dataset = this.m_Dataset;
    return duplicate;
  }

  /**
   * Gives the attribute index that belongs to the value stored at the
   * given position of the sparse vector.
   *
   * @param position the position in the sparse vector
   * @return the index of the attribute stored at the given position
   */
  public int index(int position) {

    int attIndex = m_Indices[position];
    return attIndex;
  }

  /**
   * Tests if a specific value is "missing", i.e. whether the value
   * of the given attribute is NaN.
   *
   * @param attIndex the attribute's index
   * @return true if the value of the attribute is NaN, false otherwise
   */
  public boolean isMissing(int attIndex) {

    double attValue = value(attIndex);
    return Double.isNaN(attValue);
  }

  /**
   * Locates the greatest stored attribute index that is not greater
   * than the given index. The search is a binary search over the
   * internal index array, so that array has to be sorted in ascending
   * order.
   *
   * @param index the attribute index to look for
   * @return the internal position of the greatest stored index that
   * is less than or equal to the given index. Returns -1 if no index
   * with this property could be found
   */
  public int locateIndex(int index) {

    int min = 0, max = m_Indices.length - 1;

    // Nothing is stored at all.
    if (max == -1) {
      return -1;
    }

    // Binary search; the loop only runs while the target lies inside
    // the range of indices spanned by [min, max].
    while ((m_Indices[min] <= index) && (m_Indices[max] >= index)) {
      // Unsigned shift keeps the midpoint correct even if min + max
      // exceeds Integer.MAX_VALUE.
      int current = (min + max) >>> 1;
      if (m_Indices[current] > index) {
        max = current - 1;
      } else if (m_Indices[current] < index) {
        min = current + 1;
      } else {
        return current;
      }
    }

    // No exact match: report the closest stored index below the target.
    if (m_Indices[max] < index) {
      return max;
    } else {
      return min - 1;
    }
  }

  /**
   * Merges this instance with the given instance and returns
   * the result. The attributes of the given instance are appended
   * after the attributes of this instance. The result has weight 1.0
   * and its dataset is set to null.
   *
   * @param inst the instance to be merged with this one
   * @return the merged instances
   */
  public Instance mergeInstance(Instance inst) {

    int ownCount = numValues();
    int otherCount = inst.numValues();
    // Attributes of inst start right after this instance's attributes,
    // so their indices are shifted by THIS instance's attribute count.
    int offset = numAttributes();

    double[] values = new double[ownCount + otherCount];
    int[] indices = new int[ownCount + otherCount];

    for (int j = 0; j < ownCount; j++) {
      values[j] = valueSparse(j);
      indices[j] = index(j);
    }
    for (int j = 0; j < otherCount; j++) {
      values[ownCount + j] = inst.valueSparse(j);
      indices[ownCount + j] = offset + inst.index(j);
    }

    return new SparseInstance(1.0, values, indices,
                              offset + inst.numAttributes());
  }

  /**
   * Reports how many attributes this instance has (stored or not).
   *
   * @return the number of attributes as an integer
   */
  public int numAttributes() {

    return this.m_NumAttributes;
  }

  /**
   * Reports how many values are explicitly stored in the sparse
   * vector, i.e. the length of the internal index array.
   *
   * @return the number of values
   */
  public int numValues() {

    int storedCount = m_Indices.length;
    return storedCount;
  }

  /**
   * Replaces all missing values in the instance with the values
   * contained in the given array, which is indexed by attribute index.
   * A replacement value of zero causes the entry to be dropped from
   * the sparse vector. New value and index arrays are created, so the
   * previous arrays are left unmodified.
   *
   * @param array containing the means and modes
   * @exception IllegalArgumentException if numbers of attributes are unequal
   */
  public void replaceMissingValues(double[] array) {

    boolean sizeMatches = (array != null) && (array.length == m_NumAttributes);
    if (!sizeMatches) {
      throw new IllegalArgumentException("Unequal number of attributes!");
    }

    int capacity = m_AttValues.length;
    double[] bufferedValues = new double[capacity];
    int[] bufferedIndices = new int[capacity];
    int kept = 0;
    for (int pos = 0; pos < capacity; pos++) {
      double current = m_AttValues[pos];
      int attIndex = m_Indices[pos];
      if (isMissingValue(current)) {
        current = array[attIndex];
        if (current == 0) {
          // A zero replacement needs no explicit storage.
          continue;
        }
      }
      bufferedValues[kept] = current;
      bufferedIndices[kept] = attIndex;
      kept++;
    }

    // Trim the buffers to the number of entries that remain stored.
    m_AttValues = new double[kept];
    m_Indices = new int[kept];
    System.arraycopy(bufferedValues, 0, m_AttValues, 0, kept);
    System.arraycopy(bufferedIndices, 0, m_Indices, 0, kept);
  }

  /**
   * Sets a specific value in the instance to the given value 
   * (internal floating-point format). Performs a deep copy
   * of the vector of attribute values before the value is set.
   *
   * @param attIndex the attribute's index 
   * @param value the new attribute value (If the corresponding
   * attribute is nominal (or a string) then this is the new value's
   * index as a double).  
   */
  public void setValue(int attIndex, double value) {

    int index = locateIndex(attIndex);
    
    if ((index >= 0) && (m_Indices[index] == attIndex)) {
      if (value != 0) {
	double[] tempValues = new double[m_AttValues.length];
	System.arraycopy(m_AttValues, 0, tempValues, 0, m_AttValues.length);
	tempValues[index] = value;
	m_AttValues = tempValues;
      } else {
	double[] tempValues = new double[m_AttValues.length - 1];
	int[] tempIndices = new int[m_Indices.length - 1];
	System.arraycopy(m_AttValues, 0, tempValues, 0, index);
	System.arraycopy(m_Indices, 0, tempIndices, 0, index);
	System.arraycopy(m_AttValues, index + 1, tempValues, index, 
			 m_AttValues.length - index - 1);
	System.arraycopy(m_Indices, index + 1, tempIndices, index, 
			 m_Indices.length - index - 1);
	m_AttValues = tempValues;
	m_Indices = tempIndices;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -