📄 sparsevector.java
字号:
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. */package edu.umass.cs.mallet.base.types;import java.util.Arrays;import edu.umass.cs.mallet.base.util.PropertyList;import java.io.*;import java.lang.reflect.Method;import java.lang.reflect.InvocationTargetException;/** A vector that allocates memory only for non-zero values. When you create a SparseVector, you pass in a list of indices. These are the only elements of the vector you will be allowed to change. The rest are fixed at 0. The interface to Sparse vector uses the concept of a location, which is an integer in the range 0..numLocations which can be mapped to the index (and value) of a non zero element of the vector. A SparseVector can be sparse or dense depending on whether or not an array if indices is specified at construction time. If the SparseVector is dense, the mapping from location to index is the identity mapping. The type of the value an element in a SparseVector (or FeatureVector) can be double or binary (0.0 or 1.0), depending on whether an array of doubles is specified at contruction time. @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a>*/public class SparseVector implements ConstantMatrix, Vector, Serializable{ /** If the vector is sparse, then both indices and values are sparse. Indices into these arrays are called ``locations'' in the below. The indices[] array maps locations to indices of the (virtual) dense array that's being represented. value[] maps locations to values. */ protected int[] indices; // if this is null, then the vector is dense protected double[] values; // if this is null, then the vector is binary /** If "indices" is null, the vector will be dense. If "values" is null, the vector will be binary. The capacity and size arguments are used by AugmentableFeatureVector. */ public SparseVector (int[] indices, double[] values, int capacity, int size, boolean copy, boolean checkIndicesSorted, boolean removeDuplicates) { // "size" was pretty much ignored??? Why? int length; length = size; if (capacity < length) capacity = length; assert (size <= length); assert (values == null || indices == null || indices.length == values.length); if (copy || capacity > length) { if (indices == null) this.indices = null; else { this.indices = new int[capacity]; System.arraycopy (indices, 0, this.indices, 0, length); } if (values == null) this.values = null; else { this.values = new double[capacity]; System.arraycopy (values, 0, this.values, 0, length); } } else { this.indices = indices; this.values = values; } if (checkIndicesSorted) sortIndices (); // This also removes duplicates else if (removeDuplicates) removeDuplicates (0); } // Create a dense Vector public SparseVector (double[] values, boolean copy) { this (null, values, values.length, values.length, copy, false, false); } public SparseVector (double[] values) { this (values, true); } public SparseVector (int size, double fillValue) { this (newArrayOfValue (size, fillValue), false); } public SparseVector (int[] indices, double[] values, boolean copy, boolean checkIndicesSorted, boolean removeDuplicates) { this (indices, values, (indices != null) ? indices.length : values.length, (indices != null) ? indices.length : values.length, copy, checkIndicesSorted, removeDuplicates); } public SparseVector (int[] indices, double[] values) { this (indices, values, true, true, true); } public SparseVector (int[] indices, double[] values, boolean copy) { this (indices, values, copy, true, true); } public SparseVector (int[] indices, double[] values, boolean copy, boolean checkIndicesSorted) { this (indices, values, copy, checkIndicesSorted, true); } // Create a vector that is possibly binary or non-binary public SparseVector (int[] indices, boolean copy, boolean checkIndicesSorted, boolean removeDuplicates, boolean binary) { this (indices, binary ? null : newArrayOfValue(indices.length,1.0), indices.length, indices.length, copy, checkIndicesSorted, removeDuplicates); } // Create a binary vector public SparseVector (int[] indices, int capacity, int size, boolean copy, boolean checkIndicesSorted, boolean removeDuplicates) { this (indices, null, capacity, size, copy, checkIndicesSorted, removeDuplicates); } public SparseVector (int[] indices, boolean copy, boolean checkIndicesSorted) { this (indices, null, copy, checkIndicesSorted, true); } public SparseVector (int[] indices, boolean copy) { this (indices, null, copy, true, true); } public SparseVector (int[] indices) { this (indices, null, true, true, true); } /** An empty vector, with all zero values */ public SparseVector () { this (new int[0], new double[0], false, false); } public SparseVector (Alphabet dict, PropertyList pl, boolean binary, boolean growAlphabet) { if (pl == null) { // xxx Fix SparseVector so that it can properly represent a vector that has all zeros. // Does this work? indices = new int[0]; values = null; return; } AugmentableFeatureVector afv = new AugmentableFeatureVector (dict, binary); //afv.print(); //System.out.println ("SparseVector binary="+binary); //pl.print(); PropertyList.Iterator iter = pl.numericIterator(); while (iter.hasNext()) { iter.nextProperty(); //System.out.println ("SparseVector adding "+iter.getKey()+" "+iter.getNumericValue()); int index = dict.lookupIndex(iter.getKey(), growAlphabet); if (index >=0) { afv.add (index, iter.getNumericValue()); } //System.out.println ("SparseVector afv adding "+iter.getKey()+" afv.numLocations="+afv.numLocations()); } //afv.print(); // xxx Not so efficient? SparseVector sv = afv.toSparseVector(); //System.out.println ("SparseVector sv.numLocations="+sv.numLocations()); this.indices = sv.indices; this.values = sv.values; } public SparseVector (Alphabet dict, PropertyList pl, boolean binary) { this(dict, pl, binary, true); } private static double[] newArrayOfValue (int length, double value) { double[] ret = new double[length]; Arrays.fill (ret, value); return ret; } public boolean isBinary () { return values == null; } public void makeBinary () { throw new UnsupportedOperationException ("Not yet implemented"); } public void makeNonBinary () { throw new UnsupportedOperationException ("Not yet implemented"); } /*********************************************************************** * ACCESSORS ***********************************************************************/ public int getNumDimensions () { return 1; } // xxx What do we return for the length? It could be higher than this index. public int getDimensions (int[] sizes) { if (indices == null) sizes[0] = values.length; else // xxx This is pretty unsatisfactory, since there may be zero // values above this location. sizes[0] = indices[indices.length-1]; return 1; } // necessary for the SVM implementation! -dmetzler // ...but be careful, this is allowed to be null! -cas public int [] getIndices() { return indices; } // necessary for the SVM implementation! -dmetzler // ...but be careful, this is allowed to be null! -cas public double [] getValues() { return values; } // xxx This is just the number of non-zero entries... // This is different behavior than Matrix2!! public int numLocations () { return (values == null ? (indices == null ? 0 : indices.length) : values.length); } public int location (int index) { if (indices == null) return index; else return Arrays.binarySearch (indices, index); } public double valueAtLocation (int location) { return values == null ? 1.0 : values[location]; } public int indexAtLocation (int location) { return indices == null ? location : indices[location]; } public double value (int[] indices) { assert (indices.length == 1); if (indices == null) return values[indices[0]]; else return values[location(indices[0])]; } public double value (int index) { if (indices == null) try { return values[index]; } catch (ArrayIndexOutOfBoundsException e) { return 0.0; } else { int loc = location(index); if (loc < 0) return 0.0; else if (values == null) return 1.0; else return values[location(index)]; } } public void addTo (double[] accumulator, double scale) { if (indices == null) { for (int i = 0; i < values.length; i++) accumulator[i] += values[i] * scale; } else if (values == null) { for (int i = 0; i < indices.length; i++) accumulator[indices[i]] += scale; } else { for (int i = 0; i < indices.length; i++) accumulator[indices[i]] += values[i] * scale; } } public void addTo (double[] accumulator) { addTo (accumulator, 1.0); } public int singleIndex (int[] indices) { assert (indices.length == 1); return indices[0]; } public void singleToIndices (int i, int[] indices) { indices[0] = i; } public double singleValue (int i) { return value(i); } public int singleSize () { if (indices == null) return values.length; else if (indices.length == 0) return 0; else // This is just the highest index that will have non-zero value. // The full size of this dimension is "unknown" return indices[indices.length-1]; } public String toString() { return this.toString(false); } public String toString(boolean onOneLine) { StringBuffer sb = new StringBuffer (); for (int i = 0; i < values.length; i++) { sb.append((indices == null ? i : indices[i])); sb.append ("="); sb.append (values[i]); if (!onOneLine) sb.append ("\n"); else sb.append (' '); } return sb.toString(); } /*********************************************************************** * CLONING ***********************************************************************/ public ConstantMatrix cloneMatrix () { if (indices == null) return new SparseVector (values); else return new SparseVector (indices, values, true, false, false); } public ConstantMatrix cloneMatrixZeroed () { if (indices == null) return new SparseVector (new double[values.length]); else { int[] newIndices = new int[indices.length]; System.arraycopy (indices, 0, newIndices, 0, indices.length); return new SparseVector (newIndices, new double[values.length], true, false, false); } } /*********************************************************************** * MUTATORS ***********************************************************************/ /** * For each index i that is present in this vector, * set this[i] += v[i]. * If v has indices that are not present in this, * these are just ignored. */ public void plusEqualsSparse (SparseVector v) { plusEqualsSparse (v, 1.0); } /** * For each index i that is present in this vector, * set this[i] += factor * v[i]. * If v has indices that are not present in this, * these are just ignored. */ public void plusEqualsSparse (SparseVector v, double factor) { // Special case for dense sparse vector if (indices == null) { densePlusEqualsSparse (v, factor); return; } int loc1 = 0; int loc2 = 0; while ((loc1 < numLocations()) && (loc2 < v.numLocations())) { int idx1 = indexAtLocation (loc1); int idx2 = v.indexAtLocation (loc2); if (idx1 == idx2) { values [loc1] += v.valueAtLocation (loc2) * factor; ++loc1; ++loc2; } else if (idx1 < idx2) { ++loc1; } else { // idx2 not present in this. Ignore. ++loc2; } } } /** * For each index i that is present in this vector, * set this[i] *= v[i]. * If v has indices that are not present in this, * these are just ignored. */ public void timesEqualsSparse (SparseVector v) { timesEqualsSparse (v, 1.0); } /** * For each index i that is present in this vector,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -