📄 sparsefloatvector.java
字号:
/*
 * LingPipe v. 3.5
 * Copyright (C) 2003-2008 Alias-i
 *
 * This program is licensed under the Alias-i Royalty Free License
 * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the Alias-i
 * Royalty Free License Version 1 for more details.
 *
 * You should have received a copy of the Alias-i Royalty Free License
 * Version 1 along with this program; if not, visit
 * http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact
 * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
 * +1 (718) 290-9170.
 */

package com.aliasi.matrix;

import com.aliasi.util.AbstractExternalizable;

import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.io.Serializable;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

/**
 * A <code>SparseFloatVector</code> implements an immutable sparse
 * vector with values represented as single-precision floating point
 * numbers.  Sparse vectors are specified in terms of mappings from
 * integer dimensions to single-precision floating-point values.  The
 * constructor allows the number of dimensions to be set, or to be
 * inferred as one plus the largest dimension with a value in the
 * mapping.  Dimensions for which no value is specified in the map
 * provided to the constructor will have values of 0.0.
 *
 * <p>A deep copy is made of the map provided to the constructor, so
 * that changes to the specified map do not affect this vector and
 * changes to this vector do not affect the map.
 *
 * <p><i>Implementation Note:</i> The underlying data is stored in a
 * pair of parallel arrays, one containing integer indexes and the
 * other values of type <code>float</code>.  The constructor computes
 * and stores the fixed number of dimensions.  The constructor also
 * stores the length of the vector by walking over the values.  Dot
 * products between sparse vectors are computed at double-precision by
 * walking over the indices and doing a merge, which is the most
 * efficient approach if the vectors are roughly the same size.  Dot
 * products with other vector implementations are computed by
 * iterating over the indexes in the sparse vector and looking up the
 * corresponding values in the argument vector.  Cosines are computed
 * by dividing dot products by lengths.
 *
 * <p>Equality versus other sparse float vectors only considers indexes
 * with values.  Hash codes also only consider indexes with values,
 * computing a shift and mask as well as an integer multiply and add
 * for each dimension.
 *
 * @author  Bob Carpenter
 * @version 3.5.1
 * @since   LingPipe3.1
 */
public class SparseFloatVector
    extends AbstractVector
    implements Serializable {

    // Parallel arrays: mValues[i] is the value at dimension mKeys[i].
    // Keys are kept sorted in strictly ascending order.
    final int[] mKeys;
    final float[] mValues;

    // Fixed dimensionality of the vector.
    final int mNumDimensions;

    // Length of the vector, computed once at construction time.
    final double mLength;

    /**
     * Construct a sparse vector from the specified map.  The
     * dimensionality will be fixed to one plus the largest integer
     * with a value specified in the map, or to zero if the map is
     * empty.  See the class documentation for implementation details.
     *
     * @param map Mapping from dimensions to values.
     * @throws IllegalArgumentException If there are negative keys, or
     * if a key is equal to <code>Integer.MAX_VALUE</code>.
     */
    public SparseFloatVector(Map<Integer,? extends Number> map) {
        this(map,-1,false);
    }

    /**
     * Constructs a sparse vector from the specified map with the
     * specified number of dimensions.  See the class documentation
     * for further implementation details.
     *
     * @param map Mapping from dimensions to values.
     * @param numDimensions Number of dimensions for the constructed vector.
     * @throws IllegalArgumentException If there are negative keys, if
     * a key is equal to <code>Integer.MAX_VALUE</code>, if the
     * specified number of dimensions is negative, or if the specified
     * number of dimensions is not strictly greater than the largest
     * integer key.
     */
    public SparseFloatVector(Map<Integer,? extends Number> map,
                             int numDimensions) {
        this(map,numDimensions,true);
    }

    /**
     * Construct a sparse floating point vector with the specified
     * keys defined at the specified values with the specified number
     * of dimensions.  The keys must be non-negative and sorted in
     * ascending order, no two keys may be equal, and no key may be
     * equal to or greater than the number of dimensions.
     *
     * <p><b>Warning:</b> The specified arrays are stored directly in
     * the constructed vector rather than being copied, so they should
     * not be modified after being passed to this constructor.
     *
     * @param keys Array of keys indicating the defined dimensions.
     * @param values Array of values for specified dimensions.
     * @param numDimensions The dimensionality of the constructed vector.
     * @throws IllegalArgumentException If the keys and values are not
     * the same length, if the number of dimensions is negative, if
     * the keys are not in ascending order, if a key is negative, if
     * two keys are the same, or if a key is greater than or equal to
     * the number of dimensions.
     */
    public SparseFloatVector(int[] keys, float[] values, int numDimensions) {
        // The delegated constructor validates numDimensions; the
        // remaining argument checks have to follow the this(...) call.
        this(keys,values,numDimensions,constructorLength(values));
        if (keys.length != values.length) {
            String msg = "Keys and values must be same length."
                + " Found keys.length=" + keys.length
                + " values.length=" + values.length;
            throw new IllegalArgumentException(msg);
        }
        for (int i = 1; i < keys.length; ++i) {
            if (keys[i-1] >= keys[i]) {
                String msg = "Keys must be in strictly ascending order."
                    + " Found keys[" + (i-1) + "]=" + keys[i-1]
                    + " keys[" + i + "]=" + keys[i];
                throw new IllegalArgumentException(msg);
            }
        }
        // Keys are strictly ascending at this point, so the first key
        // is the smallest and is the only one needing a sign check.
        if (keys.length > 0 && keys[0] < 0) {
            String msg = "All keys must be non-negative."
                + " Found key=" + keys[0];
            throw new IllegalArgumentException(msg);
        }
        if (keys.length > 0 && keys[keys.length-1] >= numDimensions) {
            String msg = "Keys must be less than number of dimensions."
                + " Found numDimensions=" + numDimensions
                + " keys[" + (keys.length-1) + "]=" + keys[keys.length-1];
            throw new IllegalArgumentException(msg);
        }
    }

    /**
     * Returns the square root of the sum of the squares of the
     * specified values.
     *
     * @param vs Values whose length is computed.
     * @return The Euclidean length of the specified values.
     */
    static double constructorLength(float[] vs) {
        double sum = 0;
        // NOTE(review): each product is evaluated in float arithmetic
        // before being widened and added to the double accumulator.
        for (int i = 0; i < vs.length; ++i)
            sum += vs[i] * vs[i];
        return Math.sqrt(sum);
    }

    /**
     * Construct a sparse vector that stores the specified arrays,
     * dimensionality and precomputed length without copying.  Only
     * the number of dimensions is validated by this constructor.
     *
     * @param keys Array of keys indicating the defined dimensions.
     * @param values Array of values for specified dimensions.
     * @param numDimensions The dimensionality of the constructed vector.
     * @param length Precomputed length of the vector.
     * @throws IllegalArgumentException If the number of dimensions is
     * negative.
     */
    SparseFloatVector(int[] keys, float[] values, int numDimensions,
                      double length) {
        if (numDimensions < 0) {
            // Zero dimensions are accepted; only negatives are rejected.
            String msg = "Dimensionality must be non-negative."
                + " Found numDimensions=" + numDimensions;
            throw new IllegalArgumentException(msg);
        }
        mKeys = keys;
        mValues = values;
        mNumDimensions = numDimensions;
        mLength = length;
    }

    /**
     * Shared implementation of the map-based constructors.  The keys
     * of the map are sorted and copied, along with their values, into
     * freshly allocated parallel arrays.
     *
     * @param map Mapping from dimensions to values.
     * @param numDimensions Number of dimensions; only consulted if
     * <code>useDims</code> is <code>true</code>.
     * @param useDims If <code>true</code>, the specified number of
     * dimensions is validated and used; otherwise the dimensionality
     * is one plus the largest key, or zero for an empty map.
     * @throws IllegalArgumentException If there are negative keys, if
     * a key is equal to <code>Integer.MAX_VALUE</code>, or if
     * <code>useDims</code> is <code>true</code> and the specified
     * number of dimensions is negative or not greater than the
     * largest key.
     */
    private SparseFloatVector(Map<Integer,? extends Number> map,
                              int numDimensions,
                              boolean useDims) {
        Integer[] keys
            = map.keySet().<Integer>toArray(new Integer[map.size()]);
        Arrays.sort(keys);
        int[] newKeys = new int[keys.length];
        for (int i = 0; i < keys.length; ++i)
            newKeys[i] = keys[i].intValue();
        // Keys are sorted, so the first is the smallest.
        if (newKeys.length > 0 && newKeys[0] < 0) {
            String msg = "All keys must be non-negative."
                + " Found key=" + newKeys[0];
            throw new IllegalArgumentException(msg);
        }
        float[] values = new float[keys.length];
        for (int i = 0; i < keys.length; ++i)
            values[i] = map.get(keys[i]).floatValue();
        mKeys = newKeys;
        mValues = values;
        // The largest key plus one must be representable as an int.
        if (mKeys.length > 0 && mKeys[mKeys.length-1] == Integer.MAX_VALUE) {
            String msg = "Maximum dimension is Integer.MAX_VALUE-1"
                + " Found dimension=Integer.MAX_VALUE";
            throw new IllegalArgumentException(msg);
        }
        int maxFoundDimensions
            = mKeys.length == 0
            ? 0
            : (mKeys[mKeys.length-1] + 1);
        if (useDims) {
            if (numDimensions < 0) {
                String msg = "Number of dimensions must be non-negative."
                    + " Found numDimensions=" + numDimensions;
                throw new IllegalArgumentException(msg);
            }
            if (numDimensions < maxFoundDimensions) {
                String msg = "Specified number of dimensions lower than largest index."
                    + " Num dimensions specified=" + numDimensions
                    + " Largest dimension found=" + mKeys[mKeys.length-1];
                throw new IllegalArgumentException(msg);
            }
            mNumDimensions = numDimensions;
        } else {
            mNumDimensions = maxFoundDimensions;
        }
        mLength = computeLength(values);
    }

    /**
     * Returns the number of dimensions of this vector.
     *
     * @return The number of dimensions of this vector.
     */
    public int numDimensions() {
        return mNumDimensions;
    }

    /**
     * Returns the array of dimensions that have non-zero values.
     * This method may return dimensions with zero values if this
     * vector was initialized with zero values.
     *
     * <p><b>Warning:</b> The returned array is the actual set of
     * dimensions used for this vector implementation, so should not
     * be modified.  Modifications result in a vector in an illegal
     * state if the dimensions don't remain sorted and within the
     * range of the dimensionality of this vector.
     *
     * @return The dimensions with non-zero values.
     */
    public int[] nonZeroDimensions() {
        return mKeys;
    }

    /**
     * This operation is not supported for sparse vectors.
     *
     * @param scale Ignored.
     * @param v Ignored.
     * @throws UnsupportedOperationException Always.
     */
    public void increment(double scale, Vector v) {
        String msg = "Can not set values in sparse float vectors.";
        throw new UnsupportedOperationException(msg);
    }

    /**
     * Returns a string consisting of the
     * <code><i>dimension</i>=<i>value</i></code> pairs stored in this
     * vector, in order of dimension, separated by single spaces.
     *
     * @return String-based representation of this vector.
     */
    public String toString() {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < mValues.length; ++i) {
            if (i > 0) sb.append(' ');
            sb.append(mKeys[i]).append('=').append(mValues[i]);
        }
        return sb.toString();
    }

    public double value(int dimension) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -