📄 sparsefloatvector.java

📁 一个自然语言处理的Java开源工具包。LingPipe目前已有很丰富的功能
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* * LingPipe v. 3.5 * Copyright (C) 2003-2008 Alias-i * * This program is licensed under the Alias-i Royalty Free License * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the Alias-i * Royalty Free License Version 1 for more details. * * You should have received a copy of the Alias-i Royalty Free License * Version 1 along with this program; if not, visit * http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211, * +1 (718) 290-9170. */package com.aliasi.matrix;import com.aliasi.util.AbstractExternalizable;import java.io.IOException;import java.io.ObjectInput;import java.io.ObjectOutput;import java.io.Serializable;import java.util.Arrays;import java.util.HashMap;import java.util.Map;/** * A <code>SparseFloatVector</code> implements an immutable sparse * vector with values represented as single-precision floating point * numbers.  Sparse vectors are specified in terms of mappings from * integer dimensions to single-precision floating-point values.  The * constructor allows the number of dimensions to be set, or to be * inferred as the largest dimension with a value in the mapping. * Dimensions for which no value is specified in the map provided to * the constructor will have values of 0.0. * * <p>A deep copy is made of the map provided to the constructor, so * that changes to the specified map do not affect this vector and * changes to this vector do not affect the map. * * <p><i>Implementation Note:</i> The underlying data is stored in a * pair of parallel arrays, one containing integer indexes and the * other values of type <code>float</code>.  The constructor computes * and stores the fixed number of dimensions.  The constructor also * stores the length of the vector by walking over the values.  Dot * products between sparse vectors are computed at double-precision by * walking over the indices and doing a merge, which is the most * efficient approach if the vectors are roughly the same size.  Dot * products with other vector implementations are computed by * iterating over the indexes in the sparse vector and looking up the * corresponding values in the argument vector.  Cosines are computed * by dividing dot products by lengths. * * <p>Equality versus other sparse float vectors only considers indexes * with values.  Hash codes also only consider indexes with values, * computing a shift and mask as well as an integer multiply and add * for each dimension. * * @author  Bob Carpenter * @version 3.5.1 * @since   LingPipe3.1 */public class SparseFloatVector    extends AbstractVector    implements Serializable {    final int[] mKeys;    final float[] mValues;    final int mNumDimensions;    final double mLength;    /**     * Construct a sparse vector from the specified map.  The     * dimensionality will be fixed to the largest integer with a     * value specified in the map.  See the class documentation for     * information details.     *     * @param map Mapping from dimensions to values.     * @throws IllegalArgumentException If there are negative keys.     */    public SparseFloatVector(Map<Integer,? extends Number> map) {        this(map,-1,false);    }    /**     * Constructs a sparse vector from the specified map with the     * specified number of dimensions.  See the class documentation     * for further implementation details.     *     * @param map Mapping from dimensions to values.     * @param numDimensions Number of dimensions for the constructed vector.     * @throws IllegalArgumentException If there are negative keys, or if the     * specified number of dimensions is negative, or if the specified number of     * dimensions is not greater than or equal to the largest integer key.     */    public SparseFloatVector(Map<Integer,? extends Number> map, int numDimensions) {        this(map,numDimensions,true);    }    /**     * Construct a sparse floating point vector with the specified     * keys defined at the specified values with the specified number     * of dimensions.  The keys must be non-negative and sorted in     * ascending order, no two keys may be equal, and no key may be     * equal to or greater than the number of dimensions.     *     * @param keys Array of keys indicating the defined dimensions.     * @param values Array of values for specified dimensions.     * @param numDimensions The dimensionality of the constructed vector.     * @throws IllegalArgumentException If the keys are not in ascending order,     * if a key is negative, if two keys are the same, or if a key is greater     * than or equal to the number of dimensions.     */    public SparseFloatVector(int[] keys, float[] values, int numDimensions) {        this(keys,values,numDimensions,constructorLength(values));        if (keys.length != values.length) {            String msg = "Keys and values must be same length."                + " Found keys.length=" + keys.length                + " values.length=" + values.length;            throw new IllegalArgumentException(msg);        }        for (int i = 1; i < keys.length; ++i) {            if (keys[i-1] >= keys[i]) {                String msg = "Keys must be in strictly ascending order."                    + " Found keys[" + (i-1) + "]=" + keys[i-1]                    + " keys[" + i + "]=" + keys[i];                throw new IllegalArgumentException(msg);            }        }        if (keys.length > 0 && keys[keys.length-1] >= numDimensions) {            String msg = "Keys must be less than number of dimensions."                + " Found numDimensions=" + numDimensions                + " keys[" + (keys.length-1) + "]=" + keys[keys.length-1];            throw new IllegalArgumentException(msg);        }    }    static double constructorLength(float[] vs) {        double sum = 0;        for (int i = 0; i < vs.length; ++i)            sum += vs[i] * vs[i];        return Math.sqrt(sum);    }    SparseFloatVector(int[] keys, float[] values,                      int numDimensions, double length) {        if (numDimensions < 0) {            String msg = "Dimensionality must be positive."                + " Found numDimensions=" + numDimensions;            throw new IllegalArgumentException(msg);        }        mKeys = keys;        mValues = values;        mNumDimensions = numDimensions;        mLength = length;    }    private SparseFloatVector(Map<Integer,? extends Number> map,                              int numDimensions, boolean useDims) {        Integer[] keys = map.keySet().<Integer>toArray(new Integer[map.size()]);        Arrays.sort(keys);        int[] newKeys = new int[keys.length];        for (int i = 0; i < keys.length; ++i)            newKeys[i] = keys[i].intValue();        if (newKeys.length > 0 && newKeys[0] < 0) {            String msg = "All keys must be non-negative."                + " Found key=" + newKeys[0];            throw new IllegalArgumentException(msg);        }        float[] values = new float[keys.length];        for (int i = 0; i < keys.length; ++i)            values[i] = map.get(keys[i]).floatValue();        mKeys = newKeys;        mValues = values;        if (mKeys.length > 0 && mKeys[mKeys.length-1] == Integer.MAX_VALUE) {            String msg = "Maximum dimension is Integer.MAX_VALUE-1"                + " Found dimension=Integer.MAX_VALUE";            throw new IllegalArgumentException(msg);        }        int maxFoundDimensions            = mKeys.length == 0 ? 0 : (mKeys[mKeys.length-1] + 1);        if (useDims) {            if (numDimensions < 0) {                String msg = "Number of dimensions must be non-negative."                    + " Found numDimensions=" + numDimensions;                throw new IllegalArgumentException(msg);            }            if (numDimensions < maxFoundDimensions) {                String msg = "Specified number of dimensions lower than largest index."                    + " Num dimensions specified=" + numDimensions                    + " Largest dimension found=" + mKeys[mKeys.length-1];                throw new IllegalArgumentException(msg);            }            mNumDimensions = numDimensions;        } else {            mNumDimensions = maxFoundDimensions;        }        mLength = computeLength(values);    }    public int numDimensions() {        return mNumDimensions;    }    /**     * Returns the array of dimensions that have non-zero values.     * This method may return dimensions with zero values if this     * vector was initialized with zero values.     *     * <p><b>Warning:</b>The returned array is the actual set of     * dimensions used for this vector implementation, so should not     * be modified.  Modifications result in a vector in an illegal     * states if the dimensions don't remain sorted and within the     * range of the dimensionality of this vector.     *     * @return The dimensions with non-zero values.     */    public int[] nonZeroDimensions() {        return mKeys;    }    /**     * This operation is not supported for sparse vectors.     *     * @param scale Ignored.     * @param v Ignored.     * @throws UnsupportedOperationException Always.     */    public void increment(double scale, Vector v) {        String msg = "Can not set values in sparse float vectors.";        throw new UnsupportedOperationException(msg);    }    public String toString() {        StringBuilder sb = new StringBuilder();        for (int i = 0; i < mValues.length; ++i) {            if (i > 0) sb.append(' ');            sb.append(mKeys[i] + "=" + mValues[i]);        }        return sb.toString();    }    public double value(int dimension) {
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -