📄 augmentablefeaturevector.java
字号:
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. *//** @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a>*/package edu.umass.cs.mallet.base.types;import edu.umass.cs.mallet.base.util.PropertyList;import java.io.*;public class AugmentableFeatureVector extends FeatureVector implements Serializable{ int size; // max index with valid indices[] or values[] value int maxSortedIndex; /* if indices != null, top of values[] and indices[] may be unsorted indices. */ // xxx Also make constructors for dense vectors, and add the appropriate // functionality in methods below. /** To make a binary vector, pass null for "values" */ public AugmentableFeatureVector (Alphabet dict, int[] indices, double[] values, int capacity, int size, boolean copy, boolean checkIndicesSorted, boolean removeDuplicates) { super (dict, indices, values, capacity, size, copy, checkIndicesSorted, removeDuplicates); this.size = size; this.maxSortedIndex = size - 1; } public AugmentableFeatureVector (Alphabet dict, int[] indices, double[] values, int capacity, boolean copy, boolean checkIndicesSorted) { this (dict, indices, values, capacity, indices.length, copy, checkIndicesSorted, true); } public AugmentableFeatureVector (Alphabet dict, int[] indices, double[] values, int capacity, boolean copy) { this (dict, indices, values, capacity, indices.length, copy, true, true); } public AugmentableFeatureVector (Alphabet dict, int[] indices, double[] values, int capacity) { this (dict, indices, values, capacity, indices.length, true, true, true); } public AugmentableFeatureVector (Alphabet dict, double[] values, int capacity) { this (dict, null, values, capacity, values.length, true, true, true); } public AugmentableFeatureVector (Alphabet dict, double[] values) { this (dict, null, values, values.length, values.length, true, true, true); } public AugmentableFeatureVector (Alphabet dict, int capacity, boolean binary) { // yyy this (dict, new int[capacity], binary ? null : new double[capacity], capacity, 0, false, false, false); } public AugmentableFeatureVector (Alphabet dict, boolean binary) { this (dict, 4, binary); } public AugmentableFeatureVector (Alphabet dict) { this (dict, false); } public AugmentableFeatureVector (FeatureVector fv) { this ((Alphabet)fv.dictionary, fv.indices, fv.values, fv.indices == null ? fv.values.length : fv.indices.length, fv.indices == null ? fv.values.length : fv.indices.length, true, false, false); } public AugmentableFeatureVector (FeatureSequence fs, boolean binary) { this (fs.getAlphabet(), binary); for (int i = fs.size()-1; i >= 0; i--) add (fs.getIndexAtPosition(i), 1.0); } public AugmentableFeatureVector (Alphabet dict, PropertyList pl, boolean binary, boolean growAlphabet) { this (dict, binary); if (pl == null) return; PropertyList.Iterator iter = pl.numericIterator(); while (iter.hasNext()) { iter.nextProperty(); //System.out.println ("AugmentableVector ("+dict.size()+") adding "+iter.getKey()+" "+iter.getNumericValue()); int index = dict.lookupIndex (iter.getKey(), growAlphabet); if (index >= 0) add (index, iter.getNumericValue()); } } public AugmentableFeatureVector (Alphabet dict, PropertyList pl, boolean binary) { this (dict, pl, binary, true); } /** * Adds all indices that are present in some other feature vector * with value 1.0. * Beware that this may have unintended effects if * <tt>fv.dictionary != this.dictionary</tt> */ public void add (FeatureVector fv) { for (int loc = 0; loc < fv.numLocations (); loc++) { int index = fv.indexAtLocation (loc); if (location (index) == -1) { add (index, 1.0); } } } // Aims to be cheap, constant time when (indices != null) public void add (int index, double value) { if (values == null && value != 1.0) throw new IllegalArgumentException ("Trying to add non-1.0 value to binary vector"); assert (index >= 0); if (indices == null) { if (index >= values.length) { int newLength = index + 10; // ??? double[] newValues = new double[newLength]; // ??? System.arraycopy (values, 0, newValues, 0, values.length); values = newValues; values[index] = value; assert (size <= index); } else { values[index] += value; } if (size <= index) size = index+1; } else { if (size == indices.length) { int newLength; if (indices.length == 0) newLength = 4; else if (indices.length < 4) newLength = indices.length * 2; else if (indices.length < 100) newLength = (indices.length * 3) / 2; else newLength = indices.length + 150; if (values != null) { double[] newValues = new double[newLength]; System.arraycopy (values, 0, newValues, 0, values.length); values = newValues; } int[] newIndices = new int[newLength]; System.arraycopy (indices, 0, newIndices, 0, indices.length); indices = newIndices; } //System.out.println ("indices.length="+indices.length+" size="+size); indices[size] = index; if (values != null) values[size] = value; size++; } } public void add (Object key, double value) { //System.out.println ("AugmentableFeatureVector dictionary = "+dictionary+", size = "+dictionary.size()); int index = dictionary.lookupIndex (key); //System.out.println ("AugmentableFeatureVector index("+key+") = "+index); assert (index != -1); add (index, value); } public void add (int index) { if (values != null) throw new IllegalArgumentException ("Trying to add binary feature to real-valued vector"); assert (index >= 0); } public final int numLocations () { if (indices == null) //return values.length; return size; if (size-1 != maxSortedIndex) sortIndices(); return size; } public final int location (int index) { if (indices == null) return index; if (size-1 != maxSortedIndex) sortIndices(); // Arrays.binarySearch (indices, index) doesn't work, because of the unused portion of the array at the end. for (int i = 0; i < size; i++) { if (indices[i] == index) return i; else if (indices[i] > index) return -1; } return -1; } public final double valueAtLocation (int location) { if (indices == null) return values[location]; if (size-1 != maxSortedIndex) sortIndices(); return super.valueAtLocation (location); } public final int indexAtLocation (int location) { if (indices == null) return location; if (size-1 != maxSortedIndex) sortIndices(); assert (location < size); return super.indexAtLocation (location); } public final double value (int index) { if (indices == null) return values[index]; if (size-1 != maxSortedIndex) sortIndices(); int loc = location(index); if (loc >= 0) { if (values == null) return 1.0; else return values[loc]; } else return 0; } public final void addTo (double[] accumulator, double scale) { if (indices != null && size-1 != maxSortedIndex) sortIndices(); if (indices == null) { for (int i = 0; i < size; i++) accumulator[i] += values[i] * scale; } else if (values == null) { for (int i = 0; i < size; i++) accumulator[indices[i]] += scale; } else { for (int i = 0; i < size; i++) accumulator[indices[i]] += values[i] * scale; } } public final void addTo (double[] accumulator) { addTo (accumulator, 1.0); } public final void setValue (int index, double value) { if (indices != null && size-1 != maxSortedIndex) sortIndices(); assert (values != null); if (indices == null) { assert (index < size); values[index] = value; } else { values[location(index)] = value; } } public final void setValueAtLocation (int location, double value) { assert (location < size); values[location] = value; } public ConstantMatrix cloneMatrix () { return new AugmentableFeatureVector ((Alphabet)dictionary, indices, values, indices.length, size, true, false, false); } public ConstantMatrix cloneMatrixZeroed () { if (indices == null) return new AugmentableFeatureVector (dictionary, new double[values.length]); else { int[] newIndices = new int[indices.length]; System.arraycopy (indices, 0, newIndices, 0, indices.length); return new AugmentableFeatureVector (dictionary, newIndices, new double[values.length], values.length, values.length, false, false, false); } } public int singleSize () { return (indices == null ? values.length : (size == 0 ? 0 : indices[size-1])); } public SparseVector toSparseVector () { if (size-1 != maxSortedIndex) sortIndices(); //System.out.println ("AugmentableFeatureVector toSparseVector size="+size); return new SparseVector (indices, values, size, size, true, false, false); } public FeatureVector toFeatureVector () { if (indices != null && size-1 != maxSortedIndex) sortIndices(); return new FeatureVector ((Alphabet)dictionary, indices, values, size, size, true, false, false); } public double dotProduct (DenseVector v) { if (indices != null && size-1 != maxSortedIndex) sortIndices(); double ret = 0; if (values == null) for (int i = 0; i < size; i++) ret += v.value(indices[i]); else if (indices == null) for (int i = 0; i < size; i++) ret += values[i] * v.value(i); else for (int i = 0; i < size; i++) ret += values[i] * v.value(indices[i]); return ret; } public final double dotProduct (SparseVector v) { if (v instanceof AugmentableFeatureVector) return dotProduct((AugmentableFeatureVector)v); if (indices != null && size-1 != maxSortedIndex) sortIndices(); double ret = 0; int vl = 0; int vnl = v.numLocations (); if (values == null) { for (int i = 0; i < size; i++) { while (vl < vnl && v.indexAtLocation(vl) < indices[i]) vl++; if (vl < vnl && v.indexAtLocation(vl) == indices[i]) ret += v.valueAtLocation(vl); } } else if (indices == null) { for (int i = 0; i < vnl; i++) { int index = v.indexAtLocation(i); if (index < size) ret += v.valueAtLocation(i) * values[index]; } } else { for (int i = 0; i < size; i++) { while (vl < vnl && v.indexAtLocation(vl) < indices[i]) vl++; if (vl < vnl && v.indexAtLocation(vl) == indices[i])
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -