augmentablefeaturevector.java

来自「mallet是自然语言处理、机器学习领域的一个开源项目。」· Java 代码 · 共 745 行 · 第 1/2 页

JAVA
745
字号
			for (int i = 0; i < size; i++)				ret += values[i] * v.value(indices[i]);		return ret;	}	public final double dotProduct (SparseVector v) {		if (v instanceof AugmentableFeatureVector)			return dotProduct((AugmentableFeatureVector)v);		if (indices != null && size-1 != maxSortedIndex)			sortIndices();		double ret = 0;		int vl = 0;		int vnl = v.numLocations ();		if (values == null) {			for (int i = 0; i < size; i++) {				while (vl < vnl && v.indexAtLocation(vl) < indices[i])					vl++;				if (vl < vnl && v.indexAtLocation(vl) == indices[i])					ret += v.valueAtLocation(vl);			}		} else if (indices == null) {			for (int i = 0; i < vnl; i++) {				int index = v.indexAtLocation(i);				if (index < size)					ret += v.valueAtLocation(i) * values[index];			}		} else {			for (int loc = 0; loc < size; loc++) {				while (vl < vnl && v.indexAtLocation(vl) < indices[loc])					vl++;				if (vl < vnl && v.indexAtLocation (vl) == indices [loc])					ret += values[loc] * v.value(indices[loc]);			}		}		return ret;	}	public final double dotProduct (AugmentableFeatureVector v) {		if (indices != null && size-1 != maxSortedIndex)			sortIndices();		if (v.indices != null && v.size-1 != v.maxSortedIndex)			v.sortIndices();		double ret = 0;		int vl = 0;		int vnl = v.size;		if (values == null) {			if (v.values == null) {				for (int i = 0; i < size; i++) {					while (vl < vnl && v.indices[vl] < indices[i])						vl++;					if (vl < vnl && v.indices[vl] == indices[i])						ret += 1.0;				}			} else {				for (int i = 0; i < size; i++) {					while (vl < vnl && v.indices[vl] < indices[i])						vl++;					if (vl < vnl && v.indices[vl] == indices[i])						ret += v.values[vl];				}			}		} else if (indices == null) {			for (int i = 0; i < vnl; i++) {				int index = v.indexAtLocation(i);				if (index < size)					ret += v.valueAtLocation(i) * values[index];			}		} else {			if (v.values == null) {				for (int i = 0; i < size; i++) {					while (vl < vnl && v.indices[vl] < indices[i])						vl++;					if (vl < vnl && v.indices[vl] == indices[i])						ret += values[i];				}			} else {				for (int i = 0; i < size; i++) {					while (vl < vnl && v.indices[vl] < indices[i])						vl++;					if (vl < vnl && v.indices[vl] == indices[i])						ret += values[i] * v.values[vl];				}			}		}		return ret;	}	public void plusEquals (AugmentableFeatureVector v, double factor) {		if (indices != null && size-1 != maxSortedIndex)			sortIndices();		if (v.indices != null && v.size-1 != v.maxSortedIndex)			v.sortIndices();		int vl = 0;		int vnl = v.size;		assert (values != null);		if (indices == null) {			if (v.indices == null) {				vnl = Math.min (vnl, size);				for (int i = 0; i < vnl; i++)					values[i] += v.values[i];			} else {				// v.indices != null				for (int i = 0; i < vnl; i++) {					int index = v.indices[i];					if (index < values.length) {						values[index] += v.values[i] * factor;						if (index >= size)							size = index+1;					}				}			}		} else {			// indices != null			if (v.indices == null) {				for (int i = 0; i < size; i++) {					if (indices[i] < vnl)						values[i] += v.values[indices[i]];					// xxx We should check to see if there were more					// higher indices in "v" that didn't get added!				}			} else {				// v.indices != null				if (v.values == null) {					// v.indices != null && v.values == null					for (int i = 0; i < size; i++) {						while (vl < vnl && v.indices[vl] < indices[i])							vl++;						if (vl < vnl && v.indices[vl] == indices[i]) 							values[i] += factor;						// xxx We should check to see if there were more						// higher indices in "v" that didn't get added!					}				} else {					// v.indices != null && v.values != null					for (int i = 0; i < size; i++) {						while (vl < vnl && v.indices[vl] < indices[i])							vl++;						if (vl < vnl && v.indices[vl] == indices[i]) 							values[i] += v.values[vl] * factor;						// xxx We should check to see if there were more						// higher indices in "v" that didn't get added!					}				}			}		}	}	// But only adds to those entries that have "locations" (i.e. are already non-zero)	public void plusEquals (SparseVector v, double factor) {		if (v instanceof AugmentableFeatureVector) {			plusEquals ((AugmentableFeatureVector)v, factor);			return;		}		//assert (false) : v.getClass().getName(); // This code needs to be checked!		if (indices != null && size-1 != maxSortedIndex)			sortIndices();		int vl = 0;		assert (values != null);		if (indices == null) {			if (v.indices == null) {				// indices == null && v.indices == null (&& v.values != null)				int s = Math.min (size, v.values.length);				for (int i = 0; i < s; i++)					values[i] += v.values[i] * factor;				// xxx We aren't adding in values with indices higher than "this.size"!			} else {				// indices == null && v.indices != null				if (v.values == null) {					// indices == null && v.indices != null && v.values == null					for (int i = 0; i < v.indices.length; i++) {						int index = v.indices[i];						if (index < size)							values[index] += factor;					}					// xxx We aren't adding in values with indices higher than "size"!				} else {					// indices == null && v.indices != null && v.values != null					for (int i = 0; i < v.indices.length; i++) {						int index = v.indices[i];						if (index < size)							values[index] += v.values[i] * factor;						// xxx We aren't adding in values with indices higher than "size"!					}				}			}		} else {			// indices != null			if (v.indices == null) {				// indices != null && v.indices == null (&& v.values != null)				for (int i = 0; i < size; i++)					if (indices[i] < v.values.length)						values[i] += v.values[indices[i]] * factor;				// xxx We aren't adding in values with indices higher than "size"!			} else {				// indices != null && v.indices != null				int vnl = v.indices.length;				if (v.values == null) {					// indices != null && v.indices != null && v.values == null					for (int i = 0; i < size; i++) {						while (vl < vnl && v.indices[vl] < indices[i])							vl++;						if (vl < vnl && v.indices[vl] == indices[i]) 							values[i] += v.values[vl] * factor;						// xxx We should check to see if there were more						// higher indices in "v" that didn't get added!					}				} else {					// indices != null && v.indices != null && v.values != null					for (int i = 0; i < size; i++) {						while (vl < vnl && v.indices[vl] < indices[i])							vl++;						if (vl < vnl && v.indices[vl] == indices[i]) 							values[i] += v.values[vl] * factor;						// xxx We should check to see if there were more						// higher indices in "v" that didn't get added!					}				}			}		}	}	public void plusEquals (SparseVector v) {		plusEquals (v, 1.0);	}		public void setAll (double v)	{		assert (values != null);		for (int i = 0; i < values.length; i++)			values[i] = v;	}		public double oneNorm () {		if (size-1 != maxSortedIndex)			sortIndices();		double ret = 0;		if (values == null)			return size;		for (int i = 0; i < size; i++)			ret += values[i];		return ret;	}		public double twoNorm () {		if (size-1 != maxSortedIndex)			sortIndices();		double ret = 0;		if (values == null)			return Math.sqrt (size);		for (int i = 0; i < size; i++)			ret += values[i] * values[i];		return Math.sqrt (ret);	}		public double infinityNorm () {		if (size-1 != maxSortedIndex)			sortIndices();		if (values == null)			return 1.0;		double max = Double.NEGATIVE_INFINITY;		for (int i = 0; i < size; i++)			if (Math.abs(values[i]) > max)				max = Math.abs(values[i]);		return max;	}			public void print() {		//System.out.println ("ASV size="+size+" dict.size="+dictionary.size()+" values.length="+values.length+" indices.length="+indices.length);		if (size-1 != maxSortedIndex)			sortIndices();		super.print();	}		protected void sortIndices ()	{				if (indices == null) // vector is dense, so indices are already sorted 			return;		else if (this.size == 0) {  // assume method called from constructor; initialize member vars			this.size = indices.length;			this.maxSortedIndex = -1;		}				// Just BubbleSort; this is efficient when already mostly sorted.		// Note that we BubbleSort from the the end forward; this is most efficient		//  when we have added a few additional items to the end of a previously sorted list.		//  Note that we remember the highest index that was already sorted as "maxSortedIndex".		// Note that maxSortedIndex may be -1 here, so the first time through the outer loop		// just falls through immediately when the termination condition of the inner loop is met.		for (int i = maxSortedIndex+1; i < size; i++) {			for (int j = i; j > 0; j--) {				if (indices[j] < indices[j-1]) {					// Swap both indices and values					int f;					f = indices[j];					indices[j] = indices[j-1];					indices[j-1] = f;					if (values != null) {						double v;						v = values[j];						values[j] = values[j-1];						values[j-1] = v;					}				}			}		}		removeDuplicates (0);		maxSortedIndex = size-1;	}		// Argument zero is special value meaning that this function should count them.	protected void removeDuplicates (int numDuplicates)	{		if (indices == null)			return;		//System.out.print ("AFV removeDuplicates ");		//for (int i = 0; i < size; i++)		//System.out.print (" " + dictionary.lookupObject(indices[i]) + "=" + indices[i]);		//System.out.println (" numDuplicates="+numDuplicates);				if (numDuplicates == 0)			for (int i = 1; i < size; i++)				if (indices[i-1] == indices[i])					numDuplicates++;		if (numDuplicates == 0)			return;		assert (indices.length - numDuplicates > 0)			: "size="+size+" indices.length="+indices.length+" numDuplicates="+numDuplicates;		int[] newIndices = new int[indices.length - numDuplicates];		double[] newValues = values == null ? null : new double[indices.length - numDuplicates];		newIndices[0] = indices[0];		assert (indices.length >= size);		for (int i = 0, j = 0; i < size-1; i++) {			if (indices[i] == indices[i+1]) {				if (values != null)					newValues[j] += values[i];			} else {				newIndices[j] = indices[i];				if (values != null)					newValues[j] += values[i];				j++;			}			if(i == size-2) {				if(values != null)					newValues[j] += values[i+1];				newIndices[j] = indices[i+1];			}		}		this.indices = newIndices;		this.values = newValues;		this.size -= numDuplicates;	}	// Serialization 			private static final long serialVersionUID = 1;	private static final int CURRENT_SERIAL_VERSION = 0;	private void writeObject (ObjectOutputStream out) throws IOException {		out.writeInt (CURRENT_SERIAL_VERSION);		out.writeInt (size);		out.writeInt (maxSortedIndex);	}	private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException {		int version = in.readInt ();		size = in.readInt();		maxSortedIndex = in.readInt();	}}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?