📄 offsetfeatureconjunction.java
字号:
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. *//** Create new feature from the conjunction of features from given offsets that match given regular expressions. This can be seen as hand-coding in a few of the conjunctions that you'd get from {@link OffsetConjunctions}. <P> For example, creating a pipe with <TT>new OffsetFeatureConjunction ("TIME", new String[] { "number", "W=:" "number" }, new int[] { 0, 1, 2 })<TT> will create a feature that is true whenever all of (a) a feature at the current time matches "number" (b) a feature at the next time step matches "W=:" (b) a feature 2 timesteps from now match "number", so that you have a simple time detector. <P>If the conjunction passes, then either the first timestep (that is, the one all the offsets were computed from), or all matching timesteps, get the feature "TIME" --- depending on the value of the field tagAllTimesteps. @author Charles Sutton <a href="mailto:casutton@cs.umass.edu">casutton@cs.umass.edu</a> @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a> */package edu.umass.cs.mallet.base.pipe.tsf;import edu.umass.cs.mallet.base.pipe.Pipe;import edu.umass.cs.mallet.base.types.Instance;import edu.umass.cs.mallet.base.types.TokenSequence;import edu.umass.cs.mallet.base.types.Token;import edu.umass.cs.mallet.base.util.PropertyList;import java.io.IOException;import java.io.ObjectInputStream;import java.io.ObjectOutputStream;import java.io.Serializable;import java.util.regex.Pattern;public class OffsetFeatureConjunction extends Pipe implements Serializable{ private String thisFeatureName; private Pattern[] featurePatterns; private int[] offsets; private boolean[] isNonNegated; private boolean tagAllTimesteps; /** * Create a Pipe for adding conjunctions of specified features. * @param thisFeatureName Name of this conjunction feature. * @param featureNames String giving name for each subfeature i. * @param offsets For each subfeature i, which offset from the current timestep * must i appear at. * @param isNonNegated If element i is false, then the negation of the * feature is added to the conjuction. */ public OffsetFeatureConjunction (String thisFeatureName, String[] featureNames, int[] offsets, boolean[] isNonNegated, boolean tagAllTimesteps) { this.thisFeatureName = thisFeatureName; this.featurePatterns = patternify (featureNames); this.offsets = offsets; this.isNonNegated = isNonNegated; this.tagAllTimesteps = tagAllTimesteps; } private static boolean[] trueArray (int length) { boolean[] ret = new boolean[length]; for (int i = 0; i < length; i++) ret[i] = true; return ret; } private Pattern[] patternify (String[] regex) { Pattern[] retval = new Pattern [regex.length]; for (int i = 0; i < regex.length; i++) { retval [i] = Pattern.compile (regex[i]); } return retval; } public OffsetFeatureConjunction (String thisFeatureName, String[] featureNames, int[] offsets, boolean tagAllTimesteps) { this (thisFeatureName, featureNames, offsets, trueArray(featureNames.length), tagAllTimesteps); } public OffsetFeatureConjunction (String thisFeatureName, String[] featureNames, int[] offsets) { this (thisFeatureName, featureNames, offsets, trueArray(featureNames.length), false); } public Instance pipe (Instance carrier) { TokenSequence ts = (TokenSequence) carrier.getData(); int tsSize = ts.size(); for (int t = 0; t < tsSize; t++) { // Check whether the conjunction is true at time step t boolean passes = true; for (int fnum = 0; fnum < featurePatterns.length; fnum++) { int pos = t + offsets[fnum]; if (!(pos >= 0 && pos < tsSize)) { passes = false; break; } boolean featurePresent = hasMatchingFeature (ts.getToken(pos), featurePatterns [fnum]); if (featurePresent != isNonNegated [fnum]) { passes = false; break; } } if (passes) { if (tagAllTimesteps) { for (int fnum = 0; fnum < featurePatterns.length; fnum++) { int pos = t + offsets[fnum]; ts.getToken(pos).setFeatureValue (thisFeatureName, 1.0); } } else { ts.getToken(t).setFeatureValue (thisFeatureName, 1.0); } } } return carrier; } private boolean hasMatchingFeature (Token token, Pattern pattern) { PropertyList.Iterator iter = token.getFeatures ().iterator (); while (iter.hasNext()) { iter.next(); if (pattern.matcher (iter.getKey()). matches ()) { if (iter.getNumericValue() == 1.0) { return true; } } } return false; } // Serialization private static final long serialVersionUID = 1; private static final int CURRENT_SERIAL_VERSION = 0; private static final int NULL_INTEGER = -1; private void writeObject (ObjectOutputStream out) throws IOException { out.writeInt (CURRENT_SERIAL_VERSION); out.writeObject (thisFeatureName); int size; size = (featurePatterns == null) ? NULL_INTEGER : featurePatterns.length; out.writeInt(size); if (size != NULL_INTEGER) { for (int i = 0; i <size; i++) { out.writeObject (featurePatterns[i]); out.writeInt (offsets[i]); out.writeBoolean (isNonNegated[i]); } } } private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException { int size; int version = in.readInt (); thisFeatureName = (String) in.readObject(); size = in.readInt();; if (size == NULL_INTEGER) { featurePatterns = null; offsets = null; isNonNegated = null; } else { featurePatterns = new Pattern[size]; offsets = new int[size]; isNonNegated = new boolean[size]; for (int i = 0; i < size; i++) { featurePatterns[i] = (Pattern) in.readObject(); offsets[i] = in.readInt(); isNonNegated[i] = in.readBoolean(); } } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -