📄 maxentresolver.java
字号:
/////////////////////////////////////////////////////////////////////////////////Copyright (C) 2003 Thomas Morton////This library is free software; you can redistribute it and/or//modify it under the terms of the GNU Lesser General Public//License as published by the Free Software Foundation; either//version 2.1 of the License, or (at your option) any later version.////This library is distributed in the hope that it will be useful,//but WITHOUT ANY WARRANTY; without even the implied warranty of//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the//GNU Lesser General Public License for more details.////You should have received a copy of the GNU Lesser General Public//License along with this program; if not, write to the Free Software//Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.//////////////////////////////////////////////////////////////////////////////package opennlp.tools.coref.resolver;import java.io.DataInputStream;import java.io.File;import java.io.FileWriter;import java.io.IOException;import java.util.ArrayList;import java.util.HashMap;import java.util.HashSet;import java.util.Iterator;import java.util.List;import java.util.Map;import java.util.Set;import java.util.regex.Pattern;import opennlp.maxent.Event;import opennlp.maxent.GIS;import opennlp.maxent.MaxentModel;import opennlp.maxent.io.BinaryGISModelReader;import opennlp.maxent.io.SuffixSensitiveGISModelReader;import opennlp.maxent.io.SuffixSensitiveGISModelWriter;import opennlp.tools.coref.DiscourseEntity;import opennlp.tools.coref.DiscourseModel;import opennlp.tools.coref.Linker;import opennlp.tools.coref.mention.MentionContext;import opennlp.tools.coref.mention.Parse;import opennlp.tools.coref.sim.GenderEnum;import opennlp.tools.coref.sim.NumberEnum;import opennlp.tools.coref.sim.TestSimilarityModel;import opennlp.tools.util.CollectionEventStream;/** * Provides common functionality used by classes which implement the {@link Resolver} class and use maximum entropy 
models to make resolution decisions. */
public abstract class MaxentResolver extends AbstractResolver {

  /** Outcomes when two mentions are coreferent. */
  public static final String SAME = "same";
  /** Outcome when two mentions are not coreferent. */
  public static final String DIFF = "diff";
  /** Default feature value. */
  public static final String DEFAULT = "default";

  /** Matches a period at the end of the input. */
  private static final Pattern endsWithPeriod = Pattern.compile("\\.$");

  /* NOTE(review): not referenced in the visible part of this file; presumably a similarity-probability threshold -- confirm. */
  private final double minSimProb = 0.60;

  /*
   * String constants below are not referenced in the visible part of this file.
   * NOTE(review): presumably feature values describing similarity, number and gender
   * compatibility -- confirm against the feature-generation code.
   */
  private final String SIM_COMPATIBLE = "sim.compatible";
  private final String SIM_INCOMPATIBLE = "sim.incompatible";
  private final String SIM_UNKNOWN = "sim.unknown";

  private final String NUM_COMPATIBLE = "num.compatible";
  private final String NUM_INCOMPATIBLE = "num.incompatible";
  private final String NUM_UNKNOWN = "num.unknown";

  private final String GEN_COMPATIBLE = "gen.compatible";
  private final String GEN_INCOMPATIBLE = "gen.incompatible";
  private final String GEN_UNKNOWN = "gen.unknown";

  /** When true, resolve() writes its scoring decisions to System.err. */
  private static boolean debugOn=false;
  /** When true, the model is read from a class resource rather than from a file (see the constructor). */
  private static boolean loadAsResource=false;

  /** Model directory and model name joined by "/", without the model file extension. */
  private String modelName;
  /** The model evaluated in resolve(); only assigned by the constructor in TEST mode. */
  private MaxentModel model;
  /** Scratch array of candidate probabilities; allocated with one extra slot for the non-referent possibility. */
  private double[] candProbs;
  /** Index of the SAME outcome in the model's outcome distribution. */
  private int sameIndex;
  /** The mode this resolver was constructed with. */
  private ResolverMode mode;
  /** Created by the constructor in TRAIN mode only.  NOTE(review): presumably accumulates training events -- confirm. */
  private List events;

  /** When true, this designates that the resolver should use the first referent encountered which is
   * more preferable than non-reference.  When false all non-excluded referents within this resolver's range
   * are considered.
   */
  protected boolean preferFirstReferent;
  /** When true, this designates that training should consist of a single positive and a single negative example
   * (when possible) for each mention.
   */
  protected boolean pairedSampleSelection;
  /** When true, this designates that the same maximum entropy model should be used for non-reference
   * events (the pairing of a mention and the "null" reference) as is used for potentially
   * referential pairs.  When false a separate model is created for these events.
 */
  protected boolean useSameModelForNonRef;

  /* NOTE(review): not referenced in the visible part of this file; shared by all instances because it is static. */
  private static TestSimilarityModel simModel = null;

  /** The model for computing non-referential probabilities. */
  protected NonReferentialResolver nonReferentialResolver;

  /** Suffix appended to the model name to form the model file or resource name. */
  private static final String modelExtension = ".bin.gz";

  /**
   * Creates a maximum-entropy-based resolver which will look the specified number of entities back for a referent.
   * This constructor is only used for unit testing.
   * Note that it only calls the superclass constructor and stores <code>preferFirstReferent</code>; it does not
   * load a model, allocate the candidate-probability array, or set a non-referential resolver.
   * @param numberOfEntitiesBack The number of entities back, passed to the superclass constructor.
   * @param preferFirstReferent Stored in the {@link #preferFirstReferent} field.
   */
  protected MaxentResolver(int numberOfEntitiesBack, boolean preferFirstReferent) {
    super(numberOfEntitiesBack);
    this.preferFirstReferent = preferFirstReferent;
  }

  /**
   * Creates a maximum-entropy-based resolver with the specified model name, using the
   * specified mode, which will look the specified number of entities back for a referent and
   * prefer the first referent if specified.
   * @param modelDirectory The name of the directory where the resolver models are stored.
   * @param name The name of the file where this model will be read or written.
   * @param mode The mode this resolver is being used in (training, testing).
   * @param numberOfEntitiesBack The number of entities back in the text that this resolver will look
   * for a referent.
   * @param preferFirstReferent Set to true if the resolver should prefer the first referent which is more
   * likely than non-reference.  This only affects testing.
   * @param nonReferentialResolver Determines how likely it is that this entity is non-referential.
   * @throws IOException If the model file is not found or can not be written to.
*/ public MaxentResolver(String modelDirectory, String name, ResolverMode mode, int numberOfEntitiesBack, boolean preferFirstReferent, NonReferentialResolver nonReferentialResolver) throws IOException { super(numberOfEntitiesBack); this.preferFirstReferent = preferFirstReferent; this.nonReferentialResolver = nonReferentialResolver; this.mode = mode; this.modelName = modelDirectory+"/"+name; if (ResolverMode.TEST == this.mode) { if (loadAsResource) { model = (new BinaryGISModelReader(new DataInputStream(this.getClass().getResourceAsStream(modelName+modelExtension)))).getModel(); } else { model = (new SuffixSensitiveGISModelReader(new File(modelName+modelExtension))).getModel(); } sameIndex = model.getIndex(SAME); } else if (ResolverMode.TRAIN == this.mode) { events = new ArrayList(); } else { System.err.println("Unknown mode: " + this.mode); } //add one for non-referent possibility candProbs = new double[getNumEntities() + 1]; } /** * Creates a maximum-entropy-based resolver with the specified model name, using the * specified mode, which will look the specified number of entities back for a referent. * @param modelDirectory The name of the directory where the resover models are stored. * @param modelName The name of the file where this model will be read or written. * @param mode The mode this resolver is being using in (training, testing). * @param numberEntitiesBack The number of entities back in the text that this resolver will look * for a referent. * @throws IOException If the model file is not found or can not be written to. 
*/ public MaxentResolver(String modelDirectory, String modelName, ResolverMode mode, int numberEntitiesBack) throws IOException { this(modelDirectory, modelName, mode, numberEntitiesBack, false); } public MaxentResolver(String modelDirectory, String modelName, ResolverMode mode, int numberEntitiesBack, NonReferentialResolver nonReferentialResolver) throws IOException { this(modelDirectory, modelName, mode, numberEntitiesBack, false,nonReferentialResolver); } public MaxentResolver(String modelDirectory, String modelName, ResolverMode mode, int numberEntitiesBack, boolean preferFirstReferent) throws IOException { //this(projectName, modelName, mode, numberEntitiesBack, preferFirstReferent, SingletonNonReferentialResolver.getInstance(projectName,mode)); this(modelDirectory, modelName, mode, numberEntitiesBack, preferFirstReferent, new DefaultNonReferentialResolver(modelDirectory, modelName, mode)); } public MaxentResolver(String modelDirectory, String modelName, ResolverMode mode, int numberEntitiesBack, boolean preferFirstReferent, double nonReferentialProbability) throws IOException { //this(projectName, modelName, mode, numberEntitiesBack, preferFirstReferent, SingletonNonReferentialResolver.getInstance(projectName,mode)); this(modelDirectory, modelName, mode, numberEntitiesBack, preferFirstReferent, new FixedNonReferentialResolver(nonReferentialProbability)); } /** * Specifies whether the models should be loaded from a resource. * @param lar boolean which if true indicates that the model should be loaded as a resource. */ public static void loadAsResource(boolean lar) { loadAsResource = lar; } /** * Returns whether the models should be loaded from a file or from a resource. * @return whether the models should be loaded from a file or from a resource. 
*/ public static boolean loadAsResource() { return loadAsResource; } public DiscourseEntity resolve(MentionContext ec, DiscourseModel dm) { DiscourseEntity de; int ei = 0; double nonReferentialProbability = nonReferentialResolver.getNonReferentialProbability(ec); if (debugOn) { System.err.println(this +".resolve: " + ec.toText() + " -> " + "null "+nonReferentialProbability); } for (; ei < getNumEntities(dm); ei++) { de = (DiscourseEntity) dm.getEntity(ei); if (outOfRange(ec, de)) { break; } if (excluded(ec, de)) { candProbs[ei] = 0; if (debugOn) { System.err.println("excluded "+this +".resolve: " + ec.toText() + " -> " + de + " " + candProbs[ei]); } } else { List lfeatures = getFeatures(ec, de); String[] features = (String[]) lfeatures.toArray(new String[lfeatures.size()]); try { candProbs[ei] = model.eval(features)[sameIndex]; } catch (ArrayIndexOutOfBoundsException e) { candProbs[ei] = 0; } if (debugOn) { System.err.println(this +".resolve: " + ec.toText() + " -> " + de + " ("+ec.getGender()+","+de.getGender()+") " + candProbs[ei] + " " + lfeatures); } } if (preferFirstReferent && candProbs[ei] > nonReferentialProbability) { ei++; //update for nonRef assignment break; } } candProbs[ei] = nonReferentialProbability; // find max int maxCandIndex = 0; for (int k = 1; k <= ei; k++) { if (candProbs[k] > candProbs[maxCandIndex]) { maxCandIndex = k; } } if (maxCandIndex == ei) { // no referent return (null); } else { de = (DiscourseEntity) dm.getEntity(maxCandIndex); return (de); } } /* protected double getNonReferentialProbability(MentionContext ec) { if (useFixedNonReferentialProbability) { if (debugOn) { System.err.println(this +".resolve: " + ec.toText() + " -> " + null +" " + fixedNonReferentialProbability); System.err.println(); } return fixedNonReferentialProbability; } List lfeatures = getFeatures(ec, null); String[] features = (String[]) lfeatures.toArray(new String[lfeatures.size()]); if (features == null) { System.err.println("features=null in " + this); } if 
(model == null) { System.err.println("model=null in " + this); } double[] dist = nrModel.eval(features); if (dist == null) { System.err.println("dist=null in " + this); } if (debugOn) { System.err.println(this +".resolve: " + ec.toText() + " -> " + null +" " + dist[nrSameIndex] + " " + lfeatures); System.err.println(); } return (dist[nrSameIndex]); } */ /** * Returns whether the specified entity satisfies the criteria for being a default referent. * This criteria is used to perform sample selection on the training data and to select a single * non-referent entity. Typically the criteria is a heuristic for a likely referent. * @param de The discourse entity being considered for non-reference. * @return True if the entity should be used as a default referent, false otherwise. */ protected boolean defaultReferent(DiscourseEntity de) { MentionContext ec = de.getLastExtent();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -