⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 maxentresolver.java

📁 自然语言处理领域的一个开发包
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
/////////////////////////////////////////////////////////////////////////////////Copyright (C) 2003 Thomas Morton////This library is free software; you can redistribute it and/or//modify it under the terms of the GNU Lesser General Public//License as published by the Free Software Foundation; either//version 2.1 of the License, or (at your option) any later version.////This library is distributed in the hope that it will be useful,//but WITHOUT ANY WARRANTY; without even the implied warranty of//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the//GNU Lesser General Public License for more details.////You should have received a copy of the GNU Lesser General Public//License along with this program; if not, write to the Free Software//Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.//////////////////////////////////////////////////////////////////////////////package opennlp.tools.coref.resolver;import java.io.DataInputStream;import java.io.File;import java.io.FileWriter;import java.io.IOException;import java.util.ArrayList;import java.util.HashMap;import java.util.HashSet;import java.util.Iterator;import java.util.List;import java.util.Map;import java.util.Set;import java.util.regex.Pattern;import opennlp.maxent.Event;import opennlp.maxent.GIS;import opennlp.maxent.MaxentModel;import opennlp.maxent.io.BinaryGISModelReader;import opennlp.maxent.io.SuffixSensitiveGISModelReader;import opennlp.maxent.io.SuffixSensitiveGISModelWriter;import opennlp.tools.coref.DiscourseEntity;import opennlp.tools.coref.DiscourseModel;import opennlp.tools.coref.Linker;import opennlp.tools.coref.mention.MentionContext;import opennlp.tools.coref.mention.Parse;import opennlp.tools.coref.sim.GenderEnum;import opennlp.tools.coref.sim.NumberEnum;import opennlp.tools.coref.sim.TestSimilarityModel;import opennlp.tools.util.CollectionEventStream;/** *  Provides common functionality used by classes which implement the {@link Resolver} class and use maximum 
entropy models to make resolution decisions. */
public abstract class MaxentResolver extends AbstractResolver {

  /** Outcome when two mentions are coreferent. */
  public static final String SAME = "same";
  /** Outcome when two mentions are not coreferent. */
  public static final String DIFF = "diff";
  /** Default feature value. */
  public static final String DEFAULT = "default";

  /** Matches a period at the end of the input. */
  private static final Pattern endsWithPeriod = Pattern.compile("\\.$");

  // The values below are compile-time literals that never differ between resolver
  // instances, so they are declared static: one shared copy per class instead of
  // one field per object.
  private static final double minSimProb = 0.60;

  private static final String SIM_COMPATIBLE = "sim.compatible";
  private static final String SIM_INCOMPATIBLE = "sim.incompatible";
  private static final String SIM_UNKNOWN = "sim.unknown";

  private static final String NUM_COMPATIBLE = "num.compatible";
  private static final String NUM_INCOMPATIBLE = "num.incompatible";
  private static final String NUM_UNKNOWN = "num.unknown";

  private static final String GEN_COMPATIBLE = "gen.compatible";
  private static final String GEN_INCOMPATIBLE = "gen.incompatible";
  private static final String GEN_UNKNOWN = "gen.unknown";

  /** When true, diagnostic output is written to System.err. */
  private static boolean debugOn=false;

  /** When true, models are read with getResourceAsStream instead of from a File. */
  private static boolean loadAsResource=false;

  /** Model directory and model name joined with "/"; the model extension is appended when reading. */
  private String modelName;
  /** The maximum entropy model; the constructor loads it in TEST mode only. */
  private MaxentModel model;
  /** Per-candidate probabilities, with one extra slot for the non-referent possibility. */
  private double[] candProbs;
  /** Index of the {@link #SAME} outcome in the model's outcome distribution. */
  private int sameIndex;
  /** The mode this resolver was created with. */
  private ResolverMode mode;
  /** Allocated in TRAIN mode; presumably accumulates training events (confirm against the rest of the class). */
  private List events;

  /**
   * When true, this designates that the resolver should use the first referent encountered which is
   * more preferable than non-reference.  When false all non-excluded referents within this resolver's range
   * are considered.
   */
  protected boolean preferFirstReferent;

  /**
   * When true, this designates that training should consist of a single positive and a single negative example
   * (when possible) for each mention.
   */
  protected boolean pairedSampleSelection;

  /**
   * When true, this designates that the same maximum entropy model should be used for non-reference
   * events (the pairing of a mention and the "null" reference) as is used for potentially
   * referential pairs.  When false a separate model is created for these events.
*/   protected boolean useSameModelForNonRef;    private static TestSimilarityModel simModel = null;  /** The model for computing non-referential probabilities. */  protected NonReferentialResolver nonReferentialResolver;    private static final String modelExtension = ".bin.gz";  /**   * Creates a maximum-entropy-based resolver which will look the specified number of entities back for a referent.   * This constructor is only used for unit testing.   * @param numberOfEntitiesBack   * @param preferFirstReferent   */  protected MaxentResolver(int numberOfEntitiesBack, boolean preferFirstReferent) {    super(numberOfEntitiesBack);    this.preferFirstReferent = preferFirstReferent;  }    /**   * Creates a maximum-entropy-based resolver with the specified model name, using the    * specified mode, which will look the specified number of entities back for a referent and   * prefer the first referent if specified.   * @param modelDirectory The name of the directory where the resover models are stored.   * @param name The name of the file where this model will be read or written.   * @param mode The mode this resolver is being using in (training, testing).   * @param numberOfEntitiesBack The number of entities back in the text that this resolver will look   * for a referent.   * @param preferFirstReferent Set to true if the resolver should prefer the first referent which is more   * likly than non-reference.  This only affects testing.   * @param nonReferentialResolver Determines how likly it is that this entity is non-referential.   * @throws IOException If the model file is not found or can not be written to.   
*/  public MaxentResolver(String modelDirectory, String name, ResolverMode mode, int numberOfEntitiesBack, boolean preferFirstReferent, NonReferentialResolver nonReferentialResolver) throws IOException {    super(numberOfEntitiesBack);    this.preferFirstReferent = preferFirstReferent;    this.nonReferentialResolver = nonReferentialResolver;    this.mode = mode;    this.modelName = modelDirectory+"/"+name;    if (ResolverMode.TEST == this.mode) {      if (loadAsResource) {        model = (new BinaryGISModelReader(new DataInputStream(this.getClass().getResourceAsStream(modelName+modelExtension)))).getModel();      }      else {        model = (new SuffixSensitiveGISModelReader(new File(modelName+modelExtension))).getModel();      }      sameIndex = model.getIndex(SAME);    }    else if (ResolverMode.TRAIN == this.mode) {      events = new ArrayList();    }    else {      System.err.println("Unknown mode: " + this.mode);    }    //add one for non-referent possibility    candProbs = new double[getNumEntities() + 1];  }  /**   * Creates a maximum-entropy-based resolver with the specified model name, using the    * specified mode, which will look the specified number of entities back for a referent.   * @param modelDirectory The name of the directory where the resover models are stored.   * @param modelName The name of the file where this model will be read or written.   * @param mode The mode this resolver is being using in (training, testing).   * @param numberEntitiesBack The number of entities back in the text that this resolver will look   * for a referent.   * @throws IOException If the model file is not found or can not be written to.   
*/  public MaxentResolver(String modelDirectory, String modelName, ResolverMode mode, int numberEntitiesBack) throws IOException {    this(modelDirectory, modelName, mode, numberEntitiesBack, false);  }    public MaxentResolver(String modelDirectory, String modelName, ResolverMode mode, int numberEntitiesBack, NonReferentialResolver nonReferentialResolver) throws IOException {    this(modelDirectory, modelName, mode, numberEntitiesBack, false,nonReferentialResolver);  }    public MaxentResolver(String modelDirectory, String modelName, ResolverMode mode, int numberEntitiesBack, boolean preferFirstReferent) throws IOException {    //this(projectName, modelName, mode, numberEntitiesBack, preferFirstReferent, SingletonNonReferentialResolver.getInstance(projectName,mode));    this(modelDirectory, modelName, mode, numberEntitiesBack, preferFirstReferent, new DefaultNonReferentialResolver(modelDirectory, modelName, mode));  }    public MaxentResolver(String modelDirectory, String modelName, ResolverMode mode, int numberEntitiesBack, boolean preferFirstReferent, double nonReferentialProbability) throws IOException {    //this(projectName, modelName, mode, numberEntitiesBack, preferFirstReferent, SingletonNonReferentialResolver.getInstance(projectName,mode));    this(modelDirectory, modelName, mode, numberEntitiesBack, preferFirstReferent, new FixedNonReferentialResolver(nonReferentialProbability));  }    /**   * Specifies whether the models should be loaded from a resource.   * @param lar boolean which if true indicates that the model should be loaded as a resource.    */  public static void loadAsResource(boolean lar) {    loadAsResource = lar;  }    /**   * Returns whether the models should be loaded from a file or from a resource.   * @return  whether the models should be loaded from a file or from a resource.   
*/  public static boolean loadAsResource() {    return loadAsResource;  }  public DiscourseEntity resolve(MentionContext ec, DiscourseModel dm) {    DiscourseEntity de;    int ei = 0;    double nonReferentialProbability = nonReferentialResolver.getNonReferentialProbability(ec);    if (debugOn) {      System.err.println(this +".resolve: " + ec.toText() + " -> " +  "null "+nonReferentialProbability);    }    for (; ei < getNumEntities(dm); ei++) {      de = (DiscourseEntity) dm.getEntity(ei);      if (outOfRange(ec, de)) {        break;      }      if (excluded(ec, de)) {        candProbs[ei] = 0;        if (debugOn) {          System.err.println("excluded "+this +".resolve: " + ec.toText() + " -> " + de + " " + candProbs[ei]);        }      }      else {        List lfeatures = getFeatures(ec, de);        String[] features = (String[]) lfeatures.toArray(new String[lfeatures.size()]);        try {          candProbs[ei] = model.eval(features)[sameIndex];        }        catch (ArrayIndexOutOfBoundsException e) {          candProbs[ei] = 0;        }        if (debugOn) {          System.err.println(this +".resolve: " + ec.toText() + " -> " + de + " ("+ec.getGender()+","+de.getGender()+") " + candProbs[ei] + " " + lfeatures);        }      }      if (preferFirstReferent && candProbs[ei] > nonReferentialProbability) {        ei++; //update for nonRef assignment        break;      }    }    candProbs[ei] = nonReferentialProbability;    // find max    int maxCandIndex = 0;    for (int k = 1; k <= ei; k++) {      if (candProbs[k] > candProbs[maxCandIndex]) {        maxCandIndex = k;      }    }    if (maxCandIndex == ei) { // no referent      return (null);    }    else {      de = (DiscourseEntity) dm.getEntity(maxCandIndex);      return (de);    }  }  /*  protected double getNonReferentialProbability(MentionContext ec) {    if (useFixedNonReferentialProbability) {      if (debugOn) {        System.err.println(this +".resolve: " + ec.toText() + " -> " + null +" " + 
fixedNonReferentialProbability);        System.err.println();      }      return fixedNonReferentialProbability;    }    List lfeatures = getFeatures(ec, null);    String[] features = (String[]) lfeatures.toArray(new String[lfeatures.size()]);    if (features == null) {      System.err.println("features=null in " + this);    }    if (model == null) {      System.err.println("model=null in " + this);    }    double[] dist = nrModel.eval(features);    if (dist == null) {      System.err.println("dist=null in " + this);    }    if (debugOn) {      System.err.println(this +".resolve: " + ec.toText() + " -> " + null +" " + dist[nrSameIndex] + " " + lfeatures);      System.err.println();    }    return (dist[nrSameIndex]);  }  */  /**   * Returns whether the specified entity satisfies the criteria for being a default referent.   * This criteria is used to perform sample selection on the training data and to select a single   * non-referent entity. Typically the criteria is a heuristic for a likely referent.   * @param de The discourse entity being considered for non-reference.   * @return True if the entity should be used as a default referent, false otherwise.    */  protected boolean defaultReferent(DiscourseEntity de) {    MentionContext ec = de.getLastExtent();

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -