// SimilarityModel.java (OpenNLP coreference) — non-code site banner removed from this capture.
/////////////////////////////////////////////////////////////////////////////////Copyright (C) 2003 Thomas Morton////This library is free software; you can redistribute it and/or//modify it under the terms of the GNU Lesser General Public//License as published by the Free Software Foundation; either//version 2.1 of the License, or (at your option) any later version.////This library is distributed in the hope that it will be useful,//but WITHOUT ANY WARRANTY; without even the implied warranty of//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the//GNU Lesser General Public License for more details.////You should have received a copy of the GNU Lesser General Public//License along with this program; if not, write to the Free Software//Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.//////////////////////////////////////////////////////////////////////////////package opennlp.tools.coref.sim;import java.io.BufferedReader;import java.io.File;import java.io.FileWriter;import java.io.IOException;import java.io.InputStreamReader;import java.util.ArrayList;import java.util.HashMap;import java.util.HashSet;import java.util.Iterator;import java.util.List;import java.util.Map;import java.util.Set;import opennlp.maxent.Event;import opennlp.maxent.GIS;import opennlp.maxent.MaxentModel;import opennlp.maxent.io.PlainTextGISModelReader;import opennlp.maxent.io.SuffixSensitiveGISModelReader;import opennlp.maxent.io.SuffixSensitiveGISModelWriter;import opennlp.tools.coref.resolver.AbstractResolver;import opennlp.tools.coref.resolver.MaxentResolver;import opennlp.tools.util.CollectionEventStream;import opennlp.tools.util.HashList;/** * Models semantic similarity between two mentions and returns a score based on * how semantically comparible the mentions are with one another. 
*/public class SimilarityModel implements TestSimilarityModel, TrainSimilarityModel { private String modelName; private String modelExtension = ".bin.gz"; private MaxentModel testModel; private List events; private int SAME_INDEX; private static final String SAME = "same"; private static final String DIFF = "diff"; private boolean debugOn = false; public static TestSimilarityModel testModel(String name) throws IOException { return new SimilarityModel(name, false); } public static TrainSimilarityModel trainModel(String name) throws IOException { SimilarityModel sm = new SimilarityModel(name, true); return sm; } private SimilarityModel(String modelName, boolean train) throws IOException { this.modelName = modelName; if (train) { events = new ArrayList(); } else { if (MaxentResolver.loadAsResource()) { testModel = (new PlainTextGISModelReader(new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(modelName))))).getModel(); } else { testModel = (new SuffixSensitiveGISModelReader(new File(modelName+modelExtension))).getModel(); } SAME_INDEX = testModel.getIndex(SAME); } } private void addEvent(boolean same, Context np1, Context np2) { if (same) { List feats = getFeatures(np1, np2); //System.err.println(SAME+" "+np1.headTokenText+" ("+np1.id+") -> "+np2.headTokenText+" ("+np2.id+") "+feats); events.add(new Event(SAME, (String[]) feats.toArray(new String[feats.size()]))); } else { List feats = getFeatures(np1, np2); //System.err.println(DIFF+" "+np1.headTokenText+" ("+np1.id+") -> "+np2.headTokenText+" ("+np2.id+") "+feats); events.add(new Event(DIFF, (String[]) feats.toArray(new String[feats.size()]))); } } /** * Produces a set of head words for the specified list of mentions. * @param mentions The mentions to use to construct the * @return A set containing the head words of the sepecified mentions. 
*/ private Set constructHeadSet(List mentions) { Set headSet = new HashSet(); for (Iterator ei = mentions.iterator(); ei.hasNext();) { Context ec = (Context) ei.next(); headSet.add(ec.getHeadTokenText().toLowerCase()); } return headSet; } private boolean hasSameHead(Set entityHeadSet, Set candidateHeadSet) { for (Iterator hi = entityHeadSet.iterator(); hi.hasNext();) { if (candidateHeadSet.contains(hi.next())) { return true; } } return false; } private boolean hasSameNameType(Set entityNameSet, Set candidateNameSet) { for (Iterator hi = entityNameSet.iterator(); hi.hasNext();) { if (candidateNameSet.contains(hi.next())) { return true; } } return false; } private boolean hasSuperClass(List entityContexts, List candidateContexts) { for (Iterator ei = entityContexts.iterator(); ei.hasNext();) { Context ec = (Context) ei.next(); for (Iterator cei = candidateContexts.iterator(); cei.hasNext();) { if (inSuperClass(ec, (Context) cei.next())) { return true; } } } return false; } /** * Constructs a set of entities which may be semantically compatible with the entity indicated by the specified entityKey. * @param entityKey The key of the entity for which the set is being constructed. * @param entities A mapping between entity keys and their meantions. * @param headSets A mapping between entity keys and their head sets. * @param nameSets A mapping between entity keys and their name sets. * @param singletons A list of all entities which consists of a single mentions. * @return A set of mentions for all the entities which might be semantically compatible * with entity indicated by the specified key. 
*/
  private Set constructExclusionSet(Integer entityKey, HashList entities, Map headSets, Map nameSets, List singletons) {
    Set exclusionSet = new HashSet();
    Set entityHeadSet = (Set) headSets.get(entityKey);
    Set entityNameSet = (Set) nameSets.get(entityKey);
    List entityContexts = (List) entities.get(entityKey);
    // Pass 1: multi-mention entities.  An entity's mentions are excluded when it
    // is the entity itself, has no name types, or shares a head word, a name
    // type, or a super-class relation with the entity under construction.
    for (Iterator ei = entities.keySet().iterator(); ei.hasNext();) {
      Integer key = (Integer) ei.next();
      List candidateContexts = (List) entities.get(key);
      if (key.equals(entityKey)) {
        exclusionSet.addAll(candidateContexts);
      }
      else if (((Set) nameSets.get(key)).isEmpty()) {
        exclusionSet.addAll(candidateContexts);
      }
      else if (hasSameHead(entityHeadSet, (Set) headSets.get(key))) {
        exclusionSet.addAll(candidateContexts);
      }
      else if (hasSameNameType(entityNameSet, (Set) nameSets.get(key))) {
        exclusionSet.addAll(candidateContexts);
      }
      else if (hasSuperClass(entityContexts, candidateContexts)) {
        exclusionSet.addAll(candidateContexts);
      }
    }
    // Pass 2: singleton mentions, checked with the same criteria.  The
    // one-element 'singles' buffer is reused across iterations so that
    // hasSuperClass can be called with a List argument.
    List singles = new ArrayList(1);
    for (Iterator si = singletons.iterator(); si.hasNext();) {
      Context sc = (Context) si.next();
      singles.clear();
      singles.add(sc);
      if (entityHeadSet.contains(sc.getHeadTokenText().toLowerCase())) {
        exclusionSet.add(sc);
      }
      else if (sc.getNameType() == null) {
        exclusionSet.add(sc);
      }
      else if (entityNameSet.contains(sc.getNameType())) {
        exclusionSet.add(sc);
      }
      else if (hasSuperClass(entityContexts, singles)) {
        exclusionSet.add(sc);
      }
    }
    return exclusionSet;
  }

  /**
   * Constructs a mapping between the specified entities and their head sets.
   * @param entities Mapping between a key and a list of mentions which compose an entity.
   * @return a mapping between the keys of the specified entity mapping and the head set
   * generated from the mentions associated with that key.
*/ private Map constructHeadSets(HashList entities) { Map headSets = new HashMap(); for (Iterator ei = entities.keySet().iterator(); ei.hasNext();) { Integer key = (Integer) ei.next(); List entityContexts = (List) entities.get(key); headSets.put(key, constructHeadSet(entityContexts)); } return headSets; } /** * Produces the set of name types associated with each of the specified mentions. * @param mentions A list of mentions. * @return A set set of name types assigned to the specified mentions. */ private Set constructNameSet(List mentions) { Set nameSet = new HashSet(); for (Iterator ei = mentions.iterator(); ei.hasNext();) { Context ec = (Context) ei.next(); if (ec.getNameType() != null) { nameSet.add(ec.getNameType()); } } return nameSet; } /** * Constructs a mappng between the specified entities and the names associated with these entities. * @param entities A mapping between a key and a list of mentions. * @return a mapping between each key in the specified entity map and the name types associated with the each mention of that entity. 
*/
  private Map constructNameSets(HashList entities) {
    Map nameSets = new HashMap();
    for (Iterator ei = entities.keySet().iterator(); ei.hasNext();) {
      Integer key = (Integer) ei.next();
      List entityContexts = (List) entities.get(key);
      nameSets.put(key, constructNameSet(entityContexts));
    }
    return nameSets;
  }

  // Returns true when the synsets the two contexts have in common cover ALL
  // synsets of at least one of them (i.e. one head's senses are a subset of
  // the other's); false when either has no synsets or the overlap is partial.
  private boolean inSuperClass(Context ec, Context cec) {
    if (ec.getSynsets().size() == 0 || cec.getSynsets().size() == 0) {
      return false;
    }
    else {
      int numCommonSynsets = 0;
      for (Iterator si = ec.getSynsets().iterator(); si.hasNext();) {
        Object synset = si.next();
        if (cec.getSynsets().contains(synset)) {
          numCommonSynsets++;
        }
      }
      if (numCommonSynsets == 0) {
        return false;
      }
      else if (numCommonSynsets == ec.getSynsets().size() || numCommonSynsets == cec.getSynsets().size()) {
        return true;
      }
      else {
        return false;
      }
    }
  }

  /*
  private boolean isPronoun(MentionContext mention) {
    return mention.getHeadTokenTag().startsWith("PRP");
  }
  */

  // NOTE(review): setExtents continues past the end of this capture; only the
  // beginning of the method is visible here.
  public void setExtents(Context[] extentContexts) {
    HashList entities = new HashList();
    /** Extents which are not in a coreference chain.
*/ List singletons = new ArrayList(); List allExtents = new ArrayList(); //populate data structures for (int ei = 0, el = extentContexts.length; ei < el; ei++) { Context ec = extentContexts[ei]; //System.err.println("SimilarityModel: setExtents: ec("+ec.getId()+") "+ec.getNameType()+" "+ec); if (ec.getId() == -1) { singletons.add(ec); } else { entities.put(new Integer(ec.getId()), ec); } allExtents.add(ec); } int axi = 0; Map headSets = constructHeadSets(entities); Map nameSets = constructNameSets(entities); for (Iterator ei = entities.keySet().iterator(); ei.hasNext();) { Integer key = (Integer) ei.next(); Set entityNameSet = (Set) nameSets.get(key); if (entityNameSet.isEmpty()) { continue; } List entityContexts = (List) entities.get(key); Set exclusionSet = constructExclusionSet(key, entities, headSets, nameSets, singletons); if (entityContexts.size() == 1) { } for (int xi1 = 0, xl = entityContexts.size(); xi1 < xl; xi1++) { Context ec1 = (Context) entityContexts.get(xi1); //if (isPronoun(ec1)) { // continue; //} for (int xi2 = xi1 + 1; xi2 < xl; xi2++) { Context ec2 = (Context) entityContexts.get(xi2); //if (isPronoun(ec2)) { // continue; //} addEvent(true, ec1, ec2); int startIndex = axi; do { Context sec1 = (Context) allExtents.get(axi); axi = (axi + 1) % allExtents.size();
// NOTE(review): the source capture is truncated here — the remainder of
// setExtents() and the rest of the SimilarityModel class are missing.
// (Non-code site UI text removed.)