// SimilarityModel.java (OpenNLP coreference) — non-code site banner removed from this capture.
/////////////////////////////////////////////////////////////////////////////////Copyright (C) 2003 Thomas Morton////This library is free software; you can redistribute it and/or//modify it under the terms of the GNU Lesser General Public//License as published by the Free Software Foundation; either//version 2.1 of the License, or (at your option) any later version.////This library is distributed in the hope that it will be useful,//but WITHOUT ANY WARRANTY; without even the implied warranty of//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the//GNU Lesser General Public License for more details.////You should have received a copy of the GNU Lesser General Public//License along with this program; if not, write to the Free Software//Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.//////////////////////////////////////////////////////////////////////////////package opennlp.tools.coref.sim;import java.io.BufferedReader;import java.io.File;import java.io.FileWriter;import java.io.IOException;import java.io.InputStreamReader;import java.util.ArrayList;import java.util.HashMap;import java.util.HashSet;import java.util.Iterator;import java.util.List;import java.util.Map;import java.util.Set;import opennlp.maxent.Event;import opennlp.maxent.GIS;import opennlp.maxent.MaxentModel;import opennlp.maxent.io.PlainTextGISModelReader;import opennlp.maxent.io.SuffixSensitiveGISModelReader;import opennlp.maxent.io.SuffixSensitiveGISModelWriter;import opennlp.tools.coref.resolver.AbstractResolver;import opennlp.tools.coref.resolver.MaxentResolver;import opennlp.tools.util.CollectionEventStream;import opennlp.tools.util.HashList;/** * Models semantic similarity between two mentions and returns a score based on * how semantically comparible the mentions are with one another. 
*/public class SimilarityModel implements TestSimilarityModel, TrainSimilarityModel { private String modelName; private String modelExtension = ".bin.gz"; private MaxentModel testModel; private List events; private int SAME_INDEX; private static final String SAME = "same"; private static final String DIFF = "diff"; private boolean debugOn = false; public static TestSimilarityModel testModel(String name) throws IOException { return new SimilarityModel(name, false); } public static TrainSimilarityModel trainModel(String name) throws IOException { SimilarityModel sm = new SimilarityModel(name, true); return sm; } private SimilarityModel(String modelName, boolean train) throws IOException { this.modelName = modelName; if (train) { events = new ArrayList(); } else { if (MaxentResolver.loadAsResource()) { testModel = (new PlainTextGISModelReader(new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(modelName))))).getModel(); } else { testModel = (new SuffixSensitiveGISModelReader(new File(modelName+modelExtension))).getModel(); } SAME_INDEX = testModel.getIndex(SAME); } } private void addEvent(boolean same, Context np1, Context np2) { if (same) { List feats = getFeatures(np1, np2); //System.err.println(SAME+" "+np1.headTokenText+" ("+np1.id+") -> "+np2.headTokenText+" ("+np2.id+") "+feats); events.add(new Event(SAME, (String[]) feats.toArray(new String[feats.size()]))); } else { List feats = getFeatures(np1, np2); //System.err.println(DIFF+" "+np1.headTokenText+" ("+np1.id+") -> "+np2.headTokenText+" ("+np2.id+") "+feats); events.add(new Event(DIFF, (String[]) feats.toArray(new String[feats.size()]))); } } /** * Produces a set of head words for the specified list of mentions. * @param mentions The mentions to use to construct the * @return A set containing the head words of the sepecified mentions. 
*/ private Set constructHeadSet(List mentions) { Set headSet = new HashSet(); for (Iterator ei = mentions.iterator(); ei.hasNext();) { Context ec = (Context) ei.next(); headSet.add(ec.getHeadTokenText().toLowerCase()); } return headSet; } private boolean hasSameHead(Set entityHeadSet, Set candidateHeadSet) { for (Iterator hi = entityHeadSet.iterator(); hi.hasNext();) { if (candidateHeadSet.contains(hi.next())) { return true; } } return false; } private boolean hasSameNameType(Set entityNameSet, Set candidateNameSet) { for (Iterator hi = entityNameSet.iterator(); hi.hasNext();) { if (candidateNameSet.contains(hi.next())) { return true; } } return false; } private boolean hasSuperClass(List entityContexts, List candidateContexts) { for (Iterator ei = entityContexts.iterator(); ei.hasNext();) { Context ec = (Context) ei.next(); for (Iterator cei = candidateContexts.iterator(); cei.hasNext();) { if (inSuperClass(ec, (Context) cei.next())) { return true; } } } return false; } /** * Constructs a set of entities which may be semantically compatible with the entity indicated by the specified entityKey. * @param entityKey The key of the entity for which the set is being constructed. * @param entities A mapping between entity keys and their meantions. * @param headSets A mapping between entity keys and their head sets. * @param nameSets A mapping between entity keys and their name sets. * @param singletons A list of all entities which consists of a single mentions. * @return A set of mentions for all the entities which might be semantically compatible * with entity indicated by the specified key. 
*/
  private Set constructExclusionSet(Integer entityKey, HashList entities, Map headSets, Map nameSets, List singletons) {
    Set exclusionSet = new HashSet();
    Set entityHeadSet = (Set) headSets.get(entityKey);
    Set entityNameSet = (Set) nameSets.get(entityKey);
    List entityContexts = (List) entities.get(entityKey);
    // Pass 1: multi-mention entities.  An entity's mentions are excluded when it
    // is the entity itself, has no name types, or shares a head word, a name
    // type, or a super-class relation with the entity under construction.
    for (Iterator ei = entities.keySet().iterator(); ei.hasNext();) {
      Integer key = (Integer) ei.next();
      List candidateContexts = (List) entities.get(key);
      if (key.equals(entityKey)) {
        exclusionSet.addAll(candidateContexts);
      }
      else if (((Set) nameSets.get(key)).isEmpty()) {
        exclusionSet.addAll(candidateContexts);
      }
      else if (hasSameHead(entityHeadSet, (Set) headSets.get(key))) {
        exclusionSet.addAll(candidateContexts);
      }
      else if (hasSameNameType(entityNameSet, (Set) nameSets.get(key))) {
        exclusionSet.addAll(candidateContexts);
      }
      else if (hasSuperClass(entityContexts, candidateContexts)) {
        exclusionSet.addAll(candidateContexts);
      }
    }
    // Pass 2: singleton mentions, checked with the same criteria.  The
    // one-element 'singles' buffer is reused across iterations so that
    // hasSuperClass can be called with a List argument.
    List singles = new ArrayList(1);
    for (Iterator si = singletons.iterator(); si.hasNext();) {
      Context sc = (Context) si.next();
      singles.clear();
      singles.add(sc);
      if (entityHeadSet.contains(sc.getHeadTokenText().toLowerCase())) {
        exclusionSet.add(sc);
      }
      else if (sc.getNameType() == null) {
        exclusionSet.add(sc);
      }
      else if (entityNameSet.contains(sc.getNameType())) {
        exclusionSet.add(sc);
      }
      else if (hasSuperClass(entityContexts, singles)) {
        exclusionSet.add(sc);
      }
    }
    return exclusionSet;
  }

  /**
   * Constructs a mapping between the specified entities and their head sets.
   * @param entities Mapping between a key and a list of mentions which compose an entity.
   * @return a mapping between the keys of the specified entity mapping and the head set
   * generated from the mentions associated with that key.
*/ private Map constructHeadSets(HashList entities) { Map headSets = new HashMap(); for (Iterator ei = entities.keySet().iterator(); ei.hasNext();) { Integer key = (Integer) ei.next(); List entityContexts = (List) entities.get(key); headSets.put(key, constructHeadSet(entityContexts)); } return headSets; } /** * Produces the set of name types associated with each of the specified mentions. * @param mentions A list of mentions. * @return A set set of name types assigned to the specified mentions. */ private Set constructNameSet(List mentions) { Set nameSet = new HashSet(); for (Iterator ei = mentions.iterator(); ei.hasNext();) { Context ec = (Context) ei.next(); if (ec.getNameType() != null) { nameSet.add(ec.getNameType()); } } return nameSet; } /** * Constructs a mappng between the specified entities and the names associated with these entities. * @param entities A mapping between a key and a list of mentions. * @return a mapping between each key in the specified entity map and the name types associated with the each mention of that entity. 
*/
  private Map constructNameSets(HashList entities) {
    Map nameSets = new HashMap();
    for (Iterator ei = entities.keySet().iterator(); ei.hasNext();) {
      Integer key = (Integer) ei.next();
      List entityContexts = (List) entities.get(key);
      nameSets.put(key, constructNameSet(entityContexts));
    }
    return nameSets;
  }

  // Returns true when the synsets the two contexts have in common cover ALL
  // synsets of at least one of them (i.e. one head's senses are a subset of
  // the other's); false when either has no synsets or the overlap is partial.
  private boolean inSuperClass(Context ec, Context cec) {
    if (ec.getSynsets().size() == 0 || cec.getSynsets().size() == 0) {
      return false;
    }
    else {
      int numCommonSynsets = 0;
      for (Iterator si = ec.getSynsets().iterator(); si.hasNext();) {
        Object synset = si.next();
        if (cec.getSynsets().contains(synset)) {
          numCommonSynsets++;
        }
      }
      if (numCommonSynsets == 0) {
        return false;
      }
      else if (numCommonSynsets == ec.getSynsets().size() || numCommonSynsets == cec.getSynsets().size()) {
        return true;
      }
      else {
        return false;
      }
    }
  }

  /*
  private boolean isPronoun(MentionContext mention) {
    return mention.getHeadTokenTag().startsWith("PRP");
  }
  */

  // NOTE(review): setExtents continues past the end of this capture; only the
  // beginning of the method is visible here.
  public void setExtents(Context[] extentContexts) {
    HashList entities = new HashList();
    /** Extents which are not in a coreference chain.
*/ List singletons = new ArrayList(); List allExtents = new ArrayList(); //populate data structures for (int ei = 0, el = extentContexts.length; ei < el; ei++) { Context ec = extentContexts[ei]; //System.err.println("SimilarityModel: setExtents: ec("+ec.getId()+") "+ec.getNameType()+" "+ec); if (ec.getId() == -1) { singletons.add(ec); } else { entities.put(new Integer(ec.getId()), ec); } allExtents.add(ec); } int axi = 0; Map headSets = constructHeadSets(entities); Map nameSets = constructNameSets(entities); for (Iterator ei = entities.keySet().iterator(); ei.hasNext();) { Integer key = (Integer) ei.next(); Set entityNameSet = (Set) nameSets.get(key); if (entityNameSet.isEmpty()) { continue; } List entityContexts = (List) entities.get(key); Set exclusionSet = constructExclusionSet(key, entities, headSets, nameSets, singletons); if (entityContexts.size() == 1) { } for (int xi1 = 0, xl = entityContexts.size(); xi1 < xl; xi1++) { Context ec1 = (Context) entityContexts.get(xi1); //if (isPronoun(ec1)) { // continue; //} for (int xi2 = xi1 + 1; xi2 < xl; xi2++) { Context ec2 = (Context) entityContexts.get(xi2); //if (isPronoun(ec2)) { // continue; //} addEvent(true, ec1, ec2); int startIndex = axi; do { Context sec1 = (Context) allExtents.get(axi); axi = (axi + 1) % allExtents.size();
// NOTE(review): the source capture is truncated here — the remainder of
// setExtents() and the rest of the SimilarityModel class are missing.
// (Non-code site UI text removed.)