📄 treebanklinker.java

📁 自然语言处理领域的一个开发包
💻 JAVA
字号:
/////////////////////////////////////////////////////////////////////////////////Copyright (C) 2003 Thomas Morton////This library is free software; you can redistribute it and/or//modify it under the terms of the GNU Lesser General Public//License as published by the Free Software Foundation; either//version 2.1 of the License, or (at your option) any later version.////This library is distributed in the hope that it will be useful,//but WITHOUT ANY WARRANTY; without even the implied warranty of//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the//GNU Lesser General Public License for more details.////You should have received a copy of the GNU Lesser General Public//License along with this program; if not, write to the Free Software//Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.//////////////////////////////////////////////////////////////////////////////package opennlp.tools.lang.english;import java.io.BufferedReader;import java.io.FileReader;import java.io.IOException;import java.io.InputStreamReader;import java.util.ArrayList;import java.util.Arrays;import java.util.HashMap;import java.util.Iterator;import java.util.List;import java.util.Map;import opennlp.tools.coref.*;import opennlp.tools.coref.DiscourseEntity;import opennlp.tools.coref.Linker;import opennlp.tools.coref.LinkerMode;import opennlp.tools.coref.mention.DefaultParse;import opennlp.tools.coref.mention.Mention;import opennlp.tools.coref.mention.MentionContext;import opennlp.tools.coref.mention.PTBMentionFinder;import opennlp.tools.parser.Parse;import opennlp.tools.parser.ParserME;import opennlp.tools.util.Span;/** * This class perform coreference for treebank style parses.   * It will only perform coreference over constituents defined in the trees and * will not generate new constituents for pre-nominal entities or sub-entities in  * simple coordinated noun phrases.  This linker requires that named-entity information also be provided.   * This information can be added to the parse using the -parse option with EnglishNameFinder. */public class TreebankLinker extends DefaultLinker {    public TreebankLinker(String project, LinkerMode mode) throws IOException {    super(project,mode);  }    public TreebankLinker(String project, LinkerMode mode, boolean useDiscourseModel) throws IOException {    super(project,mode,useDiscourseModel);  }     public TreebankLinker(String project, LinkerMode mode, boolean useDiscourseModel, double fixedNonReferentialProbability) throws IOException {    super(project,mode,useDiscourseModel,fixedNonReferentialProbability);  }    protected void initMentionFinder() {    mentionFinder = PTBMentionFinder.getInstance(headFinder);  }    private static void showEntities(DiscourseEntity[] entities) {    for (int ei=0,en=entities.length;ei<en;ei++) {     System.out.println(ei+" "+entities[ei]);    }  }    /**   * Identitifies corefernce relationships for parsed input passed via standard in.   * @param args The model directory.   * @throws IOException when the model directory can not be read.   */  public static void main(String[] args) throws IOException {    if (args.length == 0) {      System.err.println("Usage: TreebankLinker model_directory < parses");      System.exit(1);    }    BufferedReader in;    int ai =0;    String dataDir = args[ai++];    if (ai == args.length) {      in = new BufferedReader(new InputStreamReader(System.in));    }    else {      in = new BufferedReader(new FileReader(args[ai]));    }    Linker treebankLinker = new TreebankLinker(dataDir,LinkerMode.TEST);    int sentenceNumber = 0;    List document = new ArrayList();    List parses = new ArrayList();    for (String line=in.readLine();null != line;line = in.readLine()) {      if (line.equals("")) {        DiscourseEntity[] entities = treebankLinker.getEntities((Mention[]) document.toArray(new Mention[document.size()]));        //showEntities(entities);        new CorefParse(parses,entities).show();        sentenceNumber=0;        document.clear();        parses.clear();      }      else {        Parse p = Parse.parseParse(line);        parses.add(p);        Mention[] extents = treebankLinker.getMentionFinder().getMentions(new DefaultParse(p,sentenceNumber));        //construct new parses for mentions which don't have constituents.        for (int ei=0,en=extents.length;ei<en;ei++) {          //System.err.println("PennTreebankLiner.main: "+ei+" "+extents[ei]);                    if (extents[ei].getParse() == null) {            Parse snp = new Parse(p.getText(),extents[ei].getSpan(),"NML",1.0);            p.insert(snp);            extents[ei].setParse(new DefaultParse(snp,sentenceNumber));          }                  }        document.addAll(Arrays.asList(extents));        sentenceNumber++;      }    }    if (document.size() > 0) {      DiscourseEntity[] entities = treebankLinker.getEntities((Mention[]) document.toArray(new Mention[document.size()]));      //showEntities(entities);      (new CorefParse(parses,entities)).show();    }  }}class CorefParse {    private Map parseMap;  private List parses;    public CorefParse(List parses, DiscourseEntity[] entities) {    this.parses = parses;    parseMap = new HashMap();    for (int ei=0,en=entities.length;ei<en;ei++) {      if (entities[ei].getNumMentions() > 1) {        for (Iterator mi=entities[ei].getMentions();mi.hasNext();) {          MentionContext mc = (MentionContext) mi.next();          Parse mentionParse = ((DefaultParse) mc.getParse()).getParse();          parseMap.put(mentionParse,new Integer(ei+1));          //System.err.println("CorefParse: "+mc.getParse().hashCode()+" -> "+ (ei+1));        }      }    }  }    public void show() {    for (int pi=0,pn=parses.size();pi<pn;pi++) {      Parse p = (Parse) parses.get(pi);      show(p);      System.out.println();    }  }    private void show(Parse p) {    int start;    start = p.getSpan().getStart();    if (!p.getType().equals(ParserME.TOK_NODE)) {      System.out.print("(");      System.out.print(p.getType());      if (parseMap.containsKey(p)) {        System.out.print("#"+parseMap.get(p));      }      //System.out.print(p.hashCode()+"-"+parseMap.containsKey(p));      System.out.print(" ");    }    Parse[] children = p.getChildren();    for (int pi=0,pn=children.length;pi<pn;pi++) {      Parse c = children[pi];      Span s = c.getSpan();      if (start < s.getStart()) {        System.out.print(p.getText().substring(start, s.getStart()));      }      show(c);      start = s.getEnd();    }    System.out.print(p.getText().substring(start, p.getSpan().getEnd()));    if (!p.getType().equals(ParserME.TOK_NODE)) {      System.out.print(")");    }  }}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -