⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 numbermodel.java

📁 自然语言处理领域的一个开发包
💻 JAVA
字号:
/////////////////////////////////////////////////////////////////////////////////Copyright (C) 2003 Thomas Morton////This library is free software; you can redistribute it and/or//modify it under the terms of the GNU Lesser General Public//License as published by the Free Software Foundation; either//version 2.1 of the License, or (at your option) any later version.////This library is distributed in the hope that it will be useful,//but WITHOUT ANY WARRANTY; without even the implied warranty of//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the//GNU Lesser General Public License for more details.////You should have received a copy of the GNU Lesser General Public//License along with this program; if not, write to the Free Software//Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.//////////////////////////////////////////////////////////////////////////////package opennlp.tools.coref.sim;import java.io.File;import java.io.IOException;import java.util.ArrayList;import java.util.Iterator;import java.util.List;import opennlp.maxent.Event;import opennlp.maxent.GIS;import opennlp.maxent.MaxentModel;import opennlp.maxent.io.SuffixSensitiveGISModelReader;import opennlp.maxent.io.SuffixSensitiveGISModelWriter;import opennlp.tools.coref.Linker;import opennlp.tools.util.CollectionEventStream;import opennlp.tools.util.HashList;/** * Class which models the number of particular mentions and the entities made up of mentions.  */public class NumberModel implements TestNumberModel, TrainSimilarityModel {  private String modelName;  private String modelExtension = ".bin.gz";  private MaxentModel testModel;  private List events;  private int singularIndex;  private int pluralIndex;  public static TestNumberModel testModel(String name) throws IOException {    NumberModel nm = new NumberModel(name, false);    return nm;  }  public static TrainSimilarityModel trainModel(String modelName) throws IOException {    NumberModel gm = new NumberModel(modelName, true);    return gm;  }  private NumberModel(String modelName, boolean train) throws IOException {    this.modelName = modelName;    if (train) {      events = new ArrayList();    }    else {      //if (MaxentResolver.loadAsResource()) {      //  testModel = (new PlainTextGISModelReader(new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(modelName))))).getModel();      //}      testModel = (new SuffixSensitiveGISModelReader(new File(modelName+modelExtension))).getModel();      singularIndex = testModel.getIndex(NumberEnum.SINGULAR.toString());      pluralIndex = testModel.getIndex(NumberEnum.PLURAL.toString());    }  }  private List getFeatures(Context np1) {    List features = new ArrayList();    features.add("default");    Object[] npTokens = np1.getTokens();    for (int ti = 0, tl = npTokens.length - 1; ti < tl; ti++) {      features.add("mw=" + npTokens[ti].toString());    }    features.add("hw=" + np1.getHeadTokenText().toLowerCase());    features.add("ht=" + np1.getHeadTokenTag());    return features;  }  private void addEvent(String outcome, Context np1) {    List feats = getFeatures(np1);    events.add(new Event(outcome, (String[]) feats.toArray(new String[feats.size()])));  }  public NumberEnum getNumber(Context ec) {    if (Linker.singularPronounPattern.matcher(ec.getHeadTokenText()).matches()) {      return NumberEnum.SINGULAR;    }    else if (Linker.pluralPronounPattern.matcher(ec.getHeadTokenText()).matches()) {      return NumberEnum.PLURAL;    }    else {      return NumberEnum.UNKNOWN;    }  }  private NumberEnum getNumber(List entity) {    for (Iterator ci = entity.iterator(); ci.hasNext();) {      Context ec = (Context) ci.next();      NumberEnum ne = getNumber(ec);      if (ne != NumberEnum.UNKNOWN) {        return ne;      }    }    return NumberEnum.UNKNOWN;  }  public void setExtents(Context[] extentContexts) {    HashList entities = new HashList();    List singletons = new ArrayList();    for (int ei = 0, el = extentContexts.length; ei < el; ei++) {      Context ec = extentContexts[ei];      //System.err.println("NumberModel.setExtents: ec("+ec.getId()+") "+ec.toText());      if (ec.getId() != -1) {        entities.put(new Integer(ec.getId()), ec);      }      else {        singletons.add(ec);      }    }    List singles = new ArrayList();    List plurals = new ArrayList();    // coref entities    for (Iterator ei = entities.keySet().iterator(); ei.hasNext();) {      Integer key = (Integer) ei.next();      List entityContexts = (List) entities.get(key);      NumberEnum number = getNumber(entityContexts);      if (number == NumberEnum.SINGULAR) {        singles.addAll(entityContexts);      }      else if (number == NumberEnum.PLURAL) {        plurals.addAll(entityContexts);      }    }    // non-coref entities.    for (Iterator ei = singletons.iterator(); ei.hasNext();) {      Context ec = (Context) ei.next();      NumberEnum number = getNumber(ec);      if (number == NumberEnum.SINGULAR) {        singles.add(ec);      }      else if (number == NumberEnum.PLURAL) {        plurals.add(ec);      }    }    for (Iterator si = singles.iterator(); si.hasNext();) {      Context ec = (Context) si.next();      addEvent(NumberEnum.SINGULAR.toString(), ec);    }    for (Iterator fi = plurals.iterator(); fi.hasNext();) {      Context ec = (Context) fi.next();      addEvent(NumberEnum.PLURAL.toString(),ec);    }  }  public double[] numberDist(Context c) {    List feats = getFeatures(c);    return testModel.eval((String[]) feats.toArray(new String[feats.size()]));  }  public int getSingularIndex() {    return singularIndex;  }  public int getPluralIndex() {    return pluralIndex;  }  public void trainModel() throws IOException {    (new SuffixSensitiveGISModelWriter(GIS.trainModel(new CollectionEventStream(events),100,10),new File(modelName+modelExtension))).persist();      }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -