⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 checkcontextgenerator.java

📁 自然语言处理领域的一个开发包
💻 JAVA
字号:
/////////////////////////////////////////////////////////////////////////////////Copyright (C) 2003 Thomas Morton// //This library is free software; you can redistribute it and/or//modify it under the terms of the GNU Lesser General Public//License as published by the Free Software Foundation; either//version 2.1 of the License, or (at your option) any later version.// //This library is distributed in the hope that it will be useful,//but WITHOUT ANY WARRANTY; without even the implied warranty of//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the//GNU Lesser General Public License for more details.// //You should have received a copy of the GNU Lesser General Public//License along with this program; if not, write to the Free Software//Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.//////////////////////////////////////////////////////////////////////////////package opennlp.tools.parser;import java.util.ArrayList;import java.util.Collection;import java.util.Iterator;import java.util.List;import opennlp.maxent.ContextGenerator;/** * Class for generating predictive context for deciding when a constituent is complete. * @author Tom Morton * */public class CheckContextGenerator implements ContextGenerator {  private static final String EOS = "eos";    /**   * Creates a new context generator for generating predictive context for deciding when a constituent is complete.   */  public CheckContextGenerator() {    super();  }  public String[] getContext(Object o) {    Object[] params = (Object[]) o;    return getContext((Parse[]) params[0], (String) params[1], ((Integer) params[2]).intValue(), ((Integer) params[3]).intValue());  }  private void surround(Parse p, int i, String type, Collection punctSet, List features) {    StringBuffer feat = new StringBuffer(20);    feat.append("s").append(i).append("=");    if (punctSet !=null) {      for (Iterator pi=punctSet.iterator();pi.hasNext();) {        Parse punct = (Parse) pi.next();        if (p != null) {          feat.append(p.getHead().toString()).append("|").append(type).append("|").append(p.getHead().getType()).append("|").append(punct.getType());        }        else {          feat.append(type).append("|").append(EOS).append("|").append(punct.getType());        }        features.add(feat.toString());                feat.setLength(0);        feat.append("s").append(i).append("*=");        if (p != null) {          feat.append(type).append("|").append(p.getHead().getType()).append("|").append(punct.getType());        }        else {          feat.append(type).append("|").append(EOS).append("|").append(punct.getType());        }        features.add(feat.toString());        feat.setLength(0);        feat.append("s").append(i).append("*=");        feat.append(type).append("|").append(punct.getType());        features.add(feat.toString());      }    }    else {      if (p != null) {        feat.append(p.getHead().toString()).append("|").append(type).append("|").append(p.getHead().getType());      }      else {        feat.append(type).append("|").append(EOS);      }      features.add(feat.toString());      feat.setLength(0);      feat.append("s").append(i).append("*=");      if (p != null) {        feat.append(type).append("|").append(p.getHead().getType());      }      else {        feat.append(type).append("|").append(EOS);      }      features.add(feat.toString());    }  }  private void checkcons(Parse p, String i, String type, List features) {    StringBuffer feat = new StringBuffer(20);    feat.append("c").append(i).append("=").append(p.getType()).append("|").append(p.getHead().toString()).append("|").append(type);    features.add(feat.toString());    feat.setLength(0);    feat.append("c").append(i).append("*=").append(p.getType()).append("|").append(type);    features.add(feat.toString());  }  private void checkcons(Parse p1, Parse p2, String type, List features) {    StringBuffer feat = new StringBuffer(20);    feat.append("cil=").append(type).append(",").append(p1.getType()).append("|").append(p1.getHead().toString()).append(",").append(p2.getType()).append("|").append(p2.getHead().toString());    features.add(feat.toString());    feat.setLength(0);    feat.append("ci*l=").append(type).append(",").append(p1.getType()).append(",").append(p2.getType()).append("|").append(p2.getHead().toString());    features.add(feat.toString());    feat.setLength(0);    feat.append("cil*=").append(type).append(",").append(p1.getType()).append("|").append(p1.getHead().toString()).append(",").append(p2.getType());    features.add(feat.toString());    feat.setLength(0);    feat.append("ci*l*=").append(type).append(",").append(p1.getType()).append(",").append(p2.getType());    features.add(feat.toString());  }    /**   * Returns predictive context for deciding whether the specified constituents between the specified start and end index    * can be combined to form a new constituent of the specified type.     * @param constituents The constituents which have yet to be combined into new constituents.   * @param type The type of the new constituent proposed.   * @param start The first constituent of the proposed constituent.   * @param end The last constituent of the proposed constituent.   * @return The predictive context for deciding whether a new constituent should be created.   */  public String[] getContext(Parse[] constituents, String type, int start, int end) {    int ps = constituents.length;    List features = new ArrayList(100);    //default     features.add("default");    Parse pstart = constituents[start];    Parse pend = constituents[end];    checkcons(pstart, "begin", type, features);    checkcons(pend, "last", type, features);    StringBuffer production = new StringBuffer(20);    StringBuffer punctProduction = new StringBuffer(20);    production.append("p=").append(type).append("->");    punctProduction.append("pp=").append(type).append("->");    for (int pi = start; pi < end; pi++) {      Parse p = constituents[pi];      checkcons(p, pend, type, features);      production.append(p.getType()).append(",");      punctProduction.append(p.getType()).append(",");      Collection nextPunct = p.getNextPunctuationSet();      if (nextPunct != null) {        for (Iterator pit=nextPunct.iterator();pit.hasNext();) {          Parse punct = (Parse) pit.next();          punctProduction.append(punct.getType()).append(",");        }      }    }    production.append(pend.getType());    punctProduction.append(pend.getType());    features.add(production.toString());    features.add(punctProduction.toString());    Parse p_2 = null;    Parse p_1 = null;    Parse p1 = null;    Parse p2 = null;    Collection p1s = constituents[end].getNextPunctuationSet();    Collection p2s = null;    Collection p_1s = constituents[start].getPreviousPunctuationSet();    Collection p_2s = null;    if (start - 2 >= 0) {      p_2 = constituents[start - 2];    }    if (start - 1 >= 0) {      p_1 = constituents[start - 1];      p_2s = p_1.getPreviousPunctuationSet();    }    if (end + 1 < ps) {      p1 = constituents[end + 1];      p2s = p1.getNextPunctuationSet();    }    if (end + 2 < ps) {      p2 = constituents[end + 2];    }    surround(p_1, -1, type, p_1s, features);    surround(p_2, -2, type, p_2s, features);    surround(p1, 1, type, p1s, features);    surround(p2, 2, type, p2s, features);    return ((String[]) features.toArray(new String[features.size()]));  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -