⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tokcontextgenerator.java

📁 自然语言处理领域的一个开发包
💻 JAVA
字号:
///////////////////////////////////////////////////////////////////////////////// Copyright (C) 2000 Jason Baldridge and Gann Bierner// // This library is free software; you can redistribute it and/or// modify it under the terms of the GNU Lesser General Public// License as published by the Free Software Foundation; either// version 2.1 of the License, or (at your option) any later version.// // This library is distributed in the hope that it will be useful,// but WITHOUT ANY WARRANTY; without even the implied warranty of// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the// GNU Lesser General Public License for more details.// // You should have received a copy of the GNU Lesser General Public// License along with this program; if not, write to the Free Software// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.//////////////////////////////////////////////////////////////////////////////package opennlp.tools.tokenize;import opennlp.maxent.ContextGenerator;import opennlp.tools.util.ObjectIntPair;import java.util.List;import java.util.ArrayList;/** * Generate events for maxent decisions for tokenization. * * @author      Jason Baldridge * @version     $Revision: 1.2 $, $Date: 2004/01/26 14:16:37 $ */public class TokContextGenerator implements ContextGenerator {    public static final String SPLIT ="T";    public static final String NO_SPLIT ="F";    /**     * Builds up the list of features based on the information in the Object,     * which is a pair containing a String and and Integer which     * indicates the index of the position we are investigating.     */    public String[] getContext(Object o) {		String sb = (String)((ObjectIntPair)o).a;	int id = ((ObjectIntPair)o).b;			List preds = new ArrayList();	preds.add("p="+sb.substring(0,id));	preds.add("s="+sb.substring(id));	if (id>0) {	    addCharPreds("p1", sb.charAt(id-1), preds);	    if (id>1) {		addCharPreds("p2", sb.charAt(id-2), preds);		preds.add("p21="+sb.charAt(id-2)+sb.charAt(id-1));	    }	    else {	        preds.add("p2=bok");	    }	    preds.add("p1f1="+sb.charAt(id-1)+sb.charAt(id));	}	else {	    preds.add("p1=bok");	}	addCharPreds("f1",sb.charAt(id), preds);	if (id+1 < sb.length()) {	    addCharPreds("f2", sb.charAt(id+1), preds);	    preds.add("f12="+sb.charAt(id)+sb.charAt(id+1));	}	else {	    preds.add("f2=bok");	}	if (sb.charAt(0) == '&' && sb.charAt(sb.length()-1) == ';') {	  preds.add("cc");//character code	}	String[] context = new String[preds.size()];	preds.toArray(context);	return context;    }        /**     * Helper function for getContext.     */    private void addCharPreds(String key, char c, List preds) {	preds.add(key + "=" + c);	if (Character.isLetter(c)) {	    preds.add(key+"_alpha");	    if (Character.isUpperCase(c)) {		preds.add(key+"_caps");	    }	} else if (Character.isDigit(c)) {	    preds.add(key+"_num");	} else if (Character.isWhitespace(c)) {	    preds.add(key+"_ws");	} else {	    if (c=='.' || c=='?' || c=='!') {		preds.add(key+"_eos");	    } else if (c=='`' || c=='"' || c=='\'') {		preds.add(key+"_quote");	    } else if (c=='[' || c=='{' || c=='(') {		preds.add(key+"_lp");	    } else if (c==']' || c=='}' || c==')') {		preds.add(key+"_rp");	    }	}    } }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -