wordfeatures.java

来自「CRF分类器,是一个很好的研究工具.用于中文信息处理的很好的工具」· Java 代码 · 共 60 行

JAVA
60
字号
package iitb.Model;import iitb.CRF.*;import java.util.*;import java.io.*;/** * * @author Sunita Sarawagi * */ public class WordFeatures extends FeatureTypes {    int stateId;    int statePos;    Object token;    int tokenId;    WordsInTrain dict;    int _numWordStatePairs;    public static int RARE_THRESHOLD=0;    public WordFeatures(FeatureGenImpl m, WordsInTrain d) {	super(m);	dict = d;    }    private void nextStateId() {       	stateId = dict.nextStateWithWord(token, stateId);	statePos++;    }    public boolean startScanFeaturesAt(DataSequence data, int prevPos, int pos) {	stateId = -1;	if (dict.count(data.x(pos)) > RARE_THRESHOLD) {	    token = (data.x(pos));	    tokenId = dict.getIndex(token);	    statePos = -1;	    nextStateId();	    return true;	} 	return false;    }    public boolean hasNext() {	return (stateId != -1);    }    public void next(FeatureImpl f) {        if (featureCollectMode())            setFeatureIdentifier(tokenId*model.numStates()+stateId,stateId,"W_"+token,f);        else            setFeatureIdentifier(tokenId*model.numStates()+stateId,stateId,token,f); 	f.yend = stateId;	f.ystart = -1;	f.val = 1;	nextStateId();    }	/* (non-Javadoc)	 * @see iitb.Model.FeatureTypes#maxFeatureId()	 */	public int maxFeatureId() {		return dict.dictionaryLength()*model.numStates();	}};

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?