⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regexcountfeatures.java

📁 CRF1.2
💻 JAVA
字号:
/* * Created on Feb 18, 2005 * * TODO To change the template for this generated file go to * Window - Preferences - Java - Code Style - Code Templates */package iitb.Model;import java.util.regex.Pattern;import iitb.CRF.DataSequence;/** * @author imran * */public class RegexCountFeatures extends FeatureTypes {    String patternString[][] = {    		{"isInitCapitalWord",     		"[A-Z][a-z]+"        },    		{"isAllCapitalWord",      		"[A-Z][A-Z]+"                },    		{"isAllSmallCase",      	"[a-z]+"                },    		{"isWord",           		"[a-zA-Z][a-zA-Z]+"     },    		{"isAlphaNumeric",      	"[a-zA-Z0-9]+"          },    		{"singleCapLetter",  		"[A-Z]"  				},    		{"isSpecialCharacter",		"[#;:\\-/<>'\"()&]"},    		//{"singlePunctuation", 		"\\p{Punct}"			},    		{"singleDot", 				"[.]"			},    		{"singleComma", 			"[,]"			},    		{"containsDigit", 			".*\\d+.*"		},				    		{"isDigits", 				"\\d+"			},    	};    Pattern p[];	int patternOccurence[], index, maxSegmentLength;    /**     * @param m     */    public RegexCountFeatures(FeatureGenImpl m, int maxSegmentLength) {        super(m);        this.maxSegmentLength = maxSegmentLength;        p = new Pattern[patternString.length];		for(int i = 0; i < patternString.length; i++)			p[i] = Pattern.compile(patternString[i][1]);		patternOccurence = new int[patternString.length];    }    public boolean startScanFeaturesAt(DataSequence data, int prevPos, int pos) {                int i, j;		for(j = 0; j < patternOccurence.length; j++)		    patternOccurence[j] = 0;		for(i = prevPos + 1; i <= pos; i++){		    for(j = 0; j < p.length; j++){		        if(p[j].matcher((String)data.x(i)).matches())		            patternOccurence[j]++;		    }		}		index = -1;        return advance();    }    private boolean advance() {                while(++index < (patternOccurence.length) && patternOccurence[index] <= 0);        return index < patternOccurence.length;    }    public boolean hasNext() {        return index < patternOccurence.length;    }    public void next(FeatureImpl f) {		f.val = 1;		patternOccurence[index] = Math.min(maxSegmentLength,patternOccurence[index]);		f.strId.id = maxSegmentLength * (index+1) + patternOccurence[index];		f.ystart = -1;		if(featureCollectMode()){			f.strId.name = patternString[index][0] + "_Count_" + patternOccurence[index];			//System.out.println((String)f.strId.name +" " +index + " " + f.strId.id);		}    	advance();    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -