📄 fenci.java~13~
字号:
package abstrac;
import java.util.*;
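/**
 * Word segmentation ("fenci").
 *
 * The entry point {@code fenci1} takes the string to be segmented and returns
 * the resulting word tokens as a {@code String[]}.
 */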
public class Fenci {

    /** Token separators: whitespace, ASCII punctuation and the digits 0-9. */
    private static final String DELIMITERS =
            " \t\n\r\f~!@#$%^&*()_+|`1234567890-=\\{}[]:\";'<>?,./'";

    /** Irregular plurals mapped to their singular form. */
    private static final Map<String, String> IRREGULAR_PLURALS =
            new HashMap<String, String>();

    static {
        IRREGULAR_PLURALS.put("feet", "foot");
        IRREGULAR_PLURALS.put("geese", "goose");
        IRREGULAR_PLURALS.put("lice", "louse");
        IRREGULAR_PLURALS.put("mice", "mouse");
        IRREGULAR_PLURALS.put("teeth", "tooth");
        IRREGULAR_PLURALS.put("oxen", "ox");
        IRREGULAR_PLURALS.put("children", "child");
    }

    /**
     * Words that end in a single "s" without being plurals; their trailing
     * "s" must be kept. Words ending in "ss" (unless, across, nevertheless,
     * class, ...) are handled by a general rule in {@link #singularize}.
     */
    private static final Set<String> NON_PLURAL_S_WORDS =
            new HashSet<String>(Arrays.asList(
                    "as", "was", "besides", "is", "has", "this", "us",
                    "towards", "thus", "always"));

    /**
     * Splits {@code source} into words and normalizes each one.
     *
     * <p>The text is cut at every character in {@link #DELIMITERS}
     * (whitespace, punctuation and digits). Each token is lower-cased,
     * trimmed, and then reduced from plural to singular by
     * {@link #singularize}.
     *
     * @param source the text to segment; {@code null} is treated as empty
     * @return the normalized tokens in order of appearance; never
     *         {@code null}, empty when no word is found
     */
    public String[] fenci1(String source) {
        if (source == null) {
            return new String[0];
        }

        List<String> words = new ArrayList<String>();
        StringTokenizer tokenizer = new StringTokenizer(source, DELIMITERS);
        while (tokenizer.hasMoreTokens()) {
            // Locale.ROOT keeps the lower-casing independent of the JVM's
            // default locale (e.g. the Turkish dotless i).
            String token = tokenizer.nextToken().toLowerCase(Locale.ROOT).trim();
            // trim() also strips control characters that are not delimiters,
            // so a token made only of those becomes empty and is skipped.
            if (token.length() > 0) {
                words.add(singularize(token));
            }
        }
        return words.toArray(new String[words.size()]);
    }

    /**
     * Converts a lower-case English plural to its singular form using
     * suffix heuristics; words that match no rule are returned unchanged.
     *
     * <p>Plural-to-singular rules follow:
     * http://ftp.haie.edu.cn/Resource/GZ/GZYY/DCYFWF/NJSYYY/421b0061ZW_0015.htm
     *
     * <p>The rules are purely suffix based, so they can mis-stem words that
     * merely look plural (for example "houses" becomes "hous").
     *
     * @param token a non-empty, lower-case word
     * @return the singular form according to the rules above
     */
    private static String singularize(String token) {
        String irregular = IRREGULAR_PLURALS.get(token);
        if (irregular != null) {
            return irregular;
        }

        int length = token.length();
        if (token.endsWith("men")) {
            // women -> woman
            return token.substring(0, length - 3) + "man";
        }
        if (token.endsWith("ies")) {
            // cities -> city
            return token.substring(0, length - 3) + "y";
        }
        if (token.endsWith("ves")) {
            boolean singularEndsInFe = token.equals("knives")
                    || token.equals("wives")
                    || token.equals("lives");
            // knives -> knife, wolves -> wolf
            return token.substring(0, length - 3) + (singularEndsInFe ? "fe" : "f");
        }
        if (token.endsWith("oes") || token.endsWith("ches")
                || token.endsWith("shes") || token.endsWith("ses")
                || token.endsWith("xes")) {
            // heroes -> hero, churches -> church, boxes -> box
            return token.substring(0, length - 2);
        }
        if (token.endsWith("s")) {
            // Keep a lone "s" (left over from e.g. "it's") instead of
            // producing an empty token, keep words ending in "ss" (they are
            // singular), and keep the listed non-plural words.
            if (length == 1 || token.endsWith("ss")
                    || NON_PLURAL_S_WORDS.contains(token)) {
                return token;
            }
            return token.substring(0, length - 1);
        }
        return token;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -