// File: process.java
// (code-viewer residue) Font size:
/*
* Process.java
*
* Created on May 20, 2007, 10:01 AM
*
* To change this template, choose Tools | Template Manager
* and open the template in the editor.
*/
package cnu.nlp;
import java.util.*;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.math.NumberUtils;
/**
 * Splits an input string into {@link WordItem} tokens using {@link WordTable}
 * lookups, then merges neighbouring tokens that look like person names,
 * numbers or runs of ASCII letters.
 *
 * <p>Usage: {@link #setInputString(String)}, then {@link #doProcess()}, then
 * {@link #getResult()}. The input and the token list are instance state of a
 * single shared instance, and no synchronization is performed in this class.
 *
 * @author gnehzuil
 */
public class Process {
    /** The single shared instance handed out by {@link #getInstance()}. */
    private static final Process instance = new Process();

    /**
     * Returns the shared instance.
     *
     * @return the only {@code Process} object
     */
    public static Process getInstance() {
        return instance;
    }

    /** Lookup tables used by every processing step. */
    private static final WordTable wordTable = WordTable.getInstance();

    /** Text to segment; {@code null} until {@link #setInputString(String)} stores a value. */
    private String inputString;

    /** Token list produced by {@link #doProcess()} and rendered by {@link #getResult()}. */
    private final List<WordItem> result;

    /** Creates a new instance of Process */
    private Process() {
        result = new ArrayList<WordItem>();
    }

    /**
     * Stores the text to segment after trimming it and stripping newline
     * characters from both ends.
     *
     * @param inputString the raw text; {@code null} is stored as {@code null}
     */
    public void setInputString(String inputString) {
        String trimmed = StringUtils.trim(inputString);
        this.inputString = StringUtils.strip(trimmed, "\n");
    }

    /**
     * Renders the current token list as one string.
     *
     * @return every token followed by {@code "/nr "} when it is flagged unknown
     *         and by {@code "/ "} otherwise; empty when there are no tokens
     */
    public String getResult() {
        StringBuilder res = new StringBuilder();
        for (WordItem item : result) {
            res.append(item.getWord());
            res.append(item.isUnknown() ? "/nr " : "/ ");
        }
        return res.toString();
    }

    /**
     * Runs the whole pipeline on the stored input: segmentation, then name,
     * number and ASCII-letter merging, in that order.
     */
    public void doProcess() {
        normalProcess();
        firstNameProcess();
        numberProcess();
        alphaProcess();
    }

    /**
     * Rebuilds {@link #result} by scanning the input left to right and growing
     * a candidate {@code inputString[begin, pos)} one character at a time while
     * {@code wordTable.lookupLogonWord} accepts it.
     *
     * <p>Every token produced here has its unknown flag cleared. A null input
     * yields an empty token list.
     */
    private void normalProcess() {
        // True for exactly one iteration after a candidate was rejected: the
        // candidate is then the single character that caused the rejection.
        boolean flag = false;
        int begin = 0, pos = 1;
        result.clear();
        if (inputString == null) {
            // Nothing to segment (no input set yet, or null was passed in).
            return;
        }
        while (pos != inputString.length() + 1) {
            if (!wordTable
                    .lookupLogonWord(inputString.substring(begin, pos))) {
                WordItem item = new WordItem();
                if (!flag) {
                    // Emit the candidate without its last character. This is
                    // the empty string when the candidate is one character
                    // long, so empty tokens can appear in the list.
                    item.setWord(inputString.substring(begin, pos - 1));
                } else {
                    // The single retried character is not accepted either:
                    // emit it on its own.
                    item.setWord(inputString.substring(begin, pos));
                }
                item.setUnknown(false);
                result.add(item);
                if (!flag) {
                    // Retry starting at the character that broke the match.
                    flag = true;
                    begin = pos - 1;
                } else {
                    flag = false;
                    begin = pos;
                    ++pos;
                }
            } else if (pos == inputString.length()) {
                // Accepted candidate reaches the end of the input: emit it and
                // step past the end so the loop terminates.
                WordItem item = new WordItem();
                item.setWord(inputString.substring(begin, pos));
                item.setUnknown(false);
                result.add(item);
                ++pos;
            } else {
                // Accepted so far: try to extend by one more character.
                flag = false;
                ++pos;
            }
        }
    }

    /**
     * Runs the three name passes over the token list: tokens matched by
     * {@code lookupPostfixFirstName}, then by {@code lookupPrefixFirstName},
     * then by {@code lookupFirstName}.
     */
    private void firstNameProcess() {
        // process postfix
        for (int i = 0; i < result.size(); i++) {
            if (wordTable
                    .lookupPostfixFirstName(result.get(i).getWord())) {
                int sizeBefore = result.size();
                doPostfixFirstName(i);
                // The postfix step only removes tokens at or before index i, so
                // everything behind it shifted left by the number of removals.
                // Rewind by that amount so the tokens after the match are not
                // skipped by the loop increment.
                i -= sizeBefore - result.size();
            }
        }
        // process prefix
        for (int i = 0; i < result.size(); i++) {
            if (wordTable.lookupPrefixFirstName(result.get(i).getWord())) {
                // Only tokens after i are merged, so no index fix-up is needed.
                doPrefixFirstName(i);
            }
        }
        // process firstname
        for (int i = 0; i < result.size(); i++) {
            if (wordTable.lookupFirstName(result.get(i).getWord())) {
                // Tokens after i are folded into i, so no index fix-up is needed.
                doFirstName(i);
            }
        }
    }

    /**
     * Handles a token matched by {@code lookupPostfixFirstName}: looks 4, 3
     * and then 2 tokens back for one accepted by {@code lookupFirstName} or
     * {@code lookupTransName}, merges from there up to the token just before
     * {@code pos}, and flags the merged token unknown.
     *
     * @param pos index of the postfix token in {@link #result}
     */
    private void doPostfixFirstName(int pos) {
        for (int span = 4; span >= 2; span--) {
            int start = pos - span;
            if (start >= 0 && startsName(start)) {
                mergeTokens(start, pos - 1);
                result.get(start).setUnknown(true);
                return;
            }
        }
        // NOTE(review): unlike the branches above, this one requires BOTH
        // lookups to succeed (&& rather than ||) and also merges the postfix
        // token itself into the name. Kept as written; confirm it is intended.
        if (pos - 1 >= 0
                && wordTable.lookupFirstName(result.get(pos - 1).getWord())
                && wordTable.lookupTransName(result.get(pos - 1).getWord())) {
            mergeTokens(pos - 1, pos);
            result.get(pos - 1).setUnknown(true);
        }
    }

    /**
     * Handles a token matched by {@code lookupPrefixFirstName}: when the next
     * token is accepted by {@code lookupFirstName} or {@code lookupTransName},
     * it is flagged unknown after absorbing up to two following
     * single-character tokens accepted by {@code lookupLogonWord}.
     *
     * @param pos index of the prefix token in {@link #result}
     */
    private void doPrefixFirstName(int pos) {
        int nameStart = pos + 1;
        if (nameStart >= result.size() || !startsName(nameStart)) {
            return;
        }
        int nameEnd = nameStart;
        if (isSingleCharWord(pos + 2)) {
            nameEnd = isSingleCharWord(pos + 3) ? pos + 3 : pos + 2;
        }
        mergeTokens(nameStart, nameEnd);
        result.get(nameStart).setUnknown(true);
    }

    /**
     * Merges runs of numeric tokens: first tokens made of digits, then tokens
     * accepted by {@code lookupChineseNumberWord}.
     */
    private void numberProcess() {
        doArabicNumberProcess();
        for (int i = 0; i < result.size(); i++) {
            if (wordTable.lookupChineseNumberWord(result.get(i).getWord())) {
                doChNumberProcess(i);
            }
        }
    }

    /**
     * Finds every token consisting only of digits and merges the run that
     * starts there.
     */
    private void doArabicNumberProcess() {
        for (int i = 0; i < result.size(); i++) {
            String digit = result.get(i).getWord();
            if (digit.equals("")) {
                continue;
            }
            if (NumberUtils.isDigits(digit)) {
                // The merge deliberately starts one token early, which absorbs
                // the empty token that segmentation can leave in front of the
                // number. Clamp at 0: a numeric first token used to produce
                // index -1 and an IndexOutOfBoundsException.
                // NOTE(review): a non-empty preceding token is merged into the
                // number as well - confirm that is wanted.
                doNumberProcess(i > 0 ? i - 1 : i);
            }
        }
    }

    /**
     * Merges the token at {@code pos} with the following run of digit tokens.
     * Empty tokens inside the run are skipped over; any other token ends it.
     *
     * @param pos index of the token that receives the merged text
     */
    private void doNumberProcess(int pos) {
        int end = pos;
        for (int i = pos + 1; i < result.size(); i++) {
            String digit = result.get(i).getWord();
            if (digit.equals("")) {
                continue;
            } else if (NumberUtils.isDigits(digit)) {
                end = i;
            } else {
                break;
            }
        }
        mergeTokens(pos, end);
    }

    /**
     * Handles a token matched by {@code lookupFirstName}: absorbs the next
     * token when it is a single character accepted by {@code lookupLogonWord},
     * plus the one after it under the same condition unless
     * {@code lookupPrepWord} accepts it, and flags the merged token unknown.
     * Does nothing when the next token does not qualify.
     *
     * @param pos index of the matched token in {@link #result}
     */
    private void doFirstName(int pos) {
        if (!isSingleCharWord(pos + 1)) {
            return;
        }
        int last = pos + 1;
        if (isSingleCharWord(pos + 2)
                && !wordTable.lookupPrepWord(result.get(pos + 2).getWord())) {
            last = pos + 2;
        }
        mergeTokens(pos, last);
        result.get(pos).setUnknown(true);
    }

    /**
     * Starts an ASCII-letter merge at every token made only of printable
     * ASCII characters (the empty token included).
     */
    private void alphaProcess() {
        for (int i = 0; i < result.size(); i++) {
            String alpha = result.get(i).getWord();
            if (StringUtils.isAsciiPrintable(alpha)) {
                doAlphaProcess(i);
            }
        }
    }

    /**
     * Merges the token at {@code pos} with the following run of tokens that
     * consist only of ASCII letters.
     *
     * @param pos index of the token that receives the merged text
     */
    private void doAlphaProcess(int pos) {
        int end = pos;
        for (int i = pos + 1; i < result.size(); i++) {
            String alpha = result.get(i).getWord();
            // isAlpha alone accepts every Unicode letter, CJK ideographs
            // included, so an ASCII run used to swallow the Chinese words
            // behind it. Requiring printable ASCII as well restricts the run
            // to ASCII letters.
            if (StringUtils.isAsciiPrintable(alpha)
                    && StringUtils.isAlpha(alpha)) {
                end = i;
            } else {
                break;
            }
        }
        mergeTokens(pos, end);
    }

    /**
     * Merges the token at {@code pos} with the following run of tokens
     * accepted by {@code lookupChineseNumberWord}.
     *
     * @param pos index of the token that receives the merged text
     */
    private void doChNumberProcess(int pos) {
        int end = pos;
        for (int i = pos + 1; i < result.size(); i++) {
            if (wordTable.lookupChineseNumberWord(result.get(i).getWord())) {
                end = i;
            } else {
                break;
            }
        }
        mergeTokens(pos, end);
    }

    /** Tells whether the token at {@code index} is accepted by lookupFirstName or lookupTransName. */
    private boolean startsName(int index) {
        String word = result.get(index).getWord();
        return wordTable.lookupFirstName(word)
                || wordTable.lookupTransName(word);
    }

    /** Tells whether {@code index} is in range and holds a one-character token accepted by lookupLogonWord. */
    private boolean isSingleCharWord(int index) {
        if (index >= result.size()) {
            return false;
        }
        String word = result.get(index).getWord();
        return word.length() == 1 && wordTable.lookupLogonWord(word);
    }

    /** Concatenates tokens {@code first..last} (inclusive) into {@code first} and removes the rest; no-op when {@code last <= first}. */
    private void mergeTokens(int first, int last) {
        if (last <= first) {
            return;
        }
        StringBuilder merged = new StringBuilder();
        for (WordItem item : result.subList(first, last + 1)) {
            merged.append(item.getWord());
        }
        result.get(first).setWord(merged.toString());
        // Drop the absorbed tokens in one range operation.
        result.subList(first + 1, last + 1).clear();
    }
}
/**
 * One token of the segmented text: the token's characters plus a flag telling
 * whether the token is marked as unknown.
 */
class WordItem {
    /** Text of the token; {@code null} until a value is set. */
    private String word;

    /** Unknown marker; {@code false} until a value is set. */
    private boolean unknown;

    /**
     * Replaces the token text.
     *
     * @param word the new text
     */
    public void setWord(String word) {
        this.word = word;
    }

    /**
     * Returns the token text.
     *
     * @return the text last stored, or {@code null} if none was stored
     */
    public String getWord() {
        return this.word;
    }

    /**
     * Sets or clears the unknown marker.
     *
     * @param unknown the new marker value
     */
    public void setUnknown(boolean unknown) {
        this.unknown = unknown;
    }

    /**
     * Returns the unknown marker.
     *
     * @return {@code true} when the token is marked unknown
     */
    public boolean isUnknown() {
        return this.unknown;
    }
}
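/*
 * Keyboard shortcuts (code-viewer residue, not part of the program):
 *   Copy code:          Ctrl + C
 *   Search code:        Ctrl + F
 *   Full-screen mode:   F11
 *   Toggle theme:       Ctrl + Shift + D
 *   Show shortcuts:     ?
 *   Increase font size: Ctrl + =
 *   Decrease font size: Ctrl + -
 */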