📄 segment.java
字号:
wordResult[index].setValue(fValue);
// Utility.insertGraph(optGraph,sg,false);
}
}
ArrayList<WordResult> result = new ArrayList<WordResult>();
if (wordResult != null)
for (WordResult wr : wordResult)
if (wr != null)
result.add(wr);
return result;
}
/**
 * Collects the segment-graph elements that lie on the given path and labels
 * each one with the text it covers.
 *
 * For every consecutive pair {@code (unipath[k], unipath[k + 1])} the matching
 * element is looked up via {@code Utility.getElement}; the last entry of
 * {@code unipath} is paired with {@code -1}. Elements that cannot be found are
 * skipped. Each element found has its word overwritten with the concatenation
 * returned by {@code getWords} for the same index pair.
 *
 * @param atoms   atoms used to rebuild the text of each segment
 * @param sgs     segment-graph elements to search
 * @param unipath indices describing the path
 * @return the elements found, in path order; {@code null} if any argument is
 *         {@code null}
 */
private ArrayList<SegGraph> getSegPath(ArrayList<Atom> atoms, ArrayList<SegGraph> sgs, int[] unipath) {
    if (atoms == null || sgs == null || unipath == null) {
        return null;
    }

    ArrayList<SegGraph> found = new ArrayList<SegGraph>();
    final int lastPos = unipath.length - 1;
    for (int pos = 0; pos <= lastPos; pos++) {
        final int from = unipath[pos];
        // The final path entry has no successor; -1 marks "open ended".
        final int to = (pos < lastPos) ? unipath[pos + 1] : -1;

        SegGraph element = Utility.getElement(sgs, from, to);
        if (element == null) {
            continue;
        }
        element.setWord(getWords(atoms, from, to));
        found.add(element);
    }
    return found;
}
/**
 * Concatenates the words of the atoms in the half-open index range
 * {@code [curIndex, nextIndex)}.
 *
 * @param atoms     atoms to read from
 * @param curIndex  index of the first atom to include
 * @param nextIndex index one past the last atom to include, or {@code -1}
 *                  to run to the end of {@code atoms}
 * @return the concatenated text (empty if the range is empty), or
 *         {@code null} if {@code atoms} is {@code null} or {@code curIndex}
 *         is not a valid index into it
 */
private String getWords(ArrayList<Atom> atoms, int curIndex, int nextIndex) {
    if (atoms == null || curIndex < 0 || curIndex >= atoms.size()) {
        return null;
    }
    final int end = (nextIndex == -1) ? atoms.size() : nextIndex;

    // StringBuilder avoids re-copying the accumulated text on every iteration.
    StringBuilder words = new StringBuilder();
    for (int i = curIndex; i < end; i++) {
        words.append(atoms.get(i).getWord());
    }
    return words.toString();
}
/**
 * Post-processes a segmentation result by merging or splitting neighbouring
 * words according to a fixed list of rules, and returns the adjusted list.
 *
 * Handles are compared as two packed ASCII characters
 * ({@code handle / 256} is the first, {@code handle % 256} the second), e.g.
 * {@code 30208 == 'v' * 256} and {@code 28274 == 'n' * 256 + 'r'}.
 *
 * Rules, tried in this order for each word (the first match wins):
 * <ol>
 * <li>handle 28274 and {@code chineseNameSplit} yields a result: emit the
 *     non-null first/mid/last name parts as separate 28274 words;</li>
 * <li>ABB (e.g. 一段段, 一片片): handle 27904 followed by two identical
 *     2-byte words: merge all three;</li>
 * <li>AA, optionally extended to AAB (e.g. 洗/洗/脸, 蒙蒙亮): a 2-byte word
 *     followed by the same word;</li>
 * <li>A + AB (e.g. 洗/洗澡): a 2-byte v/a word followed by a 4-byte word that
 *     starts with it;</li>
 * <li>a {@code u} sub-tag (uj, ud, uv, uz, ul, ug) is normalised to plain
 *     {@code u}; the word is then copied unchanged;</li>
 * <li>A + AB + C where AB also starts with C: merge all three;</li>
 * <li>handle 28275 followed by a known suffix: merge;</li>
 * <li>handle 30208 or 28160 followed by "员": merge;</li>
 * <li>handle 28280: absorb following 28280 words, dots and all-digit 27904
 *     words (e.g. www/nx ./w sina/nx; EIM/nx -601/m).</li>
 * </ol>
 * A word that matches no merging rule is copied to the result as a new
 * {@code WordResult}. A rule that needs a following word simply does not match
 * when the list ends first.
 *
 * NOTE(review): the 2-byte / 4-byte tests use {@code String.getBytes()} with
 * the platform default charset. The examples above suggest they are meant to
 * mean "one / two Chinese characters", which only holds for a double-byte
 * default encoding such as GBK -- confirm the deployment charset.
 *
 * Side effect: rule 5 changes the handle of the element inside {@code wrs}.
 *
 * @param wrs words to adjust
 * @return a new list with the adjusted words, or {@code null} if {@code wrs}
 *         is {@code null} or empty
 */
public ArrayList<WordResult> adjust(ArrayList<WordResult> wrs) {
    if (wrs == null || wrs.size() == 0)
        return null;

    final int size = wrs.size();
    ArrayList<WordResult> result = new ArrayList<WordResult>();

    for (int i = 0; i < size; i++) {
        // Must be reset for every word: otherwise, once any rule has fired,
        // all later unmatched words would be silently dropped.
        boolean isBeProcess = false;
        PersonName pname = null;

        final WordResult wr = wrs.get(i);
        // Look-ahead words; null when the list ends before them.
        final WordResult next = (i + 1 < size) ? wrs.get(i + 1) : null;
        final WordResult next2 = (i + 2 < size) ? wrs.get(i + 2) : null;

        // Rule 1: split a person name into its parts.
        if (wr.getHandle() == 28274 && (pname = chineseNameSplit(wr.getWord(), unPerson.unDict)) != null
                && !"叶利钦".equals(wr.getWord())) {
            if (pname.getFirstName() != null) {
                WordResult wr2 = newAdjustedWord(pname.getFirstName());
                wr2.setHandle(28274);
                result.add(wr2);
            }
            if (pname.getMidName() != null) {
                WordResult wr2 = newAdjustedWord(pname.getMidName());
                wr2.setHandle(28274);
                result.add(wr2);
            }
            if (pname.getLastName() != null) {
                WordResult wr2 = newAdjustedWord(pname.getLastName());
                wr2.setHandle(28274);
                result.add(wr2);
            }
            isBeProcess = true;
        }
        // Rule 2: overlap words ABB.
        else if (wr.getHandle() == 27904 && next != null && next2 != null
                && next.getWord().getBytes().length == 2
                && next.getWord().equals(next2.getWord())) {
            WordResult wr2 = newAdjustedWord(wr.getWord() + next.getWord() + next2.getWord());
            wr2.setHandle(27904);
            result.add(wr2);
            i += 2;
            isBeProcess = true;
        }
        // Rule 3: overlap words AA (compare the words, not the WordResult object).
        else if (next != null && wr.getWord().getBytes().length == 2
                && wr.getWord().equals(next.getWord())) {
            WordResult wr2 = newAdjustedWord(wr.getWord() + next.getWord());
            wr2.setHandle(24832); // 24832 == 'a' * 256
            if (wr.getHandle() / 256 == 'v' || next.getHandle() / 256 == 'v')
                wr2.setHandle(30208); // 30208 == 'v' * 256
            if (wr.getHandle() / 256 == 'n' || next.getHandle() / 256 == 'n')
                wr2.setHandle('n' * 256);
            i += 1;
            // AAB: also absorb the word after the pair when the tags fit.
            if (next2 != null && next2.getWord().getBytes().length == 2
                    && ((wr2.getHandle() == 30208 && next2.getHandle() / 256 == 'n')
                            || (wr2.getHandle() == 24832 && next2.getHandle() / 256 == 'a'))) {
                wr2.setWord(wr2.getWord() + next2.getWord());
                i += 1;
            }
            isBeProcess = true;
            result.add(wr2);
        }
        // Rule 4: A + AB.
        else if (wr.getWord().getBytes().length == 2
                && (wr.getHandle() / 256 == 'v' || wr.getHandle() == 24832)
                && next != null
                && next.getWord().getBytes().length == 4
                && next.getWord().indexOf(wr.getWord()) == 0) {
            WordResult wr2 = newAdjustedWord(wr.getWord() + next.getWord());
            wr2.setHandle(24832); // 24832 == 'a' * 256
            if (wr.getHandle() / 256 == 'v' || next.getHandle() / 256 == 'v')
                wr2.setHandle(30208); // 30208 == 'v' * 256
            i += 1;
            isBeProcess = true;
            result.add(wr2);
        }
        // Rule 5: uj, ud, uv, uz, ul, ug -> u. Not marked as processed, so the
        // word is still copied below with its normalised handle.
        else if (wr.getHandle() / 256 == 'u' && wr.getHandle() % 256 != 0) {
            wr.setHandle('u' * 256);
        }
        // Rule 6: A + AB + C, where AB starts with both A and C.
        else if (wr.getWord().getBytes().length == 2 && next != null && next2 != null
                && next.getWord().getBytes().length == 4
                && next.getWord().indexOf(wr.getWord()) == 0
                && next.getWord().indexOf(next2.getWord()) == 0) {
            WordResult wr2 = newAdjustedWord(wr.getWord() + next.getWord() + next2.getWord());
            wr2.setHandle(next.getHandle());
            i += 2;
            isBeProcess = true;
            result.add(wr2);
        }
        // Rule 7: handle 28275 ('n' * 256 + 's') followed by a suffix.
        else if (wr.getHandle() == 28275) {
            if (next != null) {
                final String suffix = next.getWord();
                int mergedHandle = 0; // 0 == no suffix rule matched
                if (unPlace.unDict.isExist(suffix, 4)) {
                    mergedHandle = 28275;
                } else if (suffix.getBytes().length == 2 && "队".equals(suffix)) {
                    mergedHandle = 28276;
                } else if (suffix.getBytes().length == 2 && "语文字杯".indexOf(suffix) != -1) {
                    mergedHandle = 28282;
                } else if ("裔".equals(suffix)) {
                    mergedHandle = 28160;
                }
                if (mergedHandle != 0) {
                    WordResult wr2 = newAdjustedWord(wr.getWord() + suffix);
                    wr2.setHandle(mergedHandle);
                    i += 1;
                    isBeProcess = true;
                    result.add(wr2);
                }
            }
        }
        // Rule 8: v or n followed by "员".
        else if (wr.getHandle() == 30208 || wr.getHandle() == 28160) {
            if (next != null && "员".equals(next.getWord())) {
                WordResult wr2 = newAdjustedWord(wr.getWord() + next.getWord());
                wr2.setHandle(28160);
                i += 1;
                isBeProcess = true;
                result.add(wr2);
            }
        }
        // Rule 9: handle 28280 ('n' * 256 + 'x'); glue following pieces.
        else if (wr.getHandle() == 28280) {
            WordResult wr2 = newAdjustedWord(wr.getWord());
            wr2.setHandle(28280);
            while (i + 1 < size) {
                final WordResult follower = wrs.get(i + 1);
                final boolean joinable = follower.getHandle() == 28280
                        || "..".indexOf(follower.getWord()) != -1
                        || (follower.getHandle() == 27904 && Utility.isAllNum(follower.getWord()));
                if (!joinable)
                    break;
                wr2.setWord(wr2.getWord() + follower.getWord());
                i += 1;
            }
            isBeProcess = true;
            result.add(wr2);
        }

        if (!isBeProcess) {
            // No merge/split applied: copy the word to the final result.
            WordResult wr2 = newAdjustedWord(wr.getWord());
            wr2.setHandle(wr.getHandle());
            result.add(wr2);
        }
    }
    return result;
}

/** Creates a fresh {@code WordResult} carrying only the given word. */
private WordResult newAdjustedWord(String word) {
    WordResult created = new WordResult();
    created.setWord(word);
    return created;
}
/**
 * Checks whether {@code word} can be split into a surname and a given name
 * using {@code personDict}.
 *
 * The word must be 2 to 4 characters long and every atom returned by
 * {@code GFString.atomSplit} must be of type {@code CT_CHINESE} or
 * {@code CT_OTHER}. A 2-character surname is tried first, then a 1-character
 * one; if neither is in the dictionary the surname is treated as absent.
 *
 * NOTE(review): {@code result} is never assigned in this method, so it
 * returns {@code null} on every path and callers testing for a non-null
 * result will never see a split. Building the {@code PersonName} from
 * {@code surName} / {@code giveName} appears to be missing -- confirm against
 * the intended design.
 *
 * @param word       candidate person name
 * @param personDict dictionary consulted for surnames and frequencies
 * @return currently always {@code null} (see note above)
 */
public PersonName chineseNameSplit(String word, Dictionary personDict) {
    PersonName result = null;
    if (word != null && personDict != null) {
        int len = word.length();
        if (len < 2 || len > 4)
            return null;

        String[] atoms = GFString.atomSplit(word);
        for (String s : atoms) {
            if (Utility.charType(s) != Utility.CT_CHINESE && Utility.charType(s) != Utility.CT_OTHER)
                return null;
        }

        // len >= 2 is guaranteed here, so both prefixes below are in range.
        int surNameLen = 2;
        String surName = word.substring(0, surNameLen);
        if (!personDict.isExist(surName, 1)) {
            surNameLen = 1;
            surName = word.substring(0, surNameLen);
            if (!personDict.isExist(surName, 1)) {
                surName = null;
                surNameLen = 0;
            }
        }

        String giveName = word.substring(surNameLen);
        if (len > 3) {
            // For 4-character words, drop one more leading character from the
            // given name when that character is itself in the dictionary.
            String temp = word.substring(surNameLen, surNameLen + 1);
            if (personDict.isExist(temp, 1)) {
                giveName = word.substring(surNameLen + 1);
            }
        }

        // A 2-character word that is entirely a surname leaves no given name;
        // without this guard giveName.substring(0, 1) below would throw
        // StringIndexOutOfBoundsException.
        if (giveName.length() == 0)
            return null;

        // NOTE(review): surName may be null here when no surname was found;
        // getFrequency is assumed to tolerate that -- confirm.
        double freq = personDict.getFrequency(surName, 1);
        String temp = giveName.substring(0, 1);
        double freq2 = personDict.getFrequency(temp, 2);

        // Parentheses make the original operator precedence explicit
        // (&& binds tighter than ||); the evaluation order is unchanged.
        if (surNameLen != 2
                && ((surNameLen == 0 && len > 2)
                        || giveName.length() > 2
                        || (getForeignCharCount(word) >= 3
                                && freq < personDict.getFrequency("张", 1) / 40
                                && freq2 < personDict.getFrequency("华", 2) / 20)
                        || (freq < 10 && getForeignCharCount(giveName) == (len - surNameLen) / 2)))
            return null;

        if (len == 2 && unPerson.isGivenName(word))
            return null;
    }
    return result;
}
/**
 * Placeholder: the argument is ignored and 0 is always returned.
 *
 * NOTE(review): presumably meant to count transliteration characters in
 * {@code name}; no such logic is implemented here.
 *
 * @param name the name to inspect (currently unused)
 * @return always 0
 */
private int getForeignCharCount(String name) {
return 0;
}
/**
 * Returns the current value of the {@code splitedWord} field.
 *
 * @return the value of {@code splitedWord}
 */
public String getResult() {
    return this.splitedWord;
}
/**
 * Returns the current value of the {@code spendTime} field.
 *
 * @return the value of {@code spendTime}
 */
public long getSpendTime() {
    return this.spendTime;
}
/**
 * Stores the given value in the {@code spendTime} field.
 *
 * @param spendTime the value to store
 */
public void setSpendTime(long spendTime) {
this.spendTime = spendTime;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -