📄 dictionary.java
字号:
}
return true;
}
}
return false;
}
// The data for modify
/**
 * Drops the modify table by clearing the {@code mts} reference.
 *
 * @return always {@code true}
 */
protected boolean delModified() {
    // Nothing is released explicitly; the reference is simply reset.
    mts = null;
    return true;
}
/**
 * Tells whether a word with the given handle is present in either the
 * original table or the modify table.
 *
 * @param word
 *            the word to look up; {@code null} is treated as "not present"
 * @param handle
 *            the handle to match; passed through unchanged to
 *            {@link #findInOriginalTable(int, String, int)} and
 *            {@link #findInModifyTable(int, String, int)}
 * @return {@code true} if one of the two lookups returns a non-negative
 *         position, {@code false} otherwise
 */
public boolean isExist(String word, int handle) {
    if (word == null) {
        return false;
    }
    Preword pw = preProcessing(word);
    // preProcessing marks unsupported input with index -1. A negative index
    // cannot address any table, so report "not present" instead of letting
    // the lookups index a list with it.
    if (pw == null || pw.getIndex() < 0) {
        return false;
    }
    int index = pw.getIndex();
    String res = pw.getRes();
    return findInOriginalTable(index, res, handle) >= 0
            || findInModifyTable(index, res, handle) >= 0;
}
/**
 * Collects the handle and frequency of every dictionary entry that matches
 * the given word.
 *
 * <p>The original table is consulted first; if it contains the word, only
 * its entries are returned. Otherwise the modify table is consulted.
 *
 * @param word
 *            the word to look up
 * @return {@code null} if {@code word} is {@code null}; otherwise a list
 *         (possibly empty) of new {@code WordItem}s carrying the handle,
 *         the frequency and the caller's {@code word}
 */
public ArrayList<WordItem> getHandle(String word) {
    if (word == null) {
        return null;
    }
    ArrayList<WordItem> result = new ArrayList<WordItem>();
    Preword pw = preProcessing(word);
    // Index -1 is what preProcessing assigns to unsupported input; it cannot
    // address a table, so there is nothing to collect.
    if (pw == null || pw.getWord() == null || pw.getIndex() < 0) {
        return result;
    }
    int index = pw.getIndex();
    String res = pw.getRes();

    int found = findInOriginalTable(index, res, -1);
    if (found >= 0) {
        WordTable wt = wts.get(index);
        ArrayList<WordItem> words = wt.getWords();
        int pos = found;
        // The entry at 'found' is a match by construction; the entries that
        // follow are taken as long as they still carry the same word.
        do {
            WordItem source = words.get(pos);
            WordItem wi = new WordItem();
            wi.setHandle(source.getHandle());
            wi.setFreq(source.getFreq());
            // Every result carries the word, including the first one.
            wi.setWord(word);
            result.add(wi);
            pos++;
        } while (pos < wt.getCount() && strEqual(words.get(pos).getWord(), res));
        return result;
    }

    int found2 = findInModifyTable(index, res, -1);
    if (found2 >= 0) {
        ModifyTable mt = mts.get(index);
        ArrayList<WordItem> wis = mt.getWords();
        for (int i = found2; i < wis.size(); i++) {
            WordItem source = wis.get(i);
            if (strEqual(source.getWord(), res)) {
                WordItem wi = new WordItem();
                wi.setHandle(source.getHandle());
                wi.setFreq(source.getFreq());
                wi.setWord(word);
                result.add(wi);
            }
        }
    }
    return result;
}
/**
 * Binary-searches the original dictionary table to see whether an entry
 * already exists.
 *
 * @param index
 *            index of the data block to search (all entries that start with
 *            the same character form one block)
 * @param res
 *            the part of the word that remains after the first character
 *            has been removed
 * @param handle
 *            the handle the entry must have, or {@code -1} to accept any
 *            handle
 * @return the position of the matching entry inside the block's word list,
 *         or {@code -1} if there is none. When {@code handle} is
 *         {@code -1} the position of the first entry of the run of equal
 *         words is returned.
 */
public int findInOriginalTable(int index, String res, int handle) {
    // A negative index cannot address a block; treat it as "not found".
    if (res == null || wts == null || index < 0) {
        return -1;
    }
    WordTable wt = wts.get(index);
    if (wt == null || wt.getCount() <= 0) {
        return -1;
    }
    ArrayList<WordItem> wis = wt.getWords();
    int start = 0;
    int end = wt.getCount() - 1;
    while (start <= end) {
        int mid = (start + end) >>> 1;
        WordItem wi = wis.get(mid);
        int cmpValue = GFString.compareTo(wi.getWord(), res);
        if (cmpValue == 0 && (handle == -1 || wi.getHandle() == handle)) {
            if (handle == -1) {
                // Walk back to the first entry of the run of equal words.
                // The same comparator as the search itself is used so that
                // the entry at 'mid' is guaranteed to count as equal.
                while (mid > 0 && GFString.compareTo(wis.get(mid - 1).getWord(), res) == 0) {
                    mid--;
                }
            }
            return mid;
        }
        // Entries are ordered by word first and by handle second.
        if (cmpValue < 0 || (cmpValue == 0 && wi.getHandle() < handle)) {
            start = mid + 1;
        } else {
            end = mid - 1;
        }
    }
    return -1;
}
/**
 * Looks an entry up in the modify table and returns its position.
 *
 * @param index
 *            index of the modify-table block to search
 * @param res
 *            the word text to match against the stored entries
 * @param handle
 *            the handle the entry must have; any negative value accepts
 *            every handle
 * @return the position of the first matching entry in the block's word
 *         list, or {@code -1} if the block does not exist or holds no
 *         matching entry
 */
protected int findInModifyTable(int index, String res, int handle) {
    if (res == null || mts == null || index < 0 || index >= mts.size()) {
        return -1;
    }
    ArrayList<WordItem> wis = mts.get(index).getWords();
    if (wis == null) {
        return -1;
    }
    // Plain linear scan: it stops at the first match and does not depend on
    // how the entries of the block happen to be ordered.
    for (int i = 0; i < wis.size(); i++) {
        WordItem wi = wis.get(i);
        if (wi != null && strEqual(wi.getWord(), res)
                && (handle < 0 || wi.getHandle() == handle)) {
            return i;
        }
    }
    return -1;
}
// TODO
/**
 * Null-safe string comparison.
 *
 * @param b1
 *            first string, may be {@code null}
 * @param b2
 *            second string, may be {@code null}
 * @return {@code true} if both arguments are {@code null} or both are
 *         non-null and equal; {@code false} otherwise
 */
public boolean strEqual(String b1, String b2) {
    if (b1 == null) {
        // Only another null equals null.
        return b2 == null;
    }
    // String.equals(null) is false, which covers the remaining mixed case.
    return b1.equals(b2);
}
/**
 * Classifies a word by the result of {@code Utility.charType}.
 *
 * @param word
 *            the word to classify, may be {@code null}
 * @return {@code Utility.WT_CHINESE} for a non-empty word whose char type is
 *         {@code Utility.CT_CHINESE} and for which
 *         {@code GFString.isAllChinese} holds;
 *         {@code Utility.WT_DELIMITER} for a non-empty word whose char type
 *         is {@code Utility.CT_DELIMITER}; {@code Utility.WT_OTHER} in every
 *         other case
 */
public int getWordType(String word) {
    if (word == null) {
        return Utility.WT_OTHER;
    }
    // The char type is determined before the length is looked at.
    final int charType = Utility.charType(word);
    if (word.length() == 0) {
        return Utility.WT_OTHER;
    }
    if (charType == Utility.CT_CHINESE && GFString.isAllChinese(word)) {
        return Utility.WT_CHINESE;
    }
    if (charType == Utility.CT_DELIMITER) {
        return Utility.WT_DELIMITER;
    }
    return Utility.WT_OTHER;
}
/**
 * Pre-processes a word before a dictionary lookup: the word is passed
 * through {@code GFString.removeSpace} and split into a block index and a
 * remainder.
 *
 * <p>The char type is taken from the word as it was passed in, i.e. before
 * spaces are removed.
 *
 * @param word
 *            the raw word
 * @return {@code null} if {@code word} is {@code null}, empty, or empty
 *         after space removal; otherwise a {@code Preword} holding the
 *         cleaned word and
 *         <ul>
 *         <li>for char type {@code Utility.CT_CHINESE}: index
 *         {@code Utility.CC_ID(word)} and the word without its first
 *         character as remainder (empty string for a one-character
 *         word);</li>
 *         <li>for char type {@code Utility.CT_DELIMITER}: index 3755 and
 *         the whole word as remainder;</li>
 *         <li>for anything else: index -1 and no remainder set.</li>
 *         </ul>
 */
public Preword preProcessing(String word) {
    if (word == null || word.length() == 0) {
        return null;
    }
    final int charType = Utility.charType(word);
    final String cleaned = GFString.removeSpace(word);
    if (cleaned.length() == 0) {
        // Nothing is left once the spaces are gone.
        return null;
    }

    Preword pre = new Preword();
    pre.setWord(cleaned);
    if (charType == Utility.CT_CHINESE) {
        // Chinese word: the first character selects the block.
        pre.setIndex(Utility.CC_ID(cleaned));
        String rest = "";
        if (cleaned.length() > 1) {
            rest = cleaned.substring(1);
        }
        pre.setRes(rest);
    } else if (charType == Utility.CT_DELIMITER) {
        // Delimiter: fixed block, the whole word is kept as remainder.
        pre.setIndex(3755);
        pre.setRes(cleaned);
    } else {
        // Any other kind of input is flagged as invalid.
        pre.setIndex(-1);
    }
    return pre;
}
/**
 * Unfinished placeholder: replaces {@code mts} with a new empty list, walks
 * over {@code Utility.CC_NUM} block numbers without doing anything for them
 * and reports failure.
 *
 * @param handle
 *            not used by the current implementation
 * @return always {@code false}
 */
public boolean mergePOS(int handle) {
    mts = new ArrayList<ModifyTable>();
    int block = 0;
    while (block < Utility.CC_NUM) {
        // No per-block work is implemented.
        block++;
    }
    return false;
}
/**
 * Finds the best-matching entry for a word in the dictionary.
 *
 * <p>The original table is tried first: an exact match of the remainder is
 * preferred; failing that, the first entry that compares greater than the
 * remainder is taken. If the original table yields nothing, the modify
 * table is searched for an entry equal to the remainder.
 *
 * @param word
 *            the word to match
 * @return a new {@code WordItem} whose word is the matched entry with the
 *         stripped leading part of the input put back in front, or
 *         {@code null} if {@code word} is {@code null}, cannot be
 *         pre-processed, or nothing matches
 */
public WordItem getMaxMatch(String word) {
    if (word == null) {
        return null;
    }
    Preword pw = preProcessing(word);
    if (pw == null || pw.getWord() == null || pw.getIndex() < 0) {
        return null;
    }
    int index = pw.getIndex();
    String res = pw.getRes();
    String cleaned = pw.getWord();

    // The prefix is whatever preProcessing removed to obtain the remainder:
    // the first character for Chinese words, nothing for delimiters (their
    // remainder is the whole word, so prepending a character would
    // duplicate it).
    String prefix;
    if (res != null && cleaned.endsWith(res)) {
        prefix = cleaned.substring(0, cleaned.length() - res.length());
    } else {
        prefix = cleaned.substring(0, 1);
    }

    // Look in the original table for an entry equal to the remainder.
    ArrayList<WordItem> originals = null;
    if (wts != null && wts.get(index) != null) {
        originals = wts.get(index).getWords();
    }
    if (originals != null) {
        int found = findInOriginalTable(index, res, -1);
        if (found < 0) {
            // No exact match: fall back to the first entry that sorts after
            // the remainder.
            for (int j = 0; j < originals.size(); j++) {
                if (GFString.compareTo(originals.get(j).getWord(), res) > 0) {
                    found = j;
                    break;
                }
            }
        }
        if (found >= 0) {
            WordItem wi = originals.get(found);
            return new WordItem(prefix + wi.getWord(), wi.getLen(), wi.getHandle(), wi.getFreq());
        }
    }

    // The modify table may be shorter than the block index.
    if (mts != null && index < mts.size() && mts.get(index) != null) {
        ArrayList<WordItem> modified = mts.get(index).getWords();
        if (modified != null && res != null) {
            for (WordItem wi : modified) {
                if (res.equals(wi.getWord())) {
                    return new WordItem(prefix + wi.getWord(), wi.getLen(), wi.getHandle(),
                            wi.getFreq());
                }
            }
        }
    }
    return null;
}
/**
 * Returns the frequency stored for a word with the given handle.
 *
 * <p>The original table is consulted first, then the modify table.
 *
 * @param word
 *            the word to look up
 * @param handle
 *            the handle to match; passed through unchanged to the two
 *            lookup methods
 * @return the stored frequency, or {@code 0} if {@code word} is
 *         {@code null} or empty, cannot be pre-processed, or is not found
 */
public int getFreq(String word, int handle) {
    if (word == null || word.length() == 0) {
        return 0;
    }
    Preword pw = preProcessing(word);
    // Index -1 marks input preProcessing could not classify; it cannot
    // address a table.
    if (pw == null || pw.getIndex() < 0) {
        return 0;
    }
    int index = pw.getIndex();
    String res = pw.getRes();

    int found = findInOriginalTable(index, res, handle);
    if (found >= 0 && wts != null) {
        WordTable wt = wts.get(index);
        return wt.getWords().get(found).getFreq();
    }

    int found2 = findInModifyTable(index, res, handle);
    if (found2 >= 0 && mts != null) {
        ModifyTable mt = mts.get(index);
        // Use the modify-table position, not the original-table one.
        return mt.getWords().get(found2).getFreq();
    }
    return 0;
}
// ---------------------------------------------------------//
// Methods that are not used for the time being
/**
 * Unimplemented stub: performs no work.
 *
 * @return always {@code false}
 */
public boolean optimum() {
return false;
}
/**
 * Unimplemented stub: both arguments are ignored and no work is performed.
 *
 * @param dict2
 *            not used by the current implementation
 * @param nRatio
 *            not used by the current implementation
 * @return always {@code false}
 */
public boolean merge(Dictionary dict2, int nRatio) {
return false;
}
/**
 * Unimplemented stub: nothing is written.
 *
 * @param sFilename
 *            not used by the current implementation
 * @return always {@code false}
 */
public boolean outputChars(String sFilename) {
return false;
}
/**
 * Unimplemented stub: nothing is written.
 *
 * @param sFilename
 *            not used by the current implementation
 * @return always {@code false}
 */
public boolean output(String sFilename) {
return false;
}
/**
 * Unimplemented stub: both arguments are ignored.
 *
 * @param nPOS
 *            not used by the current implementation
 * @param sPOSRet
 *            not used by the current implementation
 * @return always {@code false}
 */
public boolean getPOSString(int nPOS, String sPOSRet) {
return false;
}
/**
 * Unimplemented stub: the argument is ignored.
 *
 * @param sPOS
 *            not used by the current implementation
 * @return always {@code 0}
 */
public int getPOSValue(byte[] sPOS) {
return 0;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -