📄 worddictionary.cs
字号:
indexTable[i].WordItems[j].nFrequency = (nRatio * indexTable[i].WordItems[j].nFrequency + dict2.indexTable[i].WordItems[k].nFrequency) / (nRatio + 1);
j += 1;
k += 1;
}
//Get next word in the current dictionary
else if (nCmpValue < 0)
{
indexTable[i].WordItems[j].nFrequency = (nRatio * indexTable[i].WordItems[j].nFrequency) / (nRatio + 1);
j += 1;
}
else
//Get next word in the second dictionary
{
if (dict2.indexTable[i].WordItems[k].nFrequency > (nRatio + 1) / 10)
{
sWord = string.Format("{0}{1}", Utility.CC_ID2Char(i).ToString(), dict2.indexTable[i].WordItems[k].sWord);
AddItem(sWord, dict2.indexTable[i].WordItems[k].nPOS, dict2.indexTable[i].WordItems[k].nFrequency / (nRatio + 1));
}
k += 1;
}
}
//words in current dictionary are left
while (j < indexTable[i].nCount)
{
indexTable[i].WordItems[j].nFrequency = (nRatio * indexTable[i].WordItems[j].nFrequency) / (nRatio + 1);
j += 1;
}
//words in Dict2 are left
while (k < dict2.indexTable[i].nCount)
{
if (dict2.indexTable[i].WordItems[k].nFrequency > (nRatio + 1) / 10)
{
sWord = string.Format("{0}{1}", Utility.CC_ID2Char(i).ToString(), dict2.indexTable[i].WordItems[k].sWord);
AddItem(sWord, dict2.indexTable[i].WordItems[k].nPOS, dict2.indexTable[i].WordItems[k].nFrequency / (nRatio + 1));
}
k += 1;
}
}
return true;
}
#endregion
#region Optimum Method
//====================================================================
//Delete the word item which
//(1) has POS 'x', 'g' or 'qg', or
//(2) has frequency 0 and the same word as the following item, with a POS value that is the parent set of the following item's POS
//for example "江泽民/n/0" will be deleted, because "江泽民/nr/0" is more detailed and correct
//====================================================================
/// <summary>
/// Scans every bucket of the index table and asks DelItem to remove the entry that
/// precedes the current one when that previous entry either carries POS 'x', 'g' or
/// 'qg', or is a zero-frequency entry for the same word whose POS equals the current
/// entry's POS with its low byte cleared.
/// </summary>
/// <returns>Always true.</returns>
public bool Optimum()
{
    for (int bucket = 0; bucket < Predefine.CC_NUM; bucket++)
    {
        // State of the entry visited on the previous pass; reset for each bucket.
        string lastWord = null;
        int lastPos = 0;
        int lastFreq = -1;

        // nCount is re-read on every pass because DelItem may touch the table.
        for (int slot = 0; slot < indexTable[bucket].nCount; slot++)
        {
            string word = string.Format("{0}{1}", Utility.CC_ID2Char(bucket).ToString(), indexTable[bucket].WordItems[slot].sWord);

            // 30720 = 'x' * 256, 26368 = 'g' * 256, 29031 = 'q' * 256 + 'g'
            bool removePrevious = lastPos == 30720 || lastPos == 26368 || lastPos == 29031;
            if (!removePrevious)
            {
                // Previous entry is a zero-frequency duplicate whose POS is the
                // current POS rounded down to a multiple of 256.
                removePrevious = lastWord == word
                    && lastFreq == 0
                    && indexTable[bucket].WordItems[slot].nPOS / 256 * 256 == lastPos;
            }

            if (removePrevious)
            {
                DelItem(lastWord, lastPos);
            }

            // Read the slot only after DelItem has run, as the original code does.
            lastWord = word;
            lastPos = indexTable[bucket].WordItems[slot].nPOS;
            lastFreq = indexTable[bucket].WordItems[slot].nFrequency;
        }
    }

    return true;
}
#endregion
#region Private Functions
#region PreProcessing Method
//====================================================================
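// Func Name : PreProcessing
// Description: Trim the word and split it into the table index of its first character and the text to store
// Parameters : sWord: the word (trimmed in place)
// nId: receives the inner code of the first Chinese char, or 3755 for a delimiter
// sWordRet: receives the text to store
// Returns : true for a Chinese word or a delimiter, false otherwise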
//====================================================================
/// <summary>
/// Trims <paramref name="sWord"/> in place and classifies it by its first character.
/// </summary>
/// <param name="sWord">The word to examine; replaced by its trimmed form when not null.</param>
/// <param name="nId">
/// Receives Utility.CC_ID of the first two bytes of the word for a Chinese word,
/// 3755 for a delimiter, and 0 when the word is rejected.
/// </param>
/// <param name="sWordRet">
/// Receives the word without its first character for a Chinese word, the whole word
/// for a delimiter, and an empty string when the word is rejected.
/// </param>
/// <returns>true for a Chinese word or a delimiter; false for anything else,
/// including a null, empty or whitespace-only word.</returns>
private bool PreProcessing(ref string sWord, out int nId, out string sWordRet)
{
    // Values reported for every rejected input.
    nId = 0;
    sWordRet = "";

    if (sWord == null)
    {
        return false;
    }

    sWord = sWord.Trim();

    // The length must be checked BEFORE the first character is read; the previous
    // code indexed into the string first and threw on empty / whitespace-only words.
    if (sWord.Length == 0)
    {
        return false;
    }

    int nType = Utility.charType(sWord[0]);

    //Chinese word
    if (nType == Predefine.CT_CHINESE)
    {
        //Get the inner code of the first Chinese Char
        byte[] byteArray = Utility.String2ByteArray(sWord);
        nId = Utility.CC_ID(byteArray[0], byteArray[1]);
        //store the word, not store the first Chinese Char
        sWordRet = sWord.Substring(1);
        return true;
    }

    //Delimiter
    if (nType == Predefine.CT_DELIMITER)
    {
        nId = 3755;
        //a delimiter is stored whole
        sWordRet = sWord;
        return true;
    }

    return false; //other invalid
}
#endregion
#region FindInOriginalTable Method
//====================================================================
// Func Name : FindInOriginalTable
// Description: judge whether the word with the given POS exists in the inner (original) table
// Parameters : nInnerCode: the inner code of the first Chinese char
// sWord: the word
// nPOS: the POS value, or -1 to match any POS
// nPosRet: receives the position of the matched item
// Returns : success or fail
//====================================================================
/// <summary>
/// Binary-searches bucket <paramref name="nInnerCode"/> of the index table for an entry
/// whose word equals <paramref name="sWord"/> (per Utility.CCStringCompare) and whose
/// POS matches <paramref name="nPOS"/>.
/// </summary>
/// <param name="nInnerCode">Index of the bucket in indexTable to search.</param>
/// <param name="sWord">Word text to look for.</param>
/// <param name="nPOS">POS value to match; -1 accepts any POS.</param>
/// <param name="nPosRet">
/// On success, the index of the match; when nPOS is -1 this is rewound to the first
/// entry of the run that string.Compare reports equal to sWord. On failure, the
/// midpoint of the final search bounds minus one.
/// </param>
/// <returns>true when a matching entry exists; otherwise false.</returns>
private bool FindInOriginalTable(int nInnerCode, string sWord, int nPOS, out int nPosRet)
{
    WordItem[] entries = indexTable[nInnerCode].WordItems;
    int low = 0;
    int high = indexTable[nInnerCode].nCount - 1;
    bool anyPos = nPOS == -1;

    while (low <= high)
    {
        int probe = (low + high) / 2;
        int order = Utility.CCStringCompare(entries[probe].sWord, sWord);

        if (order == 0 && (entries[probe].nPOS == nPOS || anyPos))
        {
            if (anyPos)
            {
                // Not a strict match: step back to the first entry of the equal run.
                // NOTE(review): the run is detected with string.Compare while the search
                // itself orders by Utility.CCStringCompare - confirm the two agree.
                while (probe > 0 && string.Compare(entries[probe - 1].sWord, sWord) == 0)
                {
                    probe--;
                }
            }

            nPosRet = probe;
            return true;
        }

        // Here an equal word implies nPOS != -1 and a differing POS, so entries are
        // ordered by word first and by POS second.
        if (order < 0 || (order == 0 && entries[probe].nPOS < nPOS))
        {
            low = probe + 1;
        }
        else
        {
            high = probe - 1;
        }
    }

    //Get the previous position
    nPosRet = (low + high) / 2 - 1;
    return false;
}
//====================================================================
// Func Name : FindInOriginalTable
// Description: judge whether the word with the given POS exists in the inner (original) table
// Parameters : nInnerCode: the inner code of the first Chinese char
// sWord: the word
// nPOS: the POS value, or -1 to match any POS
// Returns : success or fail
//====================================================================
/// <summary>
/// Binary-searches bucket <paramref name="nInnerCode"/> of the index table and reports
/// whether it holds an entry whose word equals <paramref name="sWord"/> (per
/// Utility.CCStringCompare) and whose POS matches <paramref name="nPOS"/>.
/// </summary>
/// <param name="nInnerCode">Index of the bucket in indexTable to search.</param>
/// <param name="sWord">Word text to look for.</param>
/// <param name="nPOS">POS value to match; -1 accepts any POS.</param>
/// <returns>true when a matching entry exists; otherwise false.</returns>
private bool FindInOriginalTable(int nInnerCode, string sWord, int nPOS)
{
    WordItem[] entries = indexTable[nInnerCode].WordItems;
    int low = 0;
    int high = indexTable[nInnerCode].nCount - 1;
    bool anyPos = nPOS == -1;

    while (low <= high)
    {
        int probe = (low + high) / 2;
        int order = Utility.CCStringCompare(entries[probe].sWord, sWord);

        if (order == 0 && (anyPos || entries[probe].nPOS == nPOS))
        {
            return true; //find it
        }

        // An equal word at this point means the POS differs, so the POS decides
        // which half to keep.
        bool goRight = order < 0 || (order == 0 && entries[probe].nPOS < nPOS);
        if (goRight)
        {
            low = probe + 1;
        }
        else
        {
            high = probe - 1;
        }
    }

    return false;
}
#endregion
#region FindInModifyTable Method
//====================================================================
// Func Name : FindInModifyTable
// Description: judge whether the word with the given POS exists in the modified table
// Parameters : nInnerCode: the inner code of the first Chinese char
// sWord: the word
// nPOS: the POS value
// pFindRet: receives the node preceding the search position (null if there is none)
// Returns : success or fail
//====================================================================
/// <summary>
/// Walks the chain of bucket <paramref name="nInnerCode"/> in the modify table looking
/// for a node with word <paramref name="sWord"/> and POS <paramref name="nPOS"/>.
/// </summary>
/// <param name="nInnerCode">Index of the bucket in modifyTable to search.</param>
/// <param name="sWord">Word text to look for.</param>
/// <param name="nPOS">POS value that must match exactly.</param>
/// <param name="pFindRet">
/// Receives the node visited just before the walk stopped (null when the walk stopped
/// at the head, or when modifyTable is null).
/// </param>
/// <returns>true when the node at the stopping position has the requested word
/// (case-insensitive string.Compare) and POS; otherwise false.</returns>
private bool FindInModifyTable(int nInnerCode, string sWord, int nPOS, out WordChain pFindRet)
{
    if (modifyTable == null)
    {
        pFindRet = null;
        return false;
    }

    WordChain trail = null;
    WordChain node = modifyTable[nInnerCode].pWordItemHead;

    // The chain is kept sorted by word, then by POS; skip everything that sorts
    // before the requested (word, POS) pair.
    while (node != null)
    {
        bool sortsBefore = Utility.CCStringCompare(node.data.sWord, sWord) < 0
            || (string.Compare(node.data.sWord, sWord, true) == 0 && node.data.nPOS < nPOS);
        if (!sortsBefore)
        {
            break;
        }

        trail = node;
        node = node.next;
    }

    pFindRet = trail;

    // The node exists only if the stopping position holds exactly this word and POS.
    return node != null
        && string.Compare(node.data.sWord, sWord, true) == 0
        && node.data.nPOS == nPOS;
}
//====================================================================
// Func Name : FindInModifyTable
// Description: judge whether the word (with any POS) exists in the modified table
// Parameters : nInnerCode: the inner code of the first Chinese char
// sWord: the word
// (this overload takes no POS value)
// pFindRet: receives the node preceding the search position (null if there is none)
// Returns : success or fail
//====================================================================
/// <summary>
/// Walks the chain of bucket <paramref name="nInnerCode"/> in the modify table looking
/// for a node with word <paramref name="sWord"/>, regardless of POS.
/// </summary>
/// <param name="nInnerCode">Index of the bucket in modifyTable to search.</param>
/// <param name="sWord">Word text to look for.</param>
/// <param name="pFindRet">
/// Receives the node visited just before the walk stopped (null when the walk stopped
/// at the head, or when modifyTable is null).
/// </param>
/// <returns>true when the node at the stopping position has the requested word
/// (case-insensitive string.Compare); otherwise false.</returns>
private bool FindInModifyTable(int nInnerCode, string sWord, out WordChain pFindRet)
{
    if (modifyTable == null)
    {
        pFindRet = null;
        return false;
    }

    WordChain trail = null;
    WordChain node = modifyTable[nInnerCode].pWordItemHead;

    // Skip every node whose word sorts before the requested word.
    for (; node != null && Utility.CCStringCompare(node.data.sWord, sWord) < 0; node = node.next)
    {
        trail = node;
    }

    pFindRet = trail;

    if (node == null)
    {
        return false;
    }

    return string.Compare(node.data.sWord, sWord, true) == 0;
}
//====================================================================
// Func Name : FindInModifyTable
// Description: judge whether the word with the given POS exists in the modified table
// Parameters : nInnerCode: the inner code of the first Chinese char
// sWord: the word
// nPOS: the POS value; a negative value matches any POS
// Returns : success or fail
//====================================================================
/// <summary>
/// Reports whether the chain of bucket <paramref name="nInnerCode"/> in the modify
/// table holds a node with word <paramref name="sWord"/> and a matching POS.
/// </summary>
/// <param name="nInnerCode">Index of the bucket in modifyTable to search.</param>
/// <param name="sWord">Word text to look for.</param>
/// <param name="nPOS">POS value to match; any negative value accepts every POS.</param>
/// <returns>true when the node at the stopping position has the requested word
/// (case-insensitive string.Compare) and an acceptable POS; false otherwise, and
/// always false when modifyTable is null.</returns>
private bool FindInModifyTable(int nInnerCode, string sWord, int nPOS)
{
    if (modifyTable == null)
    {
        return false;
    }

    WordChain node = modifyTable[nInnerCode].pWordItemHead;

    // The chain is kept sorted by word, then by POS; advance past every node that
    // sorts before the requested (word, POS) pair.
    while (node != null)
    {
        bool sortsBefore = Utility.CCStringCompare(node.data.sWord, sWord) < 0
            || (string.Compare(node.data.sWord, sWord, true) == 0 && node.data.nPOS < nPOS);
        if (!sortsBefore)
        {
            break;
        }

        node = node.next;
    }

    if (node == null || string.Compare(node.data.sWord, sWord, true) != 0)
    {
        return false;
    }

    //The node exists
    return node.data.nPOS == nPOS || nPOS < 0;
}
#endregion
#region FindFirstMatchItemInOrgTbl Method
//====================================================================
// Find the position of the first item that matches (int nInnerCode, string sWord)
//====================================================================
/// <summary>
/// Binary-searches bucket <paramref name="nInnerCode"/> of the index table for
/// <paramref name="sWord"/> and reports the position of the first entry of the run
/// of entries carrying that word.
/// </summary>
/// <param name="nInnerCode">Index of the bucket in indexTable to search.</param>
/// <param name="sWord">Word text to look for; an empty string is reported as found at position 0.</param>
/// <param name="nPosRet">
/// Receives 0 for an empty word, the index just after the last non-equal entry found
/// while stepping back from the hit, or -1 when nothing matches.
/// </param>
/// <returns>true for an empty word or when the search hits the word; otherwise false.</returns>
private bool FindFirstMatchItemInOrgTbl(int nInnerCode, string sWord, out int nPosRet)
{
    WordItem[] entries = indexTable[nInnerCode].WordItems;
    int low = 0;
    int high = indexTable[nInnerCode].nCount - 1;

    if (sWord.Length == 0)
    {
        nPosRet = 0;
        return true;
    }

    while (low <= high)
    {
        int probe = (low + high) / 2;
        int order = Utility.CCStringCompare(entries[probe].sWord, sWord);

        if (order < 0)
        {
            low = probe + 1;
            continue;
        }

        if (order > 0)
        {
            high = probe - 1;
            continue;
        }

        //Get the first item which match the current word
        int cursor = probe;
        while (cursor >= 0 && entries[cursor].sWord == sWord)
        {
            cursor--;
        }

        nPosRet = cursor + 1;
        return true;
    }

    nPosRet = -1;
    return false;
}
#endregion
#endregion
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -