📄 worddictionary.cs
字号:
//Operation in the modify table and its items
if (FindInModifyTable(nPos, sWordDel, nPOS, out pPre))
{
pCur = modifyTable[nPos].pWordItemHead;
if (pPre != null)
pCur = pPre.next;
while (pCur != null && string.Compare(pCur.data.sWord, sWordDel, true) == 0 &&
(pCur.data.nPOS == nPOS || nPOS < 0))
{
pTemp = pCur;
//pCur is the first item
if (pPre != null)
pPre.next = pCur.next;
else
modifyTable[nPos].pWordItemHead = pCur.next;
pCur = pCur.next;
}
return true;
}
return false;
}
#endregion
#region IsExist Method
//====================================================================
// Func Name : IsExist
// Description: Check whether sWord exists with the given nHandle
// Parameters : sWord: the word to look up
// : nHandle: the handle to match
// Returns : true if the word exists, otherwise false
//====================================================================
/// <summary>
/// Reports whether <paramref name="sWord"/> is present with the given handle,
/// checking the original (index) table first and the modify table second.
/// </summary>
/// <param name="sWord">The word to look up.</param>
/// <param name="nHandle">The handle passed through to both table lookups.</param>
/// <returns>
/// <c>true</c> if either table lookup succeeds; <c>false</c> if preprocessing
/// rejects the word or neither lookup finds it.
/// </returns>
public bool IsExist(string sWord, int nHandle)
{
    int firstCharId;
    string lookupKey;

    // PreProcessing supplies the slot id and the key handed to both lookups.
    if (!PreProcessing(ref sWord, out firstCharId, out lookupKey))
    {
        return false;
    }

    // The modify table is consulted only when the original table has no match.
    if (FindInOriginalTable(firstCharId, lookupKey, nHandle))
    {
        return true;
    }

    return FindInModifyTable(firstCharId, lookupKey, nHandle);
}
#endregion
#region GetWordType Method
//====================================================================
// Func Name : GetWordType
// Description: Get the type of word
// Parameters : sWord: the word
// Returns : the type
//====================================================================
/// <summary>
/// Classifies a word by the character type of its first character.
/// </summary>
/// <param name="sWord">The word to classify; may be null or empty.</param>
/// <returns>
/// <c>Predefine.WT_CHINESE</c> when the first character is of type
/// <c>Predefine.CT_CHINESE</c> and <c>Utility.IsAllChinese</c> accepts the word;
/// <c>Predefine.WT_DELIMITER</c> when the first character is of type
/// <c>Predefine.CT_DELIMITER</c>; otherwise <c>Predefine.WT_OTHER</c>
/// (including for null, empty, or zero-length words).
/// </returns>
public int GetWordType(string sWord)
{
    // A null or empty word has no first character to inspect; indexing it
    // would throw, so it is reported as "other" like any unclassifiable input.
    if (string.IsNullOrEmpty(sWord))
    {
        return Predefine.WT_OTHER;
    }

    int nType = Utility.charType(sWord[0]);
    int nLen = Utility.GetWordLength(sWord);

    // Both recognised categories require a positive word length.
    if (nLen <= 0)
    {
        return Predefine.WT_OTHER;
    }

    // Chinese word: first character is Chinese and the whole word passes IsAllChinese.
    if (nType == Predefine.CT_CHINESE && Utility.IsAllChinese(sWord))
    {
        return Predefine.WT_CHINESE;
    }

    // Delimiter: decided by the first character alone.
    if (nType == Predefine.CT_DELIMITER)
    {
        return Predefine.WT_DELIMITER;
    }

    // Anything else is invalid / other.
    return Predefine.WT_OTHER;
}
#endregion
#region GetWordInfo Method
/// <summary>
/// Collects every POS / frequency pair recorded for <paramref name="sWord"/>.
/// The original (index) table is searched first; the modify table is used
/// only when the original table has no match.
/// </summary>
/// <param name="sWord">The word to look up.</param>
/// <returns>
/// A <c>WordInfo</c> holding the word as passed in plus its POS and frequency
/// lists, or <c>null</c> when preprocessing fails or no table contains the word.
/// </returns>
public WordInfo GetWordInfo(string sWord)
{
    // Capture the word before PreProcessing, which receives it by ref.
    WordInfo result = new WordInfo();
    result.sWord = sWord;

    int slot;
    string key;
    if (!PreProcessing(ref sWord, out slot, out key))
    {
        return null;
    }

    // Original table: walk forward from the first match while the key still matches.
    int index;
    if (FindFirstMatchItemInOrgTbl(slot, key, out index))
    {
        for (;
             index < indexTable[slot].nCount
                 && string.Compare(indexTable[slot].WordItems[index].sWord, key) == 0;
             index++)
        {
            result.POSs.Add(indexTable[slot].WordItems[index].nPOS);
            result.Frequencies.Add(indexTable[slot].WordItems[index].nFrequency);
            result.Count++;
        }

        return result;
    }

    // Modify table: the lookup hands back the node preceding the first match
    // (null when the match is the head of the chain).
    WordChain predecessor;
    if (!FindInModifyTable(slot, key, out predecessor))
    {
        return null;
    }

    WordChain node;
    if (predecessor == null)
    {
        node = modifyTable[slot].pWordItemHead;
    }
    else
    {
        node = predecessor.next;
    }

    // Here the comparison ignores case, unlike the original-table walk above.
    for (; node != null && string.Compare(node.data.sWord, key, true) == 0; node = node.next)
    {
        result.POSs.Add(node.data.nPOS);
        result.Frequencies.Add(node.data.nFrequency);
        result.Count++;
    }

    return result;
}
#endregion
#region GetMaxMatch Method
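//====================================================================
// Func Name : GetMaxMatch
// Description: Find the first dictionary entry whose word starts with sWord
// Parameters : sWord: the word (prefix) to match
// : sWordRet: receives the matched word, or "" when nothing matches
// : nPOSRet: receives the POS of the matched entry, or -1 when nothing matches
// Returns : true on success, false on failure
//====================================================================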
/// <summary>
/// Finds the first entry whose stored text starts with the preprocessed form of
/// <paramref name="sWord"/>, scanning the original (index) table and then the
/// modify table.
/// </summary>
/// <param name="sWord">The word used as the prefix to match.</param>
/// <param name="sWordRet">Receives the matched word, or <c>""</c> when nothing matches.</param>
/// <param name="nPOSRet">Receives the POS of the matched entry, or <c>-1</c> when nothing matches.</param>
/// <returns><c>true</c> if a matching entry was found; otherwise <c>false</c>.</returns>
public bool GetMaxMatch(string sWord, out string sWordRet, out int nPOSRet)
{
    // Values reported on every failure path.
    sWordRet = "";
    nPOSRet = -1;

    int slot;
    string prefix;
    if (!PreProcessing(ref sWord, out slot, out prefix))
    {
        return false;
    }

    // The returned word is built by prepending this character to the stored entry text.
    string leadChar = Utility.CC_ID2Char(slot).ToString();

    // Search the index table for an entry that begins with the prefix.
    for (int i = 0; i < indexTable[slot].nCount; i++)
    {
        if (!indexTable[slot].WordItems[i].sWord.StartsWith(prefix))
        {
            continue;
        }

        sWordRet = leadChar + indexTable[slot].WordItems[i].sWord;
        nPOSRet = indexTable[slot].WordItems[i].nPOS;
        return true;
    }

    // Nothing in the index table; fall back to the modify table if there is one.
    if (modifyTable == null)
    {
        return false;
    }

    for (WordChain node = modifyTable[slot].pWordItemHead; node != null; node = node.next)
    {
        if (!node.data.sWord.StartsWith(prefix))
        {
            continue;
        }

        sWordRet = leadChar + node.data.sWord;
        nPOSRet = node.data.nPOS;
        return true;
    }

    return false;
}
#endregion
#region GetFrequency Method
//====================================================================
// Look up the frequency of sWord with part of speech nPOS
//====================================================================
/// <summary>
/// Returns the frequency recorded for <paramref name="sWord"/> with part of
/// speech <paramref name="nPOS"/>, checking the original (index) table first
/// and the modify table second.
/// </summary>
/// <param name="sWord">The word to look up.</param>
/// <param name="nPOS">The part of speech passed to both table lookups.</param>
/// <returns>
/// The stored frequency, or <c>0</c> when preprocessing fails or the word is
/// not found in either table.
/// </returns>
public int GetFrequency(string sWord, int nPOS)
{
    string sWordFind;
    int firstCharCC_ID, nIndex;
    WordChain pPre, pCur;

    if (!PreProcessing(ref sWord, out firstCharCC_ID, out sWordFind))
        return 0;

    if (FindInOriginalTable(firstCharCC_ID, sWordFind, nPOS, out nIndex))
        return indexTable[firstCharCC_ID].WordItems[nIndex].nFrequency;

    if (FindInModifyTable(firstCharCC_ID, sWordFind, nPOS, out pPre))
    {
        // The other callers of FindInModifyTable in this class (the delete path
        // and GetWordInfo) treat the out node as the one *before* the match, with
        // null meaning the match is the head of the chain. Reading the frequency
        // straight from that node would throw on a head match and return the
        // wrong entry's frequency otherwise, so resolve the matched node first.
        // NOTE(review): FindInModifyTable is not visible here - confirm that it
        // returns the predecessor rather than the match itself.
        pCur = (pPre != null) ? pPre.next : modifyTable[firstCharCC_ID].pWordItemHead;
        if (pCur != null)
            return pCur.data.nFrequency;
    }

    return 0;
}
#endregion
#region ReleaseDict
/// <summary>
/// Drops every index-table slot and the modify table so the dictionary data
/// can be reclaimed.
/// </summary>
public void ReleaseDict()
{
    // The previous nested loop only cleared slots whose nCount was positive
    // (and stopped after its first pass once the slot became null), so empty
    // slots were never released. Every slot is cleared unconditionally now.
    if (indexTable != null)
    {
        for (int i = 0; i < Predefine.CC_NUM; i++)
        {
            indexTable[i] = null;
        }
    }

    modifyTable = null;
}
#endregion
#region MergePOS Method
//====================================================================
// Func Name : MergePOS
// Description: Merge all the POS of each word into nPOS:
// keep one entry per word in the dictionary and set its POS to nPOS
// Parameters : nPOS: the only POS that stays attached to each word
// Returns : always true
//====================================================================
/// <summary>
/// Collapses every word to a single entry whose POS is <paramref name="nPOS"/>.
/// In the index table the first live entry of each word gets the new POS and
/// later entries of the same word are flagged as deleted (frequency -1); in the
/// modify table repeated entries are unlinked from the chain.
/// </summary>
/// <param name="nPOS">The POS assigned to every surviving entry.</param>
/// <returns>Always <c>true</c>.</returns>
public bool MergePOS(int nPOS)
{
    // Allocate the modify table on demand.
    // NOTE(review): only the array is created here. If ModifyTableItem is a
    // reference type its slots start out null and the accesses below would
    // fail - confirm how the elements get initialised.
    if (modifyTable == null)
    {
        modifyTable = new ModifyTableItem[Predefine.CC_NUM];
    }

    // Pass 1: index table.
    for (int slot = 0; slot < Predefine.CC_NUM; slot++)
    {
        string lastKept = null; // no word kept yet in this slot

        for (int k = 0; k < indexTable[slot].nCount; k++)
        {
            int order = Utility.CCStringCompare(lastKept, indexTable[slot].WordItems[k].sWord);

            // Entries already flagged as deleted are left untouched.
            if (indexTable[slot].WordItems[k].nFrequency == -1)
            {
                continue;
            }

            if (k == 0 || order < 0)
            {
                // A new word: give it the merged POS and remember it.
                indexTable[slot].WordItems[k].nPOS = nPOS;
                lastKept = indexTable[slot].WordItems[k].sWord;
            }
            else if (order == 0)
            {
                // Same word as the one just kept: flag it deleted and count it.
                indexTable[slot].WordItems[k].nFrequency = -1;
                modifyTable[slot].nDelete += 1;
            }
        }
    }

    // Pass 2: modify table chains.
    for (int slot = 0; slot < Predefine.CC_NUM; slot++)
    {
        WordChain kept = null;   // last node left in the chain
        string lastWord = null;  // word carried by that node
        WordChain node = modifyTable[slot].pWordItemHead;

        while (node != null)
        {
            WordChain following = node.next;

            if (Utility.CCStringCompare(node.data.sWord, lastWord) > 0)
            {
                // A new word: keep the node and stamp the merged POS on it.
                node.data.nPOS = nPOS;
                lastWord = node.data.sWord;
                kept = node;
            }
            else if (kept == null)
            {
                // Nothing kept yet, so the node being dropped is the chain head.
                modifyTable[slot].pWordItemHead = following;
            }
            else
            {
                // Repeat of the previous word: unlink it from the chain.
                kept.next = following;
            }

            node = following;
        }
    }

    return true;
}
#endregion
#region ToTextFile Method
/// <summary>
/// Dumps the index table to a text file encoded as gb2312, one section per
/// first-character slot followed by one line per word item.
/// </summary>
/// <param name="sFileName">Path of the file to create (an existing file is overwritten).</param>
/// <returns>
/// <c>true</c> when the file was written; <c>false</c> when a modify table
/// exists (nothing is written in that case) or when any error occurs while
/// creating, writing, or closing the file.
/// </returns>
public bool ToTextFile(string sFileName)
{
    // Modification made, not to output when modify table exists.
    if (modifyTable != null)
    {
        return false;
    }

    try
    {
        // The using blocks sit inside the try so that a failure while flushing
        // or closing is reported through the return value, like every other
        // failure, instead of escaping as an exception.
        using (FileStream outputFile = new FileStream(sFileName, FileMode.Create, FileAccess.Write))
        using (StreamWriter writer = new StreamWriter(outputFile, Encoding.GetEncoding("gb2312")))
        {
            for (int j = 0; j < Predefine.CC_NUM; j++)
            {
                // Section header: the slot's character and its id.
                writer.WriteLine("====================================\r\n汉字:{0}, ID :{1}\r\n", Utility.CC_ID2Char(j), j);
                writer.WriteLine(" 词长 频率 词性 词");

                // One line per item: word length, frequency, POS, then the word
                // with the slot's character in parentheses in front of it.
                for (int i = 0; i < indexTable[j].nCount; i++)
                {
                    writer.WriteLine("{0,5} {1,6} {2,5} ({3}){4}",
                        indexTable[j].WordItems[i].nWordLen,
                        indexTable[j].WordItems[i].nFrequency,
                        Utility.GetPOSString(indexTable[j].WordItems[i].nPOS),
                        Utility.CC_ID2Char(j),
                        indexTable[j].WordItems[i].sWord);
                }
            }
        }

        return true;
    }
    catch
    {
        // Best-effort export: callers are told about failure only via the result.
        return false;
    }
}
#endregion
#region Merge Method
//====================================================================
//Merge dict2 into current dictionary and the frequency ratio from dict2 and current dict is nRatio
//====================================================================
public bool Merge(WordDictionary dict2, int nRatio)
{
int i, j, k, nCmpValue;
string sWord;
//Modification made, not to output when modify table exists.
if (modifyTable != null || dict2.modifyTable != null)
return false;
for (i = 0; i < Predefine.CC_NUM; i++)
{
j = 0;
k = 0;
while (j < indexTable[i].nCount && k < dict2.indexTable[i].nCount)
{
nCmpValue = Utility.CCStringCompare(indexTable[i].WordItems[j].sWord, dict2.indexTable[i].WordItems[k].sWord);
if (nCmpValue == 0)
//Same Words and determine the different handle
{
if (indexTable[i].WordItems[j].nPOS < dict2.indexTable[i].WordItems[k].nPOS)
nCmpValue = -1;
else if (indexTable[i].WordItems[j].nPOS > dict2.indexTable[i].WordItems[k].nPOS)
nCmpValue = 1;
}
if (nCmpValue == 0)
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -