📄 span.cs
字号:
nLittleFreqCount++;
//The counter increase
sPersonName += m_sWords[nPos];
nPos += 1;
}
/*
if(IsAllForeign(sPersonName)&&personDict.GetFrequency(m_sWords[j],1)<LITTLE_FREQUENCY)
{//Exclusion foreign name
//Rule 2 for exclusion:若均为外国人名用字 规则(名1+名2)失效
j+=nPatternLen[k]-1;
continue;
}
*/
if (string.Compare(sPatterns[k], "CDCD") == 0)
{
//Rule for exclusion
//规则(名1+名2+名1+名2)本身是排除规则:女高音歌唱家迪里拜尔演唱
//Rule 3 for exclusion:含外国人名用字 规则适用
//否则,排除规则失效:黑妞白妞姐俩拔了头筹。
if (Utility.GetForeignCharCount(sPersonName) > 0)
j += nPatternLen[k] - 1;
continue;
}
/*
if(strcmp(sPatterns[k],"CD")==0&&IsAllForeign(sPersonName))
{//
j+=nPatternLen[k]-1;
continue;
}
if(nLittleFreqCount==nPatternLen[k]||nLittleFreqCount==3)
//马哈蒂尔;小扎耶德与他的中国阿姨胡彩玲受华黎明大使之邀,
//The all roles appear with two lower frequecy,we will ignore them
continue;
*/
m_nUnknownWords[m_nUnknownWordsCount, 0] = m_nWordPosition[j];
m_nUnknownWords[m_nUnknownWordsCount, 1] = m_nWordPosition[j + nPatternLen[k]];
m_dWordsPossibility[m_nUnknownWordsCount] = -Math.Log(dFactor[k]) + ComputePossibility(j, nPatternLen[k], personDict);
//Mutiply the factor
m_nUnknownWordsCount += 1;
j += nPatternLen[k];
bMatched = true;
}
}
if (!bMatched)
//Not matched, add j by 1
j += 1;
}
return true;
}
#endregion
#region GuessPOS Method
//Guess the POS of No. nIndex word item
// Guesses candidate tags for word item number nIndex and writes them, with a
// guessed frequency for each, into row nIndex of m_nTags / m_dFrequency.
//
// Which candidates are produced depends on m_tagType and on the word's length
// and character class. Candidates are written starting at column 0.
//
// nIndex:    index of the word in m_sWords (and row in m_nTags / m_dFrequency).
// pSubIndex: receives the number of candidates written for this word
//            (0 when m_tagType matches none of the handled cases).
// Returns:   always true.
private bool GuessPOS(int nIndex, out int pSubIndex)
{
    int nCount = 0; // number of candidates written so far (next free column)
    int nLen, nCharType;

    switch (m_tagType)
    {
        case TAG_TYPE.TT_NORMAL:
            // A word without any part of speech is treated as a plain string ("x").
            m_nTags[nIndex, nCount] = Utility.GetPOSValue("x");
            m_dFrequency[nIndex, nCount] = 0;
            nCount++;
            break;

        case TAG_TYPE.TT_PERSON:
            // Ordinal search: the word must literally occur inside "××". The
            // culture-sensitive overload used previously treats ignorable
            // characters as an empty match (index 0) and depends on the current
            // culture. An empty word still matches, exactly as before.
            if ("××".IndexOf(m_sWords[nIndex], StringComparison.Ordinal) != -1)
            {
                AppendGuessedTag(nIndex, ref nCount, 6, 1, 1);
            }
            else
            {
                AppendGuessedTag(nIndex, ref nCount, 0, 1, 1);
                nLen = m_sWords[nIndex].Length;
                if (nLen >= 2)
                {
                    // Tag 0 is deliberately appended a second time here to keep
                    // the candidate list identical to the previous behavior.
                    // NOTE(review): confirm whether the duplicate is intended.
                    AppendGuessedTag(nIndex, ref nCount, 0, 1, 1);
                    AppendGuessedTagRange(nIndex, ref nCount, 11, 13, 8, 0);
                }
                else if (nLen == 1)
                {
                    AppendSingleCharGuesses(nIndex, ref nCount);
                }
            }
            break;

        case TAG_TYPE.TT_PLACE:
            AppendGuessedTag(nIndex, ref nCount, 0, 1, 1);
            nLen = m_sWords[nIndex].Length;
            if (nLen >= 2)
            {
                AppendGuessedTagRange(nIndex, ref nCount, 11, 13, 8, 0);
            }
            else if (nLen == 1)
            {
                // Same candidate list as a single-character word in TT_PERSON
                // (this also appends tag 0 a second time, as before).
                AppendSingleCharGuesses(nIndex, ref nCount);
            }
            break;

        case TAG_TYPE.TT_TRANS_PERSON:
            nLen = m_sWords[nIndex].Length;
            AppendGuessedTag(nIndex, ref nCount, 0, 1, 1);
            if (!Utility.IsAllChinese(m_sWords[nIndex]))
            {
                if (Utility.IsAllLetter(m_sWords[nIndex]))
                {
                    // Order matters: candidates are stored in exactly this sequence.
                    AppendGuessedTag(nIndex, ref nCount, 1, 1, 1);
                    AppendGuessedTag(nIndex, ref nCount, 11, 1, 1);
                    AppendGuessedTag(nIndex, ref nCount, 2, 2, 1);
                    AppendGuessedTag(nIndex, ref nCount, 3, 2, 1);
                    AppendGuessedTag(nIndex, ref nCount, 12, 2, 1);
                    AppendGuessedTag(nIndex, ref nCount, 13, 2, 1);
                }
                AppendGuessedTagRange(nIndex, ref nCount, 41, 43, 8, 0);
            }
            else if (nLen >= 2)
            {
                AppendGuessedTagRange(nIndex, ref nCount, 41, 43, 8, 0);
            }
            else if (nLen == 1)
            {
                nCharType = Utility.charType(m_sWords[nIndex][0]);
                if (nCharType == Predefine.CT_OTHER || nCharType == Predefine.CT_CHINESE)
                {
                    AppendGuessedTagRange(nIndex, ref nCount, 1, 3, 2, 1);
                    AppendGuessedTag(nIndex, ref nCount, 30, 8, 1);
                    AppendGuessedTagRange(nIndex, ref nCount, 11, 13, 4, 1);
                    AppendGuessedTagRange(nIndex, ref nCount, 21, 23, 2, 1);
                }
                AppendGuessedTagRange(nIndex, ref nCount, 41, 43, 8, 0);
            }
            break;

        default:
            break;
    }

    pSubIndex = nCount;
    return true;
}

// Stores nTag as the next candidate for word nIndex with the guessed frequency
// 1.0 / (m_context.GetFrequency(0, nTag) * nScale + nOffset), then advances nCount.
// NOTE(review): with nOffset == 0 a context frequency of 0 makes the divisor 0;
// the floating-point division then yields infinity instead of throwing. Confirm
// that GetFrequency cannot return 0 for the tags used with nOffset == 0.
private void AppendGuessedTag(int nIndex, ref int nCount, int nTag, int nScale, int nOffset)
{
    m_nTags[nIndex, nCount] = nTag;
    m_dFrequency[nIndex, nCount] = 1.0 / (m_context.GetFrequency(0, nTag) * nScale + nOffset);
    nCount++;
}

// Appends every tag from nFirstTag to nLastTag (inclusive, ascending) using the
// same divisor formula for each.
private void AppendGuessedTagRange(int nIndex, ref int nCount, int nFirstTag, int nLastTag, int nScale, int nOffset)
{
    for (int nTag = nFirstTag; nTag <= nLastTag; nTag++)
    {
        AppendGuessedTag(nIndex, ref nCount, nTag, nScale, nOffset);
    }
}

// Appends the candidates shared by TT_PERSON and TT_PLACE for a one-character
// word: tag 0, then tags 1-4 when the character type is CT_OTHER or CT_CHINESE,
// then tags 11-13.
private void AppendSingleCharGuesses(int nIndex, ref int nCount)
{
    AppendGuessedTag(nIndex, ref nCount, 0, 1, 1);
    int nCharType = Utility.charType(m_sWords[nIndex][0]);
    if (nCharType == Predefine.CT_OTHER || nCharType == Predefine.CT_CHINESE)
    {
        AppendGuessedTagRange(nIndex, ref nCount, 1, 4, 1, 1);
    }
    AppendGuessedTagRange(nIndex, ref nCount, 11, 13, 8, 0);
}
#endregion
#region ComputePossibility Method
// Sums a log-ratio score over the words in [nStartPos, nStartPos + nLength).
// For each word the term added is
//   log(context frequency of the word's best tag + 1) - log(frequency of the word with that tag in dict + 1).
// Only this per-word term is accumulated; the context-transition term that
// older commented-out code sketched is not applied.
//
// nStartPos: index of the first word to score.
// nLength:   number of consecutive words to score.
// dict:      dictionary queried for the word/tag frequency.
// Returns:   the accumulated score (0 when nLength is not positive).
private double ComputePossibility(int nStartPos, int nLength, WordDictionary dict)
{
    double dTotal = 0;
    int nStop = nStartPos + nLength;
    int nPos = nStartPos;

    while (nPos < nStop)
    {
        string sWord = m_sWords[nPos];
        int nTag = m_nBestTag[nPos];

        // How often this word carries its best tag according to dict.
        int nWordFreq = dict.GetFrequency(sWord, nTag);

        double dTagLog = Math.Log((double)(m_context.GetFrequency(0, nTag) + 1));
        double dWordLog = Math.Log((double)(nWordFreq + 1));
        dTotal += dTagLog - dWordLog;

        nPos++;
    }

    return dTotal;
}
#endregion
#region PlaceRecognize Method
public bool PlaceRecognize(WordDictionary dictCore, WordDictionary placeDict)
{
int nStart = 1, nEnd = 1, i = 1, nTemp;
double dPanelty = 1.0; //Panelty value
while (m_nBestTag[i] > -1)
{
if (m_nBestTag[i] == 1)
//1 Trigger the recognition procession
{
nStart = i;
nEnd = nStart + 1;
//=========== by zhenyulu: 此处nEnd = nStart + 1;有些强迫之嫌,因此后面处理了一下
while (m_nBestTag[nEnd] == 1)
//
{
if (nEnd > nStart + 1)
dPanelty += 1.0;
nEnd++;
}
while (m_nBestTag[nEnd] == 2)
//2,12,22
nEnd++;
nTemp = nEnd;
while (m_nBestTag[nEnd] == 3)
{
if (nEnd > nTemp)
dPanelty += 1.0;
nEnd++;
}
}
else if (m_nBestTag[i] == 2)
//1,11,21 Trigger the recognition
{
dPanelty += 1.0;
nStart = i;
nEnd = nStart + 1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -