📄 segment.cs
字号:
continue;
}
}
pCur = pCur.next;
pNext = pNext.next;
}
}
#endregion
#region ChangeDelimiterPOS Method
// Scans every node of the word list and, for each word that is exactly
// "--", "—" or "-", overwrites its POS code with 'w' * 256 and resets its dValue to 0.
private static void ChangeDelimiterPOS(ref WordLinkedArray linkedArray)
{
    for (WordNode node = linkedArray.first; node != null; node = node.next)
    {
        switch (node.theWord.sWord)
        {
            case "--":
            case "—":
            case "-":
                node.theWord.nPOS = 30464; // 'w' * 256
                node.theWord.dValue = 0;
                break;
        }
    }
}
#endregion
#region SplitMiddleSlashFromDigitalWords Method
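//====================================================================
// If the previous word is a number and the current word starts with "-" or "-"
// (the two quoted characters look identical in this copy of the file; presumably
// they were originally the full-width and half-width forms) and is longer than
// one character, split that "-" off from the current word.
// Example: "3 / -4 / 月" must be split into "3 / - / 4 / 月".
//====================================================================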
// When a word tagged 'm' or 't' is followed by a numeric word that begins with a
// hyphen and is longer than one character, the hyphen is split off into its own
// node (tagged 'w') and the remainder becomes a new 'm' node inserted right after it.
// Lists with fewer than two words are left untouched.
private static void SplitMiddleSlashFromDigitalWords(ref WordLinkedArray linkedArray)
{
    if (linkedArray.Count < 2)
        return;

    WordNode pPre = linkedArray.first;
    WordNode pCur = pPre.next;
    while (pCur != null)
    {
        // 27904 = 'm' * 256, 29696 = 't' * 256; Math.Abs accepts either sign of the code.
        if (Math.Abs(pPre.theWord.nPOS) == 27904 || Math.Abs(pPre.theWord.nPOS) == 29696)
        {
            string word = pCur.theWord.sWord;

            // The length test runs before the first character is read, so an empty
            // word can never cause an out-of-range access, and the cheap checks
            // run before the Utility scans.
            if (word.Length > 1 &&
                "--".IndexOf(word[0]) >= 0 &&
                (Utility.IsAllNum(word) || Utility.IsAllChineseNum(word)))
            {
                // New node holding everything after the leading hyphen.
                WordNode newNode = new WordNode();
                newNode.row = pCur.row + 1;
                newNode.col = pCur.col;
                newNode.sWordInSegGraph = word.Substring(1);

                WordResult theWord = new WordResult();
                theWord.sWord = newNode.sWordInSegGraph;
                theWord.nPOS = 27904; // 'm' * 256
                theWord.dValue = pCur.theWord.dValue;
                newNode.theWord = theWord;

                // The current node shrinks to the single hyphen character.
                pCur.col = pCur.row + 1;
                pCur.theWord.sWord = word.Substring(0, 1);
                pCur.theWord.nPOS = 30464; // 'w' * 256
                pCur.theWord.dValue = 0;

                // Link the new node in directly after the current one.
                // NOTE(review): if WordLinkedArray also tracks a tail reference, it is
                // not updated here when pCur was the last node - confirm against the type.
                newNode.next = pCur.next;
                pCur.next = newNode;
                linkedArray.Count++;
            }
        }

        // Advance both cursors by one node; after a split the next current node is
        // the freshly inserted remainder and the previous node is the hyphen.
        pPre = pCur;
        pCur = pCur.next;
    }
}
#endregion
#region CheckDateElements Method
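//====================================================================
// 1. If the current word is a number and the next word is one of
//    "月", "日", "时", "分", "秒" or "月份", merge them and tag the result as time.
// 2. If the current word is a number that can serve as a year and the next word
//    is "年", merge them and tag the result as time; otherwise tag it as a number.
// 3. If the last character is "点", the current number is treated as time.
// 4. If the last character of the current string is not one of "∶·./" or the
//    half-width '.' and '/', the word is a number.
// 5. If the last character is one of "∶·./" or the half-width '.' and '/', and
//    the length is greater than 1, drop the last character. Example: "1."
//====================================================================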
// Walks the word list pairwise and, for every numeric word (per Utility.IsAllNum /
// Utility.IsAllChineseNum), either merges it with a following date/time unit and
// tags it as time, or tags it as a number. The last node of the list is never
// examined as the current word. Lists with fewer than two words are left untouched.
private static void CheckDateElements(ref WordLinkedArray linkedArray)
{
    if (linkedArray.Count < 2)
        return;

    WordNode pCur = linkedArray.first;
    WordNode pNext = pCur.next;
    while (pNext != null)
    {
        if (Utility.IsAllNum(pCur.theWord.sWord) || Utility.IsAllChineseNum(pCur.theWord.sWord))
        {
            string nextWord = pNext.theWord.sWord;

            // Single-character units are looked up with the ordinal char overload:
            // the culture-sensitive string overload reports index 0 for ignorable
            // characters, which would wrongly count as a match.
            bool isDateUnit = (nextWord.Length == 1 && "月日时分秒".IndexOf(nextWord[0]) != -1)
                              || nextWord == "月份";
            bool isYearSuffix = nextWord == "年";

            if (isDateUnit || (isYearSuffix && IsYearTime(pCur.theWord.sWord)))
            {
                // Rules 1 and 2: number + unit (or year-like number + "年") are merged
                // into the current node, and the unit node is unlinked.
                // NOTE(review): if WordLinkedArray also tracks a tail reference, it is
                // not updated here when the removed node was the last one - confirm.
                pCur.theWord.sWord += nextWord;
                pCur.col = pNext.col;
                pCur.sWordInSegGraph = "未##时";
                pCur.theWord.nPOS = -29696; // -('t' * 256)
                pCur.next = pNext.next;
                pNext = pCur.next;
                linkedArray.Count--;
            }
            else if (isYearSuffix)
            {
                // Rule 2, fallback: followed by "年" but not usable as a year -> number.
                pCur.sWordInSegGraph = "未##数";
                pCur.theWord.nPOS = -27904; // -('m' * 256)
            }
            else if (pCur.theWord.sWord.EndsWith("点", StringComparison.Ordinal))
            {
                // Rule 3: a number ending in "点" is treated as time.
                pCur.sWordInSegGraph = "未##时";
                pCur.theWord.nPOS = -29696; // -('t' * 256)
            }
            else
            {
                string word = pCur.theWord.sWord;
                char lastChar = word[word.Length - 1];

                if ("∶·././".IndexOf(lastChar) == -1)
                {
                    // Rule 4: no trailing separator -> number.
                    pCur.sWordInSegGraph = "未##数";
                    pCur.theWord.nPOS = -27904; // -('m' * 256)
                }
                else if (word.Length > 1)
                {
                    // Rule 5: strip the trailing separator (e.g. "1.") and tag as number.
                    pCur.theWord.sWord = word.Substring(0, word.Length - 1);
                    pCur.sWordInSegGraph = "未##数";
                    pCur.theWord.nPOS = -27904; // -('m' * 256)
                }
            }
        }

        // Advance by one node. After a merge that consumed the last node pNext is
        // already null, so it is re-derived from pCur instead of dereferenced.
        pCur = pCur.next;
        pNext = (pCur != null) ? pCur.next : null;
    }
}
#endregion
#region IsYearTime Method
// Decides whether sNum is a number that can stand in front of "年" as a year.
// Returns true as soon as one of the accepted shapes matches, false otherwise.
private static bool IsYearTime(string sNum)
{
    int length = sNum.Length;

    // Plain digits: four characters (e.g. 1992年), or two characters whose first
    // one is in the 5-9 set (e.g. 90年). The set is written twice in the literal
    // below; presumably these were half-width and full-width digits originally.
    if (Utility.IsAllNum(sNum))
    {
        if (length == 4)
        {
            return true;
        }
        if (length == 2 && "5678956789".IndexOf(sNum[0]) != -1)
        {
            return true;
        }
    }

    // At least two characters, all of them drawn from the Chinese digit set.
    if (Utility.GetCharCount("零○一二三四五六七八九壹贰叁肆伍陆柒捌玖", sNum) == length && length >= 2)
    {
        return true;
    }

    // Four characters with a GetCharCount of 2 against "千仟零○", e.g. 二仟零二年.
    if (length == 4 && Utility.GetCharCount("千仟零○", sNum) == 2)
    {
        return true;
    }

    // A single "千" or "仟".
    if (length == 1 && Utility.GetCharCount("千仟", sNum) == 1)
    {
        return true;
    }

    // Two characters: one of 甲..癸 followed by one of 子..亥.
    return length == 2 && Regex.IsMatch(sNum, "^[甲乙丙丁戊己庚辛壬癸][子丑寅卯辰巳午未申酉戌亥]$");
}
#endregion
#endregion
#region Events
// Raises OnSegmentEvent with this instance as the sender; does nothing when
// no handler is attached.
private void SendEvents(SegmentEventArgs e)
{
    if (OnSegmentEvent == null)
    {
        return;
    }

    OnSegmentEvent(this, e);
}
// Reports the AtomSegment stage: every atom's sWord followed by ", ",
// terminated by a CRLF.
private void OnAtomSegment(List<AtomNode> nodes)
{
    StringBuilder text = new StringBuilder();

    foreach (AtomNode atom in nodes)
    {
        text.AppendFormat("{0}, ", atom.sWord);
    }
    text.Append("\r\n");

    SegmentEventArgs args = new SegmentEventArgs(SegmentStage.AtomSegment, text.ToString());
    SendEvents(args);
}
// Reports the GenSegGraph stage with the graph's ToString() output as payload.
private void OnGenSegGraph(RowFirstDynamicArray<ChainContent> segGraph)
{
    string graphText = segGraph.ToString();
    SendEvents(new SegmentEventArgs(SegmentStage.GenSegGraph, graphText));
}
// Reports the GenBiSegGraph stage with the graph's ToString() output as payload.
private void OnGenBiSegGraph(ColumnFirstDynamicArray<ChainContent> biGraph)
{
    string graphText = biGraph.ToString();
    SendEvents(new SegmentEventArgs(SegmentStage.GenBiSegGraph, graphText));
}
// Reports the NShortPath stage. Each path becomes one CRLF-terminated line of
// ", "-separated words. Placeholder tokens (未##人, 未##地, 未##数, 未##时, 未##串)
// are replaced by the atoms in [row, col) of the corresponding graph item.
private void OnNShortPath(List<int[]> paths, RowFirstDynamicArray<ChainContent> segGraph)
{
    List<ChainItem<ChainContent>> items = segGraph.ToListItems();
    StringBuilder text = new StringBuilder();

    foreach (int[] path in paths)
    {
        foreach (int itemIndex in path)
        {
            ChainItem<ChainContent> item = items[itemIndex];
            string word = item.Content.sWord;

            switch (word)
            {
                case "未##人":
                case "未##地":
                case "未##数":
                case "未##时":
                case "未##串":
                    // Rebuild the surface text from the underlying atoms.
                    for (int atomIndex = item.row; atomIndex < item.col; atomIndex++)
                    {
                        text.Append(atomSegment[atomIndex].sWord);
                    }
                    text.Append(", ");
                    break;

                default:
                    text.AppendFormat("{0}, ", word);
                    break;
            }
        }
        text.Append("\r\n");
    }

    SendEvents(new SegmentEventArgs(SegmentStage.NShortPath, text.ToString()));
}
// Reports the BeforeOptimize stage: one CRLF-terminated line per segmentation
// result, each word followed by ", ".
private void OnBeforeOptimize(List<WordResult[]> m_pWordSeg)
{
    StringBuilder text = new StringBuilder();

    foreach (WordResult[] segmentation in m_pWordSeg)
    {
        foreach (WordResult word in segmentation)
        {
            text.AppendFormat("{0}, ", word.sWord);
        }
        text.Append("\r\n");
    }

    SendEvents(new SegmentEventArgs(SegmentStage.BeforeOptimize, text.ToString()));
}
// Reports the OptimumSegment stage with the graph's ToString() output as payload.
private void OnOptimumSegment(RowFirstDynamicArray<ChainContent> m_graphOptimum)
{
    string graphText = m_graphOptimum.ToString();
    SendEvents(new SegmentEventArgs(SegmentStage.OptimumSegment, graphText));
}
// Reports the optimised bi-gram graph's ToString() output as an event payload.
// NOTE(review): the event is tagged SegmentStage.GenBiSegGraph, the same stage
// OnGenBiSegGraph uses - confirm whether a dedicated stage value was intended.
private void OnGenBiOptimumSegGraph(ColumnFirstDynamicArray<ChainContent> biOptGraph)
{
    string graphText = biOptGraph.ToString();
    SendEvents(new SegmentEventArgs(SegmentStage.GenBiSegGraph, graphText));
}
#endregion
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -