📄 extractwords.cs
字号:
{
m_GameNodes.Add(obj);
}
}
}
deep = 0;
nodes.Clear();
}
}
else
{
nodes.Add(begin);
deep++;
T_WordInfo last = (T_WordInfo)words[begin];
bool nextStep = false;
bool reach = false;
int endPos = last.Position + last.Word.Length - 1;
int oldDeep = deep;
int oldSpace = spaceNum;
for (int i = begin + 1; i <= end; i++)
{
T_WordInfo cur = (T_WordInfo)words[i];
if (endPos < cur.Position + cur.Word.Length - 1)
{
endPos = cur.Position + cur.Word.Length - 1;
}
if (last.Position + last.Word.Length <= cur.Position)
{
nextStep = true;
if (reach)
{
reach = false;
spaceNum = oldSpace;
deep = oldDeep;
nodes.RemoveAt(nodes.Count - 1);
}
spaceNum += cur.Position - (last.Position + last.Word.Length);
List<int> oneNodes;
oneNodes = GameTree(words, nodes, false, i, end, ref spaceNum, ref deep);
if (oneNodes != null)
{
bool select = false;
if (m_MinSpace > spaceNum ||
(m_MinSpace == spaceNum && deep < m_MinDeep))
{
select = true;
}
else if (m_MinDeep == deep && m_MinSpace == spaceNum)
{
if (m_CompareByPos != null && m_MinSpace == 0)
{
select = m_CompareByPos(words, m_GameNodes, oneNodes);
}
else
{
select = CompareGroup(words, m_GameNodes, oneNodes, MatchDirection);
}
}
if (select)
{
reach = true;
nextStep = false;
m_MinDeep = deep;
m_MinSpace = spaceNum;
m_GameNodes.Clear();
foreach (int obj in oneNodes)
{
m_GameNodes.Add(obj);
}
}
else
{
spaceNum = oldSpace;
deep = oldDeep;
nodes.RemoveRange(deep, nodes.Count - deep);
}
}
else
{
spaceNum = oldSpace;
deep = oldDeep;
nodes.RemoveRange(deep , nodes.Count - deep);
}
}
}
if (!nextStep)
{
spaceNum += endPos - (last.Position + last.Word.Length-1);
List<int> ret = new List<int>();
foreach (int obj in nodes)
{
ret.Add(obj);
}
return ret;
}
}
return null;
}
/// <summary>
/// Extracts the words matched in the full text and, wherever several candidate
/// words overlap, keeps only the combination chosen by the game-tree search.
/// </summary>
/// <param name="fullText">The full text to scan.</param>
/// <returns>
/// A list of <c>T_WordInfo</c>; the list is empty when no word matches.
/// </returns>
/// <remarks>
/// Overwrites the instance fields <c>m_GameNodes</c>, <c>m_MinDeep</c> and
/// <c>m_MinSpace</c> each time an overlapping group is resolved.
/// </remarks>
public List<T_WordInfo> ExtractFullTextMaxMatch(String fullText)
{
    List<T_WordInfo> candidates = ExtractFullText(fullText);
    List<T_WordInfo> result = new List<T_WordInfo>();

    int start = 0;
    while (start < candidates.Count)
    {
        // Grow [start, stop] while the next candidate begins at or before the
        // furthest end position covered so far.
        int stop = start;
        int coveredEnd = 0;

        // Deep nesting rarely happens; when it does, cut the group off once stop
        // is more than 16 entries past start so the game tree is not traversed
        // too deeply, which would hurt performance.
        while (stop < candidates.Count - 1 && stop - start <= 16)
        {
            T_WordInfo member = candidates[stop];
            int memberEnd = member.Position + member.Word.Length - 1;
            if (memberEnd > coveredEnd)
            {
                coveredEnd = memberEnd;
            }

            if (candidates[stop + 1].Position > coveredEnd)
            {
                break;
            }

            stop++;
        }

        if (stop == start)
        {
            // Nothing overlaps this candidate: keep it as is.
            result.Add(candidates[start]);
            start++;
            continue;
        }

        // Reset the search state, then let GameTree record the selected indexes
        // in m_GameNodes.
        int spaceNum = 0;
        int deep = 0;
        m_GameNodes = new List<int>();
        m_MinDeep = 65535;
        m_MinSpace = 65535 * 256;
        GameTree(candidates, new List<int>(), true, start, stop, ref spaceNum, ref deep);

        foreach (int chosen in m_GameNodes)
        {
            result.Add(candidates[chosen]);
        }

        start = stop + 1;
    }

    return result;
}
/// <summary>
/// Scans the full text with the word DFA and reports every word it recognises,
/// including words that overlap or are contained in other matches.
/// </summary>
/// <param name="fullText">The full text to scan; may be null or empty.</param>
/// <returns>
/// A list of <c>T_WordInfo</c> in the order the words were found; the list is
/// empty when the text is null or empty, or when no word matches.
/// </returns>
public List<T_WordInfo> ExtractFullText(String fullText)
{
    List<T_WordInfo> words = new List<T_WordInfo>();
    if (String.IsNullOrEmpty(fullText))
    {
        return words;
    }

    // NOTE(review): presumably Next(null, ch) starts from the DFA root and a null
    // result means "no transition" - confirm against the DFA implementation.
    T_DfaUnit cur = null;
    bool find = false;  // true while a candidate match starting at pos is being followed
    int pos = 0;        // start index of the candidate match currently being followed
    int i = 0;
    while (i < fullText.Length)
    {
        cur = m_WordDfa.Next(cur, fullText[i]);
        if (cur != null && !find)
        {
            pos = i;
            find = true;
        }

        if (find)
        {
            if (cur == null)
            {
                // The match broke off. Words may be contained in one another, so
                // rescan from the character right after the match start.
                find = false;
                i = pos + 1;
                continue;
            }
            else if (cur.QuitWord != null)
            {
                T_WordInfo wordInfo = new T_WordInfo();
                wordInfo.Word = cur.QuitWord;
                wordInfo.Position = pos;
                wordInfo.Rank = m_WordDfa.GetRank(wordInfo.Word);
                wordInfo.Tag = cur.Tag;
                words.Add(wordInfo);

                if (cur.Childs == null)
                {
                    // No longer word can follow from this state. Words may be
                    // contained in one another, so rescan from pos + 1.
                    find = false;
                    cur = null;
                    i = pos + 1;
                    continue;
                }
            }
        }

        i++;

        if (find && i >= fullText.Length)
        {
            // The text ended while a match was still in progress. Backtrack just
            // like the branches above; otherwise words that start inside this
            // trailing partial match would never be reported. pos grows on every
            // backtrack, so the loop still terminates.
            find = false;
            cur = null;
            i = pos + 1;
        }
    }

    return words;
}
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -