📄 program.cs
字号:
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
using SharpICTCLAS;
namespace ConsoleTest
{
class Program
{
public static string DictPath = Path.Combine(Environment.CurrentDirectory, "Data") + Path.DirectorySeparatorChar;
public static string coreDictFile = DictPath + "coreDict.dct";
public static string biDictFile = DictPath + "BigramDict.dct";
public static string contextFile = DictPath + "nr.ctx";
public static string nrFile = DictPath + "tr.dct";
static void Main(string[] args)
{
TestDictionary();
//TestNShortPath();
//TestAtomSegment();
//TestGenerateWordNet();
//TestBiGraphGenerate();
//TestBiSegment();
//TestContextStat();
//TestCCStringCompare();
Console.Write("按下回车键退出......");
Console.ReadLine();
}
#region 测试字典的读取
public static void TestDictionary()
{
WordDictionary dict = new WordDictionary();
if (dict.Load(coreDictFile, false))
{
for (int j = 2; j <= 5; j++)
{
Console.WriteLine("====================================\r\n汉字:{0}, ID :{1}\r\n", Utility.CC_ID2Char(j), j);
Console.WriteLine(" 词长 频率 词性 词");
for (int i = 0; i < dict.indexTable[j].nCount; i++)
Console.WriteLine("{0,5} {1,6} {2,5} ({3}){4}",
dict.indexTable[j].WordItems[i].nWordLen,
dict.indexTable[j].WordItems[i].nFrequency,
Utility.GetPOSString(dict.indexTable[j].WordItems[i].nPOS),
Utility.CC_ID2Char(j),
dict.indexTable[j].WordItems[i].sWord);
}
}
else
Console.WriteLine("Wrong!");
}
#endregion
#region 测试原子分词
public static void TestAtomSegment()
{
string sSentence = @"三星SHX-132型号的(手机)1元钱256.89元12.14%百分比12%";
sSentence = Predefine.SENTENCE_BEGIN + sSentence + Predefine.SENTENCE_END;
List<AtomNode> nodes = Segment.AtomSegment(sSentence);
for (int i = 0; i < nodes.Count; i++)
Console.WriteLine("{0,10} {1,5}", nodes[i].sWord, nodes[i].nPOS);
}
#endregion
#region 测试 N 最短路径
public static void TestNShortPath()
{
int n = 2;
List<int[]> result;
int[] aPath;
ColumnFirstDynamicArray<ChainContent> apCost = new ColumnFirstDynamicArray<ChainContent>();
apCost.SetElement(0, 1, new ChainContent(1));
apCost.SetElement(1, 2, new ChainContent(1));
apCost.SetElement(1, 3, new ChainContent(2));
apCost.SetElement(2, 3, new ChainContent(1));
apCost.SetElement(2, 4, new ChainContent(1));
apCost.SetElement(3, 4, new ChainContent(1));
apCost.SetElement(4, 5, new ChainContent(1));
apCost.SetElement(3, 6, new ChainContent(2));
apCost.SetElement(4, 6, new ChainContent(3));
apCost.SetElement(5, 6, new ChainContent(1));
Console.WriteLine(apCost.ToString());
NShortPath.Calculate(apCost, n);
NShortPath.printResultByIndex();
//----------------------------------------------------
// 所有路径
//----------------------------------------------------
Console.WriteLine("\r\n\r\n所有路径:");
for (int i = 0; i < n; i++)
{
result = NShortPath.GetPaths(i);
for (int j = 0; j < result.Count; j++)
{
aPath = result[j];
for (int k = 0; k < aPath.Length; k++)
Console.Write("{0}, ", aPath[k]);
Console.WriteLine();
}
Console.WriteLine("========================");
}
//----------------------------------------------------
// 最佳路径
//----------------------------------------------------
Console.WriteLine("\r\n最佳路径:");
aPath = NShortPath.GetBestPath();
for (int k = 0; k < aPath.Length; k++)
Console.Write("{0}, ", aPath[k]);
Console.WriteLine();
//----------------------------------------------------
// 最多 n 个路径
//----------------------------------------------------
Console.WriteLine("\r\n最多 {0} 条路径:", 5);
result = NShortPath.GetNPaths(5);
for (int j = 0; j < result.Count; j++)
{
aPath = result[j];
for (int k = 0; k < aPath.Length; k++)
Console.Write("{0}, ", aPath[k]);
Console.WriteLine();
}
}
#endregion
#region 测试初始分词
public static void TestGenerateWordNet()
{
WordDictionary coreDict = new WordDictionary();
if (!coreDict.Load(coreDictFile))
{
Console.WriteLine("字典装入错误!");
return;
}
string sSentence = @"他说的确实在理";
sSentence = Predefine.SENTENCE_BEGIN + sSentence + Predefine.SENTENCE_END;
List<AtomNode> atomSegment = Segment.AtomSegment(sSentence);
RowFirstDynamicArray<ChainContent> m_segGraph = Segment.GenerateWordNet(atomSegment, coreDict);
Console.WriteLine(m_segGraph.ToString());
}
#endregion
#region 测试初次分词产生的二叉表
public static void TestBiGraphGenerate()
{
WordDictionary coreDict = new WordDictionary();
if (!coreDict.Load(coreDictFile))
{
Console.WriteLine("coreDict 字典装入错误!");
return;
}
WordDictionary biDict = new WordDictionary();
if (!biDict.Load(biDictFile))
{
Console.WriteLine("字典装入错误!");
return;
}
string sSentence = @"他说的确实在理";
sSentence = Predefine.SENTENCE_BEGIN + sSentence + Predefine.SENTENCE_END;
//---原子分词
List<AtomNode> atomSegment = Segment.AtomSegment(sSentence);
//---检索词库,加入所有可能分词方案并存入链表结构
RowFirstDynamicArray<ChainContent> segGraph = Segment.GenerateWordNet(atomSegment, coreDict);
//---检索所有可能的两两组合
ColumnFirstDynamicArray<ChainContent> biGraphResult = Segment.BiGraphGenerate(segGraph, 0.1, biDict, coreDict);
Console.WriteLine(biGraphResult.ToString());
}
#endregion
#region 测试 Segment.BiSegment
public static void TestBiSegment()
{
List<string> sentence = new List<string>();
List<string> description = new List<string>();
sentence.Add(@"他说的确实在理");
description.Add(@"普通分词测试");
sentence.Add(@"张华平3-4月份来北京开会");
description.Add(@"数字切分");
sentence.Add(@"1.加强管理");
description.Add(@"剔除多余的“.”");
sentence.Add(@"他出生于1980年1月1日10点");
description.Add(@"日期合并");
sentence.Add(@"他出生于甲子年");
description.Add(@"年份识别");
sentence.Add(@"馆内陈列周恩来和邓颖超生前使用过的物品");
description.Add(@"姓名识别");
WordDictionary coreDict = new WordDictionary();
if (!coreDict.Load(coreDictFile))
{
Console.WriteLine("coreDict 字典装入错误!");
return;
}
WordDictionary biDict = new WordDictionary();
if (!biDict.Load(biDictFile))
{
Console.WriteLine("字典装入错误!");
return;
}
string sSentence;
string sDescription;
for (int i = 0; i < sentence.Count; i++)
{
sSentence = sentence[i];
sDescription = description[i];
Console.WriteLine("\r\n============ {0} ============", sDescription);
sSentence = Predefine.SENTENCE_BEGIN + sSentence + Predefine.SENTENCE_END;
List<AtomNode> nodes = Segment.AtomSegment(sSentence);
Console.WriteLine("原子切分:");
for (int j = 0; j < nodes.Count; j++)
Console.Write("{0}, ", nodes[j].sWord);
Console.WriteLine("\r\n\r\n实际切分:");
Segment segment = new Segment(biDict, coreDict);
segment.BiSegment(sSentence, 0.1, 1);
for (int k = 0; k < segment.m_pWordSeg.Count; k++)
{
for (int j = 0; j < segment.m_pWordSeg[k].Length; j++)
Console.Write("{0}, ", segment.m_pWordSeg[k][j].sWord);
Console.WriteLine();
}
}
}
#endregion
#region 测试 ContextStat
public static void TestContextStat()
{
ContextStat cs = new ContextStat();
if (cs.Load(contextFile))
if (!cs.Save(DictPath + "nr.ctx"))
Console.WriteLine("写文件失败!");
else
Console.WriteLine("OK!");
else
Console.WriteLine("文件装载失败!");
}
#endregion
#region 测试 CCStringCompare
public static void TestCCStringCompare()
{
string[] s = { "公开赛", "公开赛", "公开信", "公开性", "公款", "公款吃喝", "公厘", "公理", "公理", "公里", "公里/小时", "公里/小时", "公里/小时", "公里数", "公历", "公例", "公立", "公粮", "公路", "公路", "公路局", "公路桥", "公路网" };
string[] s1 = { "王@、", "王@。", "王@”", "王@』", "王@,", "王@霸", "王@传", "王@大夫", "王@大娘", "王@大爷", "王@道士", "王@的", "王@家", "王@老汉", "王@两", "王@末##末", "王@女士", "王@未##人", "王@未##它", "王@先生", "王@姓", "王朝@,", "王朝@的", "王储@殿下", "王储@兼", "王储@未##人", "王府井@百货大楼", "王府井@大街", "王公@贵族", "王宫@会见", "王国@。", "王国@”", "王国@,", "王国@的", "王国@里", "王国@政府", "王后@未##人", "王码@电脑", "王牌@。", "王牌@”", "王室@成员", "王营@煤矿", "王兆国@、", "王兆国@,", "王兆国@出席", "王兆国@等", "王兆国@对", "王兆国@会见", "王兆国@及", "王兆国@今天", "王兆国@受", "王兆国@说", "王兆国@在", "王兆国@指出", "王兆国@主持", "王子@的" };
for (int i = 0; i < s.Length - 1; i++)
{
if (Utility.CCStringCompare(s[i], s[i + 1]) >= 0 && string.Compare(s[i], s[i + 1])!=0)
Console.WriteLine("出现错误:{0} <--> {1}", s[i], s[i + 1]);
}
for (int i = 0; i < s1.Length - 1; i++)
{
if (Utility.CCStringCompare(s1[i], s1[i + 1]) >= 0 && string.Compare(s1[i], s1[i + 1]) != 0)
Console.WriteLine("出现错误:{0} <--> {1}", s1[i], s1[i + 1]);
}
}
#endregion
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -