⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 program.cs

📁 只是中科院分词系统的SharpICTCLAS分词系统
💻 CS
字号:
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
using SharpICTCLAS;

namespace ConsoleTest
{
   class Program
   {
      /// <summary>
      /// Dictionary data directory: the "Data" folder under the current working directory, with a trailing
      /// directory separator so file names can be appended directly.
      /// Static field initialisers run in textual order, so this field must stay above the paths built from it.
      /// </summary>
      public static string DictPath = Path.Combine(Environment.CurrentDirectory, "Data") + Path.DirectorySeparatorChar;
      /// <summary>Path of "coreDict.dct" inside <see cref="DictPath"/>; the core dictionary loaded by the tests in this class.</summary>
      public static string coreDictFile = DictPath + "coreDict.dct";
      /// <summary>Path of "BigramDict.dct" inside <see cref="DictPath"/>; loaded as the second dictionary by the bigram tests.</summary>
      public static string biDictFile = DictPath + "BigramDict.dct";
      /// <summary>Path of "nr.ctx" inside <see cref="DictPath"/>; loaded by TestContextStat.</summary>
      public static string contextFile = DictPath + "nr.ctx";
      /// <summary>
      /// Path of "tr.dct" inside <see cref="DictPath"/>. Not referenced anywhere else in this file.
      /// NOTE(review): the field is named "nr" but points at "tr.dct" — confirm which dictionary is intended.
      /// </summary>
      public static string nrFile = DictPath + "tr.dct";

      /// <summary>
      /// Console entry point: runs the currently selected test routine, then waits for the user to press
      /// Enter so the console output stays visible.
      /// </summary>
      /// <param name="args">Command-line arguments; not used.</param>
      static void Main(string[] args)
      {
         // Only the dictionary test is enabled. To exercise another scenario, call one of these instead:
         //    TestNShortPath, TestAtomSegment, TestGenerateWordNet, TestBiGraphGenerate,
         //    TestBiSegment, TestContextStat, TestCCStringCompare
         TestDictionary();

         // Prompt, then block until Enter is pressed.
         Console.Write("按下回车键退出......");
         Console.ReadLine();
      }

      #region 测试字典的读取

      /// <summary>
      /// Loads the core dictionary and, for index-table IDs 2 through 5, prints the character mapped to the ID
      /// followed by every word item stored under it (word length, frequency, part-of-speech string, word).
      /// Prints "Wrong!" when the dictionary cannot be loaded.
      /// </summary>
      public static void TestDictionary()
      {
         WordDictionary dict = new WordDictionary();

         // Nothing to list if the dictionary file failed to load.
         if (!dict.Load(coreDictFile, false))
         {
            Console.WriteLine("Wrong!");
            return;
         }

         for (int id = 2; id <= 5; id++)
         {
            // Section header: the character for this ID and the ID itself.
            Console.WriteLine("====================================\r\n汉字:{0}, ID :{1}\r\n", Utility.CC_ID2Char(id), id);

            // Column titles: word length, frequency, part of speech, word.
            Console.WriteLine("  词长  频率  词性   词");

            for (int item = 0; item < dict.indexTable[id].nCount; item++)
            {
               // The leading character is printed in parentheses in front of the stored word text.
               Console.WriteLine("{0,5} {1,6} {2,5}  ({3}){4}",
                  dict.indexTable[id].WordItems[item].nWordLen,
                  dict.indexTable[id].WordItems[item].nFrequency,
                  Utility.GetPOSString(dict.indexTable[id].WordItems[item].nPOS),
                  Utility.CC_ID2Char(id),
                  dict.indexTable[id].WordItems[item].sWord);
            }
         }
      }

      #endregion

      #region 测试原子分词

      /// <summary>
      /// Runs atom segmentation on a sample sentence that mixes Chinese text, Latin letters, digits and
      /// symbols, then prints each atom's text and its nPOS value, one atom per line.
      /// </summary>
      public static void TestAtomSegment()
      {
         const string sample = @"三星SHX-132型号的(手机)1元钱256.89元12.14%百分比12%";

         // The sample is wrapped in the sentence begin/end markers before it is segmented.
         List<AtomNode> atoms = Segment.AtomSegment(Predefine.SENTENCE_BEGIN + sample + Predefine.SENTENCE_END);

         foreach (AtomNode atom in atoms)
         {
            Console.WriteLine("{0,10} {1,5}", atom.sWord, atom.nPOS);
         }
      }

      #endregion

      #region 测试 N 最短路径

      /// <summary>
      /// Fills a cost table with ten entries (row/column indices 0..6), runs NShortPath.Calculate on it with
      /// N = 2 and prints: the table itself, the result by index, the path lists returned by GetPaths for
      /// indices 0..N-1, the best path, and at most five paths from GetNPaths.
      /// </summary>
      public static void TestNShortPath()
      {
         int n = 2;

         // Upper bound passed to GetNPaths; also shown in the heading printed above that list.
         const int maxPaths = 5;

         // presumably each entry is an edge (from, to) carrying its weight — verify against ChainContent
         ColumnFirstDynamicArray<ChainContent> apCost = new ColumnFirstDynamicArray<ChainContent>();
         apCost.SetElement(0, 1, new ChainContent(1));
         apCost.SetElement(1, 2, new ChainContent(1));
         apCost.SetElement(1, 3, new ChainContent(2));
         apCost.SetElement(2, 3, new ChainContent(1));
         apCost.SetElement(2, 4, new ChainContent(1));
         apCost.SetElement(3, 4, new ChainContent(1));
         apCost.SetElement(4, 5, new ChainContent(1));
         apCost.SetElement(3, 6, new ChainContent(2));
         apCost.SetElement(4, 6, new ChainContent(3));
         apCost.SetElement(5, 6, new ChainContent(1));
         Console.WriteLine(apCost.ToString());

         NShortPath.Calculate(apCost, n);
         NShortPath.printResultByIndex();

         //----------------------------------------------------
         // All paths, one group per index 0..n-1
         //----------------------------------------------------
         Console.WriteLine("\r\n\r\n所有路径:");
         for (int i = 0; i < n; i++)
         {
            PrintNodePaths(NShortPath.GetPaths(i));
            Console.WriteLine("========================");
         }

         //----------------------------------------------------
         // Best path
         //----------------------------------------------------
         Console.WriteLine("\r\n最佳路径:");
         PrintNodePath(NShortPath.GetBestPath());

         //----------------------------------------------------
         // At most maxPaths paths
         //----------------------------------------------------
         Console.WriteLine("\r\n最多 {0} 条路径:", maxPaths);
         PrintNodePaths(NShortPath.GetNPaths(maxPaths));
      }

      /// <summary>Prints every path in <paramref name="paths"/>, one path per line.</summary>
      private static void PrintNodePaths(List<int[]> paths)
      {
         foreach (int[] path in paths)
         {
            PrintNodePath(path);
         }
      }

      /// <summary>Prints the elements of <paramref name="path"/> on one line, each followed by ", ", then ends the line.</summary>
      private static void PrintNodePath(int[] path)
      {
         foreach (int node in path)
         {
            Console.Write("{0}, ", node);
         }

         Console.WriteLine();
      }

      #endregion

      #region 测试初始分词

      /// <summary>
      /// Loads the core dictionary, atom-segments a sample sentence wrapped in the begin/end markers,
      /// builds the word net from those atoms and prints it. Prints an error message instead when the
      /// dictionary cannot be loaded.
      /// </summary>
      public static void TestGenerateWordNet()
      {
         WordDictionary coreDict = new WordDictionary();

         if (coreDict.Load(coreDictFile))
         {
            string wrapped = Predefine.SENTENCE_BEGIN + @"他说的确实在理" + Predefine.SENTENCE_END;

            List<AtomNode> atoms = Segment.AtomSegment(wrapped);
            RowFirstDynamicArray<ChainContent> wordNet = Segment.GenerateWordNet(atoms, coreDict);

            Console.WriteLine(wordNet.ToString());
         }
         else
         {
            Console.WriteLine("字典装入错误!");
         }
      }

      #endregion

      #region 测试初次分词产生的二叉表

      /// <summary>
      /// Loads the core and bigram dictionaries, then runs a sample sentence through atom segmentation,
      /// word-net generation and Segment.BiGraphGenerate, and prints the resulting table.
      /// Stops with an error message as soon as either dictionary fails to load.
      /// </summary>
      public static void TestBiGraphGenerate()
      {
         // The bigram dictionary is only created once the core dictionary has loaded.
         WordDictionary coreDict = new WordDictionary();
         if (!coreDict.Load(coreDictFile))
         {
            Console.WriteLine("coreDict 字典装入错误!");
            return;
         }

         WordDictionary biDict = new WordDictionary();
         if (!biDict.Load(biDictFile))
         {
            Console.WriteLine("字典装入错误!");
            return;
         }

         string wrapped = Predefine.SENTENCE_BEGIN + @"他说的确实在理" + Predefine.SENTENCE_END;

         // Step 1: split the wrapped sentence into atoms.
         List<AtomNode> atoms = Segment.AtomSegment(wrapped);

         // Step 2: look the atoms up in the core dictionary and collect the candidate words.
         RowFirstDynamicArray<ChainContent> wordNet = Segment.GenerateWordNet(atoms, coreDict);

         // Step 3: generate the pairwise table from the word net (0.1 is passed through unchanged).
         ColumnFirstDynamicArray<ChainContent> biGraph = Segment.BiGraphGenerate(wordNet, 0.1, biDict, coreDict);

         Console.WriteLine(biGraph.ToString());
      }

      #endregion

      #region 测试 Segment.BiSegment

      /// <summary>
      /// Runs Segment.BiSegment over six sample sentences. For each one it prints the case label, the atom
      /// segmentation, and every segmentation result held in m_pWordSeg (one result per line).
      /// Stops with an error message if either dictionary fails to load.
      /// </summary>
      public static void TestBiSegment()
      {
         // Column 0 is the sentence, column 1 is the label printed above its results.
         string[,] cases = new string[,]
         {
            { @"他说的确实在理", @"普通分词测试" },
            { @"张华平3-4月份来北京开会", @"数字切分" },
            { @"1.加强管理", @"剔除多余的“.”" },
            { @"他出生于1980年1月1日10点", @"日期合并" },
            { @"他出生于甲子年", @"年份识别" },
            { @"馆内陈列周恩来和邓颖超生前使用过的物品", @"姓名识别" }
         };

         WordDictionary coreDict = new WordDictionary();
         if (!coreDict.Load(coreDictFile))
         {
            Console.WriteLine("coreDict 字典装入错误!");
            return;
         }

         WordDictionary biDict = new WordDictionary();
         if (!biDict.Load(biDictFile))
         {
            Console.WriteLine("字典装入错误!");
            return;
         }

         int caseCount = cases.GetLength(0);
         for (int c = 0; c < caseCount; c++)
         {
            Console.WriteLine("\r\n============ {0} ============", cases[c, 1]);

            string wrapped = Predefine.SENTENCE_BEGIN + cases[c, 0] + Predefine.SENTENCE_END;

            // Atom segmentation runs before its heading is printed, as in the original flow.
            List<AtomNode> atoms = Segment.AtomSegment(wrapped);
            Console.WriteLine("原子切分:");
            foreach (AtomNode atom in atoms)
            {
               Console.Write("{0}, ", atom.sWord);
            }

            Console.WriteLine("\r\n\r\n实际切分:");

            // A fresh Segment is created for every sentence.
            Segment segment = new Segment(biDict, coreDict);
            segment.BiSegment(wrapped, 0.1, 1);

            for (int r = 0; r < segment.m_pWordSeg.Count; r++)
            {
               for (int w = 0; w < segment.m_pWordSeg[r].Length; w++)
               {
                  Console.Write("{0}, ", segment.m_pWordSeg[r][w].sWord);
               }

               Console.WriteLine();
            }
         }
      }

      #endregion

      #region 测试 ContextStat

      /// <summary>
      /// Loads the context file into a ContextStat and saves it again, printing "OK!" on success or a
      /// failure message for whichever step failed.
      /// </summary>
      public static void TestContextStat()
      {
         ContextStat cs = new ContextStat();

         if (!cs.Load(contextFile))
         {
            Console.WriteLine("文件装载失败!");
            return;
         }

         // The save target is DictPath + "nr.ctx", the same path contextFile is initialised to,
         // so with the default field values this overwrites the file that was just loaded.
         string outcome = cs.Save(DictPath + "nr.ctx") ? "OK!" : "写文件失败!";
         Console.WriteLine(outcome);
      }

      #endregion

      #region 测试 CCStringCompare

      /// <summary>
      /// Checks Utility.CCStringCompare against two word lists whose entries are listed in the order the
      /// comparer is expected to produce. Every adjacent pair that is not identical must compare as
      /// strictly ascending; each pair that does not is reported on the console.
      /// </summary>
      public static void TestCCStringCompare()
      {
         string[] s = { "公开赛", "公开赛", "公开信", "公开性", "公款", "公款吃喝", "公厘", "公理", "公理", "公里", "公里/小时", "公里/小时", "公里/小时", "公里数", "公历", "公例", "公立", "公粮", "公路", "公路", "公路局", "公路桥", "公路网" };
         string[] s1 = { "王@、", "王@。", "王@”", "王@』", "王@,", "王@霸", "王@传", "王@大夫", "王@大娘", "王@大爷", "王@道士", "王@的", "王@家", "王@老汉", "王@两", "王@末##末", "王@女士", "王@未##人", "王@未##它", "王@先生", "王@姓", "王朝@,", "王朝@的", "王储@殿下", "王储@兼", "王储@未##人", "王府井@百货大楼", "王府井@大街", "王公@贵族", "王宫@会见", "王国@。", "王国@”", "王国@,", "王国@的", "王国@里", "王国@政府", "王后@未##人", "王码@电脑", "王牌@。", "王牌@”", "王室@成员", "王营@煤矿", "王兆国@、", "王兆国@,", "王兆国@出席", "王兆国@等", "王兆国@对", "王兆国@会见", "王兆国@及", "王兆国@今天", "王兆国@受", "王兆国@说", "王兆国@在", "王兆国@指出", "王兆国@主持", "王子@的" };

         ReportOrderViolations(s);
         ReportOrderViolations(s1);
      }

      /// <summary>
      /// Walks adjacent pairs of <paramref name="words"/> and prints an error line for every pair where
      /// Utility.CCStringCompare returns a value &gt;= 0 although the two strings are not identical.
      /// </summary>
      /// <param name="words">Entries in the order the comparer is expected to agree with; repeated entries are allowed.</param>
      private static void ReportOrderViolations(string[] words)
      {
         for (int i = 0; i < words.Length - 1; i++)
         {
            string current = words[i];
            string next = words[i + 1];

            // Repeated entries are exempt. The identity check is ordinal so that it does not depend on
            // the current culture, unlike the culture-sensitive string.Compare(a, b).
            if (Utility.CCStringCompare(current, next) >= 0 && !string.Equals(current, next, StringComparison.Ordinal))
            {
               Console.WriteLine("出现错误:{0}   <-->   {1}", current, next);
            }
         }
      }

      #endregion
   }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -