⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 worddictionary.cs

📁 只是中科院分词系统的SharpICTCLAS分词系统
💻 CS
📖 第 1 页 / 共 3 页
字号:

         //Operation in the modify table and its items
         if (FindInModifyTable(nPos, sWordDel, nPOS, out pPre))
         {
            pCur = modifyTable[nPos].pWordItemHead;
            if (pPre != null)
               pCur = pPre.next;
            while (pCur != null && string.Compare(pCur.data.sWord, sWordDel, true) == 0 &&
               (pCur.data.nPOS == nPOS || nPOS < 0))
            {
               pTemp = pCur;
               //pCur is the first item
               if (pPre != null)
                  pPre.next = pCur.next;
               else
                  modifyTable[nPos].pWordItemHead = pCur.next;

               pCur = pCur.next;
            }
            return true;
         }
         return false;
      }

      #endregion

      #region IsExist Method

      //====================================================================
      // Func Name  : IsExist
      // Description: Check whether sWord with the given nHandle exists
      // Parameters : sWord: the word
      //            : nHandle: the nHandle
      // Returns    : true if the word exists, otherwise false
      //====================================================================
      public bool IsExist(string sWord, int nHandle)
      {
         int nCharId;
         string sKey;

         // Nothing can match when PreProcessing reports failure.
         if (PreProcessing(ref sWord, out nCharId, out sKey) == false)
            return false;

         // A hit in the original table is enough; the modify table is only
         // consulted when the original table has no match.
         if (FindInOriginalTable(nCharId, sKey, nHandle))
            return true;

         return FindInModifyTable(nCharId, sKey, nHandle);
      }

      #endregion

      #region GetWordType Method

      //====================================================================
      // Func Name  : GetWordType
      // Description: Get the type of word
      // Parameters : sWord: the word
      // Returns    : the type
      //====================================================================
      public int GetWordType(string sWord)
      {
         // A null or empty word has no first character to classify. Report it
         // as "other" instead of failing on the index access below.
         if (string.IsNullOrEmpty(sWord))
            return Predefine.WT_OTHER;

         // Classify by the first character; indexing the string directly
         // avoids copying the whole word into a char array.
         int nType = Utility.charType(sWord[0]);
         int nLen = Utility.GetWordLength(sWord);

         //Chinese word: first char is Chinese and the whole word passes IsAllChinese
         if (nLen > 0 && nType == Predefine.CT_CHINESE && Utility.IsAllChinese(sWord))
            return Predefine.WT_CHINESE;

         //Delimiter: decided by the first character only
         if (nLen > 0 && nType == Predefine.CT_DELIMITER)
            return Predefine.WT_DELIMITER;

         //other invalid
         return Predefine.WT_OTHER;
      }

      #endregion

      #region GetWordInfo Method

      //====================================================================
      // Func Name  : GetWordInfo
      // Description: Collect every POS / frequency pair stored for a word
      // Parameters : sWord: the word
      // Returns    : the collected WordInfo, or null when pre-processing
      //              fails or neither table contains the word
      //====================================================================
      public WordInfo GetWordInfo(string sWord)
      {
         // Record the word exactly as passed in, before PreProcessing gets
         // a chance to change it through its ref parameter.
         WordInfo result = new WordInfo();
         result.sWord = sWord;

         string sKey;
         int nCharId, nIdx;
         WordChain pPrev, pNode;

         if (!PreProcessing(ref sWord, out nCharId, out sKey))
            return null;

         //Original table: gather the run of consecutive entries equal to the key
         if (FindFirstMatchItemInOrgTbl(nCharId, sKey, out nIdx))
         {
            for (; nIdx < indexTable[nCharId].nCount; nIdx++)
            {
               //Case-sensitive comparison; stop at the first different word
               if (string.Compare(indexTable[nCharId].WordItems[nIdx].sWord, sKey) != 0)
                  break;

               result.POSs.Add(indexTable[nCharId].WordItems[nIdx].nPOS);
               result.Frequencies.Add(indexTable[nCharId].WordItems[nIdx].nFrequency);
               result.Count++;
            }
            return result;
         }

         //Modify table: only reached when the original table had no match
         if (!FindInModifyTable(nCharId, sKey, out pPrev))
            return null;

         //pPrev is used as the node in front of the first match;
         //when it is null the walk starts at the head of the chain
         pNode = modifyTable[nCharId].pWordItemHead;
         if (pPrev != null)
            pNode = pPrev.next;

         //Case-insensitive comparison here (third argument is ignoreCase)
         for (; pNode != null && string.Compare(pNode.data.sWord, sKey, true) == 0; pNode = pNode.next)
         {
            result.POSs.Add(pNode.data.nPOS);
            result.Frequencies.Add(pNode.data.nFrequency);
            result.Count++;
         }
         return result;
      }

      #endregion

      #region GetMaxMatch Method

      //====================================================================
      // Func Name  : GetMaxMatch
      // Description: Get the max match to the word
      // Parameters : sWord: the word (prefix) to match
      //            : sWordRet: receives the matched word, or "" if none
      //            : nPOSRet: receives the POS of the matched word, or -1 if none
      // Returns    : success or fail
      //====================================================================
      public bool GetMaxMatch(string sWord, out string sWordRet, out int nPOSRet)
      {
         string sPrefix, sLead;
         int nCharId;
         WordChain pNode;

         //Values reported to the caller when nothing matches
         sWordRet = "";
         nPOSRet = -1;

         if (!PreProcessing(ref sWord, out nCharId, out sPrefix))
            return false;

         //The returned word is this leading character followed by the stored word
         sLead = Utility.CC_ID2Char(nCharId).ToString();

         //Search indexTable for the first item that starts with sPrefix
         for (int i = 0; i < indexTable[nCharId].nCount; i++)
         {
            if (!indexTable[nCharId].WordItems[i].sWord.StartsWith(sPrefix))
               continue;

            sWordRet = sLead + indexTable[nCharId].WordItems[i].sWord;
            nPOSRet = indexTable[nCharId].WordItems[i].nPOS;
            return true;
         }

         //Nothing found in indexTable, so fall back to modifyTable (if any)
         if (modifyTable == null)
            return false;

         for (pNode = modifyTable[nCharId].pWordItemHead; pNode != null; pNode = pNode.next)
         {
            if (!pNode.data.sWord.StartsWith(sPrefix))
               continue;

            sWordRet = sLead + pNode.data.sWord;
            nPOSRet = pNode.data.nPOS;
            return true;
         }

         return false;
      }

      #endregion

      #region GetFrequency Method

      //====================================================================
      // Look up the frequency of sWord with part of speech nPOS
      //====================================================================
      public int GetFrequency(string sWord, int nPOS)
      {
         string sWordFind;
         int firstCharCC_ID, nIndex;
         WordChain pPre, pCur;

         //A word that fails pre-processing has no frequency
         if (!PreProcessing(ref sWord, out firstCharCC_ID, out sWordFind))
            return 0;

         //Original table: nIndex is used directly as the index of the match
         if (FindInOriginalTable(firstCharCC_ID, sWordFind, nPOS, out nIndex))
            return indexTable[firstCharCC_ID].WordItems[nIndex].nFrequency;

         //Modify table: the other callers of this overload in this class treat
         //the out value as the node BEFORE the match (null when the match is
         //the head of the chain), so step to the matching node before reading.
         //Dereferencing the out value itself would read the wrong node, or
         //fail on null when the match is the first item.
         if (FindInModifyTable(firstCharCC_ID, sWordFind, nPOS, out pPre))
         {
            pCur = (pPre != null) ? pPre.next : modifyTable[firstCharCC_ID].pWordItemHead;
            if (pCur != null)
               return pCur.data.nFrequency;
         }

         //Not found in either table
         return 0;
      }

      #endregion

      #region ReleaseDict

      //====================================================================
      // Func Name  : ReleaseDict
      // Description: Drop every entry of the index table and the whole
      //              modify table
      //====================================================================
      public void ReleaseDict()
      {
         //Nothing to clear if the index table was never allocated
         if (indexTable != null)
         {
            //Clear every slot. The former inner loop over nCount ran at most
            //once and skipped entries whose nCount was 0, leaving them behind.
            for (int i = 0; i < Predefine.CC_NUM; i++)
               indexTable[i] = null;
         }

         modifyTable = null;
      }

      #endregion

      #region MergePOS Method

      //====================================================================
      // Func Name  : MergePOS
      // Description: Merge all the POS into nPOS,
      //              just get the word in the dictionary and set its POS as nPOS
      // Parameters : nPOS: the POS value assigned to every word that is kept
      // Returns    : always true
      //====================================================================
      public bool MergePOS(int nPOS)
      {
         int i, j, nCompare;
         string sWordPrev;
         WordChain pPre, pCur, pTemp;

         //Allocate the modify table if it does not exist yet
         //NOTE(review): only the array is allocated here, its elements are not
         //assigned in this method. If ModifyTableItem is a reference type the
         //modifyTable[i] accesses below would hit null - confirm.
         if (modifyTable == null)
            modifyTable = new ModifyTableItem[Predefine.CC_NUM];

         //Pass 1: the index table
         for (i = 0; i < Predefine.CC_NUM; i++)
         {
            //Start each first-character bucket with no previous word
            sWordPrev = null; //Set empty
            for (j = 0; j < indexTable[i].nCount; j++)
            {
               //Compare the last kept word with the current one
               //(presumably negative means the current word sorts after it - confirm)
               nCompare = Utility.CCStringCompare(sWordPrev, indexTable[i].WordItems[j].sWord);
               if ((j == 0 || nCompare < 0) && indexTable[i].WordItems[j].nFrequency != -1)
               {
                  //First item of the bucket, or a different word, and not flagged
                  //as deleted: keep it and give it the merged POS
                  indexTable[i].WordItems[j].nPOS = nPOS; //Change its POS
                  sWordPrev = indexTable[i].WordItems[j].sWord;
                  //sWordPrev now holds the word just kept
               }
               else if (nCompare == 0 && indexTable[i].WordItems[j].nFrequency != -1)
               {
                  //Same word as the one just kept and not yet flagged: flag it as deleted
                  indexTable[i].WordItems[j].nFrequency = -1; //Set delete flag
                  modifyTable[i].nDelete += 1; //Count one more deleted item for this bucket
               }
            }
         }
         for (i = 0; i < Predefine.CC_NUM; i++)
         //Pass 2: the modify table (one linked chain per bucket)
         {
            pPre = null;
            pCur = modifyTable[i].pWordItemHead;
            sWordPrev = null; //Set empty
            while (pCur != null)
            {
               //NOTE(review): on the first node sWordPrev is still null; this relies on
               //CCStringCompare returning a positive value for that case - confirm
               if (Utility.CCStringCompare(pCur.data.sWord, sWordPrev) > 0)
               {
                  //A new word: keep the node and give it the merged POS
                  pCur.data.nPOS = nPOS; //Change its POS
                  sWordPrev = pCur.data.sWord; //Set new previous word
                  pPre = pCur; //New previous pointer
                  pCur = pCur.next;
               }
               else
               {
                  //Not a new word: unlink the node from the chain
                  //(pTemp is assigned but not read again in this method)
                  pTemp = pCur;
                  if (pPre != null)
                     //pCur is not the first item: bypass it from its predecessor
                     pPre.next = pCur.next;
                  else
                     modifyTable[i].pWordItemHead = pCur.next;
                  pCur = pCur.next;
               }
            }
         }
         return true;
      }

      #endregion

      #region ToTextFile Method

      //====================================================================
      // Func Name  : ToTextFile
      // Description: Write the content of the index table to a text file
      //              encoded as gb2312
      // Parameters : sFileName: path of the file to create or overwrite
      // Returns    : true on success; false when a modify table exists or
      //              when any error occurs while writing
      //====================================================================
      public bool ToTextFile(string sFileName)
      {
         //Modification made, not to output when modify table exists.
         if (modifyTable != null)
            return false;

         try
         {
            //The using statements close both objects on every path. Keeping
            //them inside the try means a failure while flushing or closing is
            //reported as false instead of escaping from a finally block.
            using (FileStream outputFile = new FileStream(sFileName, FileMode.Create, FileAccess.Write))
            using (StreamWriter writer = new StreamWriter(outputFile, Encoding.GetEncoding("gb2312")))
            {
               for (int j = 0; j < Predefine.CC_NUM; j++)
               {
                  //Section header for one first-character bucket
                  writer.WriteLine("====================================\r\n汉字:{0}, ID :{1}\r\n", Utility.CC_ID2Char(j), j);

                  //Column titles, then one line per word item of the bucket
                  writer.WriteLine("  词长  频率  词性   词");
                  for (int i = 0; i < indexTable[j].nCount; i++)
                     writer.WriteLine("{0,5} {1,6} {2,5}  ({3}){4}",
                        indexTable[j].WordItems[i].nWordLen,
                        indexTable[j].WordItems[i].nFrequency,
                        Utility.GetPOSString(indexTable[j].WordItems[i].nPOS),
                        Utility.CC_ID2Char(j),
                        indexTable[j].WordItems[i].sWord);
               }
            }
            return true;
         }
         catch
         {
            //Deliberately broad: the method reports every failure through its
            //bool result rather than by throwing.
            return false;
         }
      }

      #endregion

      #region Merge Method

      //====================================================================
      //Merge dict2 into current dictionary and the frequency ratio from dict2 and current dict is nRatio
      //====================================================================
      public bool Merge(WordDictionary dict2, int nRatio)
      {
         int i, j, k, nCmpValue;
         string sWord;

         //Modification made, not to output when modify table exists.
         if (modifyTable != null || dict2.modifyTable != null)
            return false;

         for (i = 0; i < Predefine.CC_NUM; i++)
         {
            j = 0;
            k = 0;
            while (j < indexTable[i].nCount && k < dict2.indexTable[i].nCount)
            {
               nCmpValue = Utility.CCStringCompare(indexTable[i].WordItems[j].sWord, dict2.indexTable[i].WordItems[k].sWord);
               if (nCmpValue == 0)
               //Same Words and determine the different handle
               {
                  if (indexTable[i].WordItems[j].nPOS < dict2.indexTable[i].WordItems[k].nPOS)
                     nCmpValue = -1;
                  else if (indexTable[i].WordItems[j].nPOS > dict2.indexTable[i].WordItems[k].nPOS)
                     nCmpValue = 1;
               }

               if (nCmpValue == 0)
               {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -