⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 matchnamerule.cs

📁 KTDictSeg 简介: KTDictSeg 是由KaiToo搜索开发的一款基于字典的简单中英文分词算法 * 主要功能: 中英文分词
💻 CS
📖 第 1 页 / 共 3 页
字号:
        /// </summary>
        public void ClearNameTraffic()
        {
            // Forwards to m_ChsNameTraffic.Clear(); presumably this discards the
            // name statistics collected so far (confirm against that type).
            m_ChsNameTraffic.Clear();
        }

        /// <summary>
        /// Adds a name suffix by forwarding <paramref name="word"/> to
        /// m_ChsNameTraffic.AddAfter.
        /// </summary>
        /// <param name="word">The word to add as a name suffix.</param>
        public void AddAfter(String word)
        {
            m_ChsNameTraffic.AddAfter(word);
        }

        /// <summary>
        /// Adds a name prefix by forwarding <paramref name="word"/> to
        /// m_ChsNameTraffic.AddBefore.
        /// </summary>
        /// <param name="word">The word to add as a name prefix.</param>
        public void AddBefore(String word)
        {
            m_ChsNameTraffic.AddBefore(word);
        }


        /// <summary>
        /// Loads the name statistics file by forwarding <paramref name="fileName"/>
        /// to m_ChsNameTraffic.Load.
        /// </summary>
        /// <param name="fileName">Path of the name statistics file to load.</param>
        public void LoadNameTraffic(String fileName)
        {
            m_ChsNameTraffic.Load(fileName);
        }

        /// <summary>
        /// Saves the name statistics file by forwarding <paramref name="fileName"/>
        /// to m_ChsNameTraffic.Save.
        /// </summary>
        /// <param name="fileName">Path of the name statistics file to write.</param>
        public void SaveNameTraffic(String fileName)
        {
            m_ChsNameTraffic.Save(fileName);
        }

        /// <summary>
        /// Determines whether a family name / given name pair may be a Chinese name.
        /// </summary>
        /// <param name="familyName">Family name (surname).</param>
        /// <param name="firstName">Given name.</param>
        /// <returns>
        /// true when both parts are non-null, neither part is longer than two
        /// characters, and <paramref name="familyName"/> has an entry in
        /// m_FamilyNameTbl; otherwise false.
        /// </returns>
        public static bool IsChineseName(String familyName, String firstName)
        {
            // A missing part can never form a name. Answer false instead of
            // failing with a NullReferenceException on the Length checks below.
            if (familyName == null || firstName == null)
            {
                return false;
            }

            // Only family names and given names of at most two characters are accepted.
            if (firstName.Length > 2 || familyName.Length > 2)
            {
                return false;
            }

            return m_FamilyNameTbl[familyName] != null;
        }



        #endregion

        #region IRule 成员

        /// <summary>
        /// Records the words found before and after a name in m_ChsNameTraffic.
        /// Each word is recorded independently of the other.
        /// </summary>
        /// <param name="beforeWord">
        /// Word preceding the name; ignored when null or when it trims to an empty string.
        /// </param>
        /// <param name="afterWord">
        /// Word following the name; ignored when null or when it trims to an empty string.
        /// </param>
        private void Traffic(String beforeWord, String afterWord)
        {
            bool hasBefore = beforeWord != null && beforeWord.Trim().Length != 0;
            if (hasBefore)
            {
                m_ChsNameTraffic.AddBefore(beforeWord);
            }

            bool hasAfter = afterWord != null && afterWord.Trim().Length != 0;
            if (hasAfter)
            {
                m_ChsNameTraffic.AddAfter(afterWord);
            }
        }

        /// <summary>
        /// Matches the case where the family name is at the head of a word:
        /// tries to combine preWords[index] with the word(s) that follow it into
        /// a single name and appends the result to <paramref name="retWords"/>.
        /// </summary>
        /// <param name="preWords">
        /// Words to scan. Note that one branch inserts a new element into this list.
        /// </param>
        /// <param name="index">Position in <paramref name="preWords"/> of the candidate word.</param>
        /// <param name="retWords">
        /// Output list; receives the assembled name and, in some branches, the
        /// remainder that was split off the following word.
        /// </param>
        /// <returns>
        /// -2 when <paramref name="index"/> is the last position, or when the first
        /// character of the current or next word lies outside 0x4e00..0x9fa5;
        /// -1 when the current word is longer than two characters or no family name
        /// is found in m_FamilyNameTbl; otherwise index + 2, index + 3 or index + 4
        /// (presumably the position the caller continues from - confirm against caller).
        /// </returns>
        private int MatchFamilyNameInHead(List<String> preWords, int index, List<String> retWords)
        {
            // NOTE(review): preWords[index] is read before the bounds check below, so an
            // index >= preWords.Count throws here instead of returning -2.
            String curWord = (String)preWords[index];

            // There must be at least one word after the current one.
            if (index >= preWords.Count - 1)
            {
                return -2;
            }

            // The current word may hold at most two characters.
            if (curWord.Length > 2)
            {
                return -1;
            }

            String nextWord = (String)preWords[index + 1];

            // NOTE(review): curWord[0] / nextWord[0] throw on an empty string; this
            // assumes preWords never contains empty entries - confirm.
            if (curWord[0] < 0x4e00 || curWord[0] > 0x9fa5)
            {
                // Not a Chinese character
                return -2;
            }

            if (nextWord[0] < 0x4e00 || nextWord[0] > 0x9fa5)
            {
                // Not a Chinese character
                return -2;
            }

/*
            if (m_PosBinRule.Match(curWord, nextWord))
            {
                return -2;
            }
*/

            String familyName;

            // Work out which part of curWord is the family name.
            if (curWord.Length == 1)
            {
                if (m_FamilyNameTbl[curWord] == null)
                {
                    return -1;
                }
                else
                {
                    familyName = curWord;
                }
            }
            else
            {
                // Two-character word: try the whole word first, then fall back to
                // its first character only.
                if (m_FamilyNameTbl[curWord] == null)
                {
                    if (m_FamilyNameTbl[curWord[0].ToString()] == null)
                    {
                        return -1;
                    }
                    else
                    {
                        familyName = curWord[0].ToString();
                    }
                }
                else
                {
                    familyName = curWord;
                }
            }

            // Default candidate: the current word followed by the next word.
            String name = curWord + nextWord;

            if (name.Length - familyName.Length == 1)
            {
                // Single-character given name; also try whether it is a two-character given name

                if (index < preWords.Count - 2)
                {
                    String nnext = (String)preWords[index + 2];

                    if (nnext.Length >= 2)
                    {
                        if (!m_ChsNameTraffic.MaybeNameByAfter(nnext))
                        {
                            // Split the word after next into its first character
                            // (candidate second character of the given name) and the rest.
                            String after = nnext.Substring(1, nnext.Length - 1);
                            nnext = nnext[0].ToString();

                            if (m_ChsNameTraffic.CompareTwoWords(nnext, after))
                            {
                                // Emit the extended name, then the remainder as its own word.
                                name += nnext;
                                retWords.Add(name);
                                retWords.Add(after);

                                // Statistics
                                if (m_AutoStudy)
                                {
                                    String afterWord = after;
                                    String beforeWord = null;
                                    if (index > 0)
                                    {
                                        beforeWord = preWords[index - 1];
                                    }

                                    Traffic(beforeWord, afterWord);
                                }

                                return index + 3;
                            }
                            else
                            {
                                // Retry with the remainder extended by the following
                                // word, but only when that word is a single character.
                                if (index + 3 < preWords.Count)
                                {
                                    if (preWords[index + 3].Length == 1)
                                    {
                                        after += preWords[index + 3];
                                        if (m_ChsNameTraffic.CompareTwoWords(nnext, after))
                                        {
                                            // Statistics
                                            if (m_AutoStudy)
                                            {
                                                String afterWord = after;
                                                String beforeWord = null;
                                                if (index > 0)
                                                {
                                                    beforeWord = preWords[index - 1];
                                                }

                                                Traffic(beforeWord, afterWord);
                                            }

                                            name += nnext;
                                            retWords.Add(name);
                                            retWords.Add(after);
                                            return index + 4;

                                        }
                                    }
                                }

                                // No match here: control falls through to the default
                                // handling at the end of the method.
                            }
                        }
                    }
                    else if (nnext.Length == 1 &&
                        nnext[0] >= 0x4e00 && nnext[0] <= 0x9fa5)
                    {
                        // The word after next is a single character in 0x4e00..0x9fa5;
                        // decide whether to merge it into the name.
                        bool merge = false;

                        if (index + 3 < preWords.Count)
                        {
                            merge = m_ChsNameTraffic.CompareTwoWords(preWords[index + 2], preWords[index + 3]);
                        }

                        // Second chance: merge when m_PosBinRule.MatchNameInHead rejects the character.
                        if (!merge)
                        {
                            merge = !m_PosBinRule.MatchNameInHead(nnext);
                        }

                        if (merge)
                        {
                            // Statistics
                            if (m_AutoStudy)
                            {
                                String afterWord = null;
                                String beforeWord = null;
                                if (index > 0)
                                {
                                    beforeWord = preWords[index - 1];
                                }

                                if (index + 3 < preWords.Count)
                                {
                                    afterWord = preWords[index + 3];
                                }

                                Traffic(beforeWord, afterWord);
                            }

                            name += nnext;
                            retWords.Add(name);
                            return index + 3;
                        }
                    }
                }
            }
            else if (name.Length - familyName.Length > 2)
            {
                // More than two characters follow the family name: try keeping only the
                // first character of the next word and splitting off the rest.
                String nnext = nextWord;
                if (nnext.Length > 1)
                {
                    if (m_PosBinRule.MatchNameInHead(nnext.Substring(1, nnext.Length-1)))
                    {
                        // Statistics
                        if (m_AutoStudy)
                        {
                            String afterWord = null;
                            String beforeWord = null;
                            if (index > 0)
                            {
                                beforeWord = preWords[index - 1];
                            }

                            // NOTE(review): this records the word after nextWord, whereas the
                            // split branches above record the split-off remainder as the
                            // after-word - confirm which is intended.
                            if (index + 2 < preWords.Count)
                            {
                                afterWord = preWords[index + 2];
                            }

                            Traffic(beforeWord, afterWord);
                        }

                        name = curWord + nnext[0].ToString();
                        // Mutates the input list: the remainder of the next word is
                        // inserted at index + 2, which is also the returned position.
                        preWords.Insert(index +2, nnext.Substring(1, nnext.Length - 1));
                        retWords.Add(name);
                        return index + 2;
                    }
                }
            }

            // Default handling: accept curWord + nextWord as the name.
            // Statistics
            if (m_AutoStudy)
            {
                String afterWord = null;
                String beforeWord = null;
                if (index > 0)
                {
                    beforeWord = preWords[index - 1];
                }

                if (index + 2 < preWords.Count)
                {
                    afterWord = preWords[index + 2];
                }

                Traffic(beforeWord, afterWord);
            }

            retWords.Add(name);
            return index + 2;
        }


        /// <summary>
        /// 根据统计结果匹配尾部是姓的情况
        /// </summary>
        /// <param name="preWords"></param>
        /// <param name="index"></param>
        /// <param name="retWords"></param>
        /// <returns></returns>
        private int MatchFamilyNameInTailByTraffic(List<String> preWords, int index, List<String> retWords)
        {
            if (retWords.Count < 1)
            {
                return -1;
            }

            String curWord = (String)retWords[retWords.Count - 1];

            if (curWord.Length < 2)
            {
                return -1;
            }

            String nextWord = (String)preWords[index];

            if (nextWord.Length > 2)
            {
                return -1;
            }

            String familyName;

            //单姓
            familyName = curWord[curWord.Length - 1].ToString();

            if (m_FamilyNameTbl[familyName] == null)
            {
                //双姓
                familyName = curWord.Substring(curWord.Length - 2, 2);
                if (m_FamilyNameTbl[familyName] == null)
                {
                    return -1;
                }
            }

            String remain = curWord.Substring(0, curWord.Length - familyName.Length);

            if (retWords.Count > 0)
            {
                //重新组合前面的词,并判断词性匹配
                String newWord = null;
                bool isReg;

                if (retWords.Count > 1)
                {
                    newWord = retWords[retWords.Count - 2] + remain;
                    if (!m_ChsNameTraffic.MaybeNameByBefore(newWord))
                    {
                        newWord = null;
                    }

                    if (newWord != null)
                    {
                        retWords.RemoveAt(retWords.Count - 1);
                        retWords.RemoveAt(retWords.Count - 1);
                    }

                }

                if (newWord != null)
                {
                    retWords.Add(newWord);
                }
                else
                {
                    return -1;
                }
            }

            String name = familyName + nextWord;

            if (name.Length - familyName.Length == 1)
            {
                //单字名 还要尝试是否是双字名

                if (index < preWords.Count - 1)
                {
                    String nnext = name + (String)preWords[index + 1];
                    nnext = nnext.Substring(familyName.Length, nnext.Length - familyName.Length);

                    if (nnext.Length <= 2)
                    {
                        bool merge = false;

                        if (index + 2 < preWords.Count)
                        {
                            merge = m_ChsNameTraffic.CompareTwoWords(preWords[index + 1], preWords[index + 2]);
                        }

                        if (!merge)
                        {
                            merge = !m_PosBinRule.MatchNameInHead(nnext);
                        }

                        if (merge)
                        {
                            //统计
                            if (m_AutoStudy)
                            {
                                String afterWord = null;
                                String beforeWord = null;
                                if (retWords.Count > 0)
                                {
                                    beforeWord = retWords[retWords.Count - 1];
                                }

                                if (index + 2 < preWords.Count)
                                {
                                    afterWord = preWords[index + 2];
                                }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -