⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 seglist.cs

📁 c#常用类库大全
💻 CS
📖 第 1 页 / 共 3 页
字号:
                        {
                            if (preFix == 1)
                            {
                                reText += strPrefix + strChar1 + strChar2;
                                strPrefix = "";
                                preFix = 0;
                            }
                            else if (preFix > 1)
                            {
                                reText += strPrefix + strLastWords + strChar1 + strChar2;
                                strPrefix = "";
                                preFix = 0;
                            }
                            else
                            {
                                if (CharType == 4) reText += strChar1 + strChar2;
                                else reText += strChar1 + strChar2;
                                strLastWords = this.Separator;
                                number = false;
                            }
                            i++;
                            yes = true;
                        }
                        if (reText.Length > 0) strLastChar = reText.Substring(reText.Length - 1);
                        if (CharType == 4 && GetCharType(strLastChar) == 4)
                        {
                            number = true;
                        }
                        else if (strLastChar != this.Separator) reText += this.Separator;
                    }
                    #endregion
                    break;
                default:
                    #region 未知字符,可能是生僻字,也可能是标点符合之类
                    if (word && !yes)
                    {
                        reText += Separator;
                    }
                    else if (number && !yes)
                    {
                        reText += Separator;
                    }
                    number = false;
                    word = false;
                    strLastWords = this.Separator;
                    break;
                    #endregion
            }
            if (!yes && number || !yes && word)
            {
                reText += strChar1;
                yes = true;
            }
            if (!yes)
            {
                #region 处理姓名问题
                if (preFix == 0)
                {
                    if (alPrefix.Contains(strChar1 + strChar2))
                    {
                        i++;
                        strPrefix = strChar1 + strChar2;
                        preFix++;
                    }
                    else if (alPrefix.Contains(strChar1))
                    {
                        if (!number)
                        {
                            strPrefix = strChar1;
                            preFix++;
                        }
                        else
                        {
                            reText += strChar1 + strLastWords;
                            number = false;
                            word = false;
                        }
                    }
                    else
                    {
                        if (preFix == 3)
                        {
                            reText += strPrefix + Separator + strChar1 + Separator;
                            strPrefix = "";
                            preFix = 0;
                        }
                        else if (preFix > 0)
                        {
                            if (Regex.IsMatch(strChar1, strChinese))
                            {
                                strPrefix += strChar1;
                                preFix++;
                            }
                            else
                            {
                                reText += strPrefix + Separator + strChar1 + Separator;
                                strPrefix = "";
                                preFix = 0;
                            }
                        }
                        else
                        {
                            reText += strChar1 + strLastWords;
                            number = false;
                            word = false;
                        }
                    }
                }
                else
                {
                    if (preFix == 3)
                    {
                        reText += strPrefix + Separator + strChar1 + Separator;
                        strPrefix = "";
                        preFix = 0;
                    }
                    else if (preFix > 0)
                    {
                        if (Regex.IsMatch(strChar1, strChinese))
                        {
                            strPrefix += strChar1;
                            preFix++;
                        }
                        else
                        {
                            reText += strPrefix + Separator + strChar1 + Separator;
                            strPrefix = "";
                            preFix = 0;
                        }
                    }
                    else
                    {
                        reText += strChar1 + strLastWords;
                        number = false;
                    }
                }
                #endregion
            }
            length = i;
            #endregion
        }

        #region 最后防止最后一个字的丢失
        if (length < strText.Length - 1)
        {
            string strLastChar1 = strText.Substring(strText.Length - 1).Trim();
            string strLastChar2 = strText.Substring(strText.Length - 2).Trim();

            if (reText.Length > 0) strLastChar = reText.Substring(reText.Length - 1);
            if (preFix != 0)
            {
                reText += strPrefix + strLastChar1;
            }
            else
            {
                switch (GetCharType(strLastChar1))
                {
                    case 1:
                        if (strLastChar1 != "." && strLastChar1 != ".")
                            reText += strLastChar1;
                        else
                            reText += Separator + strLastChar1;
                        break;
                    case 2:
                    case 5:
                        if (alWord.Contains(strLastChar2))
                            reText += strLastChar1;
                        break;
                    case 3:
                    case 4:
                        if ((number || word) && strLastChar != Separator)
                            reText += Separator + strLastChar1;
                        else
                            reText += strLastChar1;
                        break;
                    default:
                        if (strLastChar != Separator)
                            reText += Separator + strLastChar1;
                        else
                            reText += strLastChar1;
                        break;
                }
            }
            if (reText.Length > 0) strLastChar = (reText.Substring(reText.Length - 1));
            if (strLastChar != this.Separator) reText += this.Separator;
        }
        #endregion

        TimeSpan duration = DateTime.Now - start;
        m_EventTime = duration.TotalMilliseconds;
        return reText.Replace(" $", ""); //这里包含一个字的,则去掉
    }

    /// <summary>
    /// Overload of the segmentation entry point that can process multi-line text.
    /// </summary>
    /// <param name="strText">Text to segment.</param>
    /// <param name="Enter">
    /// When true, <paramref name="strText"/> is split on '\n', every piece is segmented
    /// separately with <c>SegmentText(string)</c>, and "\r\n" is appended after each result.
    /// When false, the call is forwarded unchanged to <c>SegmentText(string)</c>.
    /// </param>
    /// <returns>The segmented text.</returns>
    public string SegmentText(string strText, bool Enter)
    {
        if (!Enter)
        {
            return SegmentText(strText);
        }

        DateTime start = DateTime.Now;

        // The split is on '\n' only, so a '\r' left over from CRLF input stays attached to its piece.
        string[] strArr = strText.Split('\n');

        // Accumulate in a StringBuilder: repeated string concatenation re-copies the whole
        // result for every line, which is quadratic for large inputs.
        System.Text.StringBuilder reText = new System.Text.StringBuilder();
        for (int i = 0; i < strArr.Length; i++)
        {
            reText.Append(SegmentText(strArr[i])).Append("\r\n");
        }

        // Record the elapsed time of the whole multi-line call, in milliseconds.
        TimeSpan duration = DateTime.Now - start;
        m_EventTime = duration.TotalMilliseconds;
        return reText.ToString();
    }

    #region 判断字符类型
    /// <summary>
    /// Returns a numeric type code for a string (callers in this file pass a single character).
    /// </summary>
    /// <param name="p_Char">The string to look up in alNumber, alWord and htWords.</param>
    /// <returns>
    /// A base code of 0 (in neither list), 1 (found in alNumber) or 2 (found in alWord, which
    /// wins over alNumber), increased by 3 when the string is also a key of htWords.
    /// The result therefore ranges from 0 to 5. The original author's labels are:
    /// 0 unknown, 1 digit, 2 letter, 3 Chinese character, 4 Chinese numeral.
    /// </returns>
    private int GetCharType(string p_Char)
    {
        // Perform all three lookups up front, in the same order as before.
        bool inNumberList = alNumber.Contains(p_Char);
        bool inWordList = alWord.Contains(p_Char);
        bool isDictionaryKey = htWords.ContainsKey(p_Char);

        // A hit in alWord overrides a hit in alNumber.
        int baseCode = inWordList ? 2 : (inNumberList ? 1 : 0);

        // Dictionary membership is layered on top of the base code.
        return isDictionaryKey ? baseCode + 3 : baseCode;
    }
    #endregion

    #region 对加载的词典排序并重新写入
    /// <summary>
    /// Sorts the loaded dictionary and writes it back to the dictionary file,
    /// without reloading it afterwards.
    /// </summary>
    public void SortDic()
    {
        // Same work as SortDic(bool), with the reload step switched off.
        const bool reloadAfterwards = false;
        this.SortDic(reloadAfterwards);
    }

    /// <summary>
    /// Sorts every word list held in htWords and rewrites the dictionary file at DicPath
    /// (UTF-8, overwriting the existing content).
    /// </summary>
    /// <param name="Reload">When true, InitWordDics() is called after the file has been written.</param>
    public void SortDic(bool Reload)
    {
        DateTime start = DateTime.Now;

        // 'using' closes the file even if a cast, Sort() or a write throws, so the
        // dictionary file is never left open by a failed run.
        using (StreamWriter sw = new StreamWriter(DicPath, false, System.Text.Encoding.UTF8))
        {
            // htWords is a two-level table: outer key -> Hashtable of inner key -> SegList.
            IDictionaryEnumerator idEnumerator1 = htWords.GetEnumerator();
            while (idEnumerator1.MoveNext())
            {
                IDictionaryEnumerator idEnumerator2 = ((Hashtable)idEnumerator1.Value).GetEnumerator();
                while (idEnumerator2.MoveNext())
                {
                    SegList aa = (SegList)idEnumerator2.Value;
                    aa.Sort();

                    // Both keys are written in front of every entry of this list.
                    string strKeys = idEnumerator1.Key.ToString() + idEnumerator2.Key.ToString();
                    for (int i = 0; i < aa.Count; i++)
                    {
                        string strElem = aa.GetElem(i).ToString();

                        // An element whose text is "null" is written as the two keys alone.
                        if (strElem == "null")
                            sw.WriteLine(strKeys);
                        else
                            sw.WriteLine(strKeys + strElem);
                    }
                }
            }
        }

        // The writer is closed at this point, so a reload sees the complete file.
        if (Reload) InitWordDics();

        // Record the elapsed time of the call, in milliseconds.
        TimeSpan duration = DateTime.Now - start;
        m_EventTime = duration.TotalMilliseconds;
    }
    #endregion

    /// <summary>
    /// Removes exact duplicate lines from the dictionary file at DicPath and rewrites the
    /// file (UTF-8) with the remaining lines in their first-seen order.
    /// Marked as currently unused by the original author.
    /// </summary>
    /// <returns>The number of duplicate lines that were dropped.</returns>
    public int Optimize()
    {
        int l = 0;
        DateTime start = DateTime.Now;

        Hashtable htOptimize = new Hashtable(); // lines already seen, for fast membership tests
        ArrayList alLines = new ArrayList();    // unique lines, kept in first-seen order

        using (StreamReader reader = new StreamReader(DicPath, System.Text.Encoding.UTF8))
        {
            string strline;

            // ReadLine() must be called on every iteration; without it the loop would
            // re-test the first line forever and never terminate.
            while ((strline = reader.ReadLine()) != null)
            {
                // Blank lines are skipped rather than treated as end of input, so entries
                // that follow a blank line are not lost when the file is rewritten below.
                if (strline.Trim() == "")
                    continue;

                if (!htOptimize.ContainsKey(strline))
                {
                    htOptimize.Add(strline, null);
                    alLines.Add(strline);
                }
                else
                {
                    l++;
                }
            }
        }

        // NOTE(review): looks like leftover progress output; kept so console behaviour is unchanged.
        Console.WriteLine("ready");

        // The reader is closed above, so the same path can now be reopened for writing.
        // Failures while flushing or closing propagate instead of being silently swallowed.
        using (StreamWriter sw = new StreamWriter(DicPath, false, System.Text.Encoding.UTF8))
        {
            for (int i = 0; i < alLines.Count; i++)
                sw.WriteLine((string)alLines[i]);
        }

        // Record the elapsed time of the call, in milliseconds.
        TimeSpan duration = DateTime.Now - start;
        m_EventTime = duration.TotalMilliseconds;
        return l;
    }
    #endregion
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -