⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 seglist.cs

📁 c#常用类库大全
💻 CS
📖 第 1 页 / 共 3 页
字号:
using System;
using System.Collections;
using System.IO;
using System.Text.RegularExpressions;

/// <summary>
/// Helper list for word segmentation: an ordered collection of entries that
/// remembers the longest entry text seen by <see cref="Add"/> and can be
/// sorted so that the longest entries come first.
/// </summary>
public class SegList
{
    /// <summary>
    /// Largest ToString() length among the entries passed to <see cref="Add"/>.
    /// It is not recomputed by <see cref="SetElem"/> or <see cref="Sort()"/>.
    /// </summary>
    public int MaxLength;
    private ArrayList m_seg;

    /// <summary>
    /// Number of entries currently stored.
    /// </summary>
    public int Count
    {
        get { return m_seg.Count; }
    }

    /// <summary>
    /// Creates an empty list with <see cref="MaxLength"/> set to 0.
    /// </summary>
    public SegList()
    {
        m_seg = new ArrayList();
        MaxLength = 0;
    }

    /// <summary>
    /// Appends an entry and raises <see cref="MaxLength"/> when the entry's text is longer.
    /// </summary>
    /// <param name="obj">Entry to append; a null entry is stored and counts as length 0.</param>
    public void Add(object obj)
    {
        // Measure before appending so a null entry can no longer throw after
        // the list has already been modified.
        int length = 0;
        if (obj != null)
        {
            string text = obj.ToString();
            length = (text == null) ? 0 : text.Length;
        }

        m_seg.Add(obj);
        if (MaxLength < length)
        {
            MaxLength = length;
        }
    }

    /// <summary>
    /// Returns the entry at the given position.
    /// </summary>
    /// <param name="i">Zero-based position.</param>
    /// <returns>The entry, or null when <paramref name="i"/> is outside the list (negative or too large).</returns>
    public object GetElem(int i)
    {
        if (i < 0 || i >= m_seg.Count)
        {
            return null;
        }

        return m_seg[i];
    }

    /// <summary>
    /// Replaces the entry at the given position. The index is passed straight to the
    /// underlying ArrayList, which rejects out-of-range positions.
    /// </summary>
    /// <param name="i">Zero-based position of an existing entry.</param>
    /// <param name="obj">New entry.</param>
    public void SetElem(int i, object obj)
    {
        m_seg[i] = obj;
    }

    /// <summary>
    /// Reports whether the list contains the given entry (ArrayList.Contains semantics).
    /// </summary>
    public bool Contains(object obj)
    {
        return m_seg.Contains(obj);
    }

    /// <summary>
    /// Sorts this list by entry text length, longest first.
    /// </summary>
    public void Sort()
    {
        Sort(this);
    }

    /// <summary>
    /// Sorts the given list in place by entry text length, longest first.
    /// Null entries and entries whose text is the literal "null" count as length 0.
    /// </summary>
    /// <param name="list">List to sort.</param>
    public void Sort(SegList list)
    {
        ArrayList items = list.m_seg;
        int count = items.Count;

        // Selection sort: the pass order and the strict ">" comparison are kept
        // so that entries of equal length end up in the same relative order as before.
        for (int i = 0; i < count - 1; ++i)
        {
            int best = i;
            int bestLength = SortLength(items[i]);

            for (int j = i + 1; j < count; ++j)
            {
                int candidate = SortLength(items[j]);
                if (candidate > bestLength)
                {
                    best = j;
                    bestLength = candidate;
                }
            }

            object longest = items[best];
            items[best] = items[i];
            items[i] = longest;
        }
    }

    // Length used for ordering: 0 for null entries and for the literal text "null".
    private static int SortLength(object item)
    {
        if (item == null)
        {
            return 0;
        }

        string text = item.ToString();
        if (text == null || text == "null")
        {
            return 0;
        }

        return text.Length;
    }
}

/// <summary>
/// Word segmentation class.
/// </summary>
//---------------- Usage --------------------
//Segment seg = new Segment();
//seg.InitWordDics();
//seg.EnablePrefix = true;
//seg.Separator =" ";
//seg.SegmentText("text to segment", false).Trim();
//-------------------------------------------
public class Segment
{
    #region 私有字段
    // Default dictionary file paths, resolved with Server.MapPath against the
    // "bin/ShootSeg/" folder when the instance is created.
    // NOTE(review): these initializers dereference HttpContext.Current, so they
    // presumably fail when a Segment is created outside an ASP.NET request - confirm.
    private string m_DicPath =    System.Web.HttpContext.Current.Server.MapPath("bin/ShootSeg/sDict.dic");
    private string m_NoisePath =  System.Web.HttpContext.Current.Server.MapPath("bin/ShootSeg/sNoise.dic");
    private string m_NumberPath = System.Web.HttpContext.Current.Server.MapPath("bin/ShootSeg/sNumber.dic");
    private string m_WordPath =   System.Web.HttpContext.Current.Server.MapPath("bin/ShootSeg/sWord.dic");
    private string m_PrefixPath = System.Web.HttpContext.Current.Server.MapPath("bin/ShootSeg/sPrefix.dic");
    // Word table; InitWordDics creates a new Hashtable for it when nothing is cached.
    private Hashtable htWords;
    // Word lists; presumably loaded from the noise/number/letter dictionary files
    // by code outside this view - TODO confirm.
    private ArrayList alNoise;
    private ArrayList alNumber;
    private ArrayList alWord;
    // Name-prefix list; assigned by the EnablePrefix setter.
    private ArrayList alPrefix;
    // Backing field of the EventTime property.
    private double m_EventTime = 0;

    /// <summary>
    /// Separator string (backing field of the Separator property); a single space by default.
    /// </summary>
    private string m_Separator = " ";

    /// <summary>
    /// Regular expression pattern used to recognise Chinese characters
    /// (a character class covering U+4E00 through U+9FA5).
    /// </summary>
    private string strChinese = "[\u4e00-\u9fa5]";
    #endregion

    #region 公有属性
    /// <summary>
    /// Gets or sets the path of the base dictionary file.
    /// </summary>
    public string DicPath
    {
        get { return this.m_DicPath; }
        set { this.m_DicPath = value; }
    }

    /// <summary>
    /// Stores a value in the ASP.NET application state reached through
    /// HttpContext.Current, holding the application-state lock while writing.
    /// </summary>
    /// <param name="key">Application-state key.</param>
    /// <param name="val">
    /// Value to store. A null value is replaced by the string " " before it is stored
    /// (presumably so that a later null check on the key still sees an entry - confirm).
    /// </param>
    private static void SetCache(string key, object val)
    {
        if (val == null)
        {
            val = " ";
        }

        System.Web.HttpApplicationState application = System.Web.HttpContext.Current.Application;
        application.Lock();
        try
        {
            application.Set(key, val);
        }
        finally
        {
            // Release the lock even when Set throws, so the explicit unlock is never skipped.
            application.UnLock();
        }
    }

    /// <summary>
    /// Reads a value from the ASP.NET application state reached through HttpContext.Current.
    /// </summary>
    /// <param name="key">Application-state key.</param>
    /// <returns>Whatever Application.Get returns for <paramref name="key"/>.</returns>
    private static object GetCache(string key)
    {
        object cached = System.Web.HttpContext.Current.Application.Get(key);
        return cached;
    }

    /// <summary>
    /// Gets or sets the path of the noise dictionary file.
    /// Marked by the original author as currently unused.
    /// </summary>
    public string NoisePath
    {
        get { return this.m_NoisePath; }
        set { this.m_NoisePath = value; }
    }

    /// <summary>
    /// Gets or sets the path of the number dictionary file.
    /// </summary>
    public string NumberPath
    {
        get { return this.m_NumberPath; }
        set { this.m_NumberPath = value; }
    }

    /// <summary>
    /// Gets or sets the path of the letter dictionary file.
    /// </summary>
    public string WordPath
    {
        get { return this.m_WordPath; }
        set { this.m_WordPath = value; }
    }

    /// <summary>
    /// Gets or sets the path of the name-prefix dictionary file, which is used
    /// for correcting personal names.
    /// </summary>
    public string PrefixPath
    {
        get { return this.m_PrefixPath; }
        set { this.m_PrefixPath = value; }
    }

    /// <summary>
    /// Gets or sets whether personal-name correction is enabled.
    /// Reading returns true when the prefix list exists and is not empty.
    /// Setting true assigns the result of LoadWords(PrefixPath, alPrefix) to the
    /// prefix list; setting false replaces it with an empty list.
    /// </summary>
    public bool EnablePrefix
    {
        get
        {
            // alPrefix has no initializer, so it can still be null when the
            // property is read before it has ever been set; report "disabled" then.
            return alPrefix != null && alPrefix.Count != 0;
        }
        set
        {
            if (value)
            {
                alPrefix = LoadWords(PrefixPath, alPrefix);
            }
            else
            {
                alPrefix = new ArrayList();
            }
        }
    }

    /// <summary>
    /// Gets the value of the m_EventTime field. According to the original author's
    /// note, this is the time taken by the most recent load or segmentation
    /// operation, precise to the millisecond, and may be 0 when a short string is
    /// segmented (the code that updates the field is outside this view).
    /// </summary>
    public double EventTime
    {
        get { return this.m_EventTime; }
    }

    /// <summary>
    /// Gets or sets the separator string; the default is a single space.
    /// Assigning null or an empty string is ignored and keeps the current value.
    /// </summary>
    public string Separator
    {
        get { return this.m_Separator; }
        set
        {
            if (string.IsNullOrEmpty(value))
            {
                return;
            }

            this.m_Separator = value;
        }
    }
    #endregion

    #region 构造方法
    /// <summary>
    /// Creates a segmenter that keeps the default dictionary paths from the field
    /// initializers. The constructor body is empty; no dictionary is loaded here.
    /// </summary>
    public Segment()
    {
    }

    /// <summary>
    /// Creates a segmenter with explicit dictionary paths and then calls
    /// InitWordDics(). The name-prefix dictionary path keeps its default value.
    /// </summary>
    /// <param name="p_DicPath">Path of the base dictionary file.</param>
    /// <param name="p_NoisePath">Path of the noise dictionary file.</param>
    /// <param name="p_NumberPath">Path of the number dictionary file.</param>
    /// <param name="p_WordPath">Path of the letter dictionary file.</param>
    public Segment(string p_DicPath, string p_NoisePath, string p_NumberPath, string p_WordPath)
    {
        // Each argument goes to its own field; previously all four were written
        // to m_WordPath, so only the last one had any effect.
        m_DicPath = p_DicPath;
        m_NoisePath = p_NoisePath;
        m_NumberPath = p_NumberPath;
        m_WordPath = p_WordPath;
        this.InitWordDics();
    }
    #endregion

    #region 公有方法
    /// <summary>
    /// 加载词列表
    /// </summary>
    public void InitWordDics()
    {
        DateTime start = DateTime.Now;
        if (GetCache("jcms_dict") == null)
        {
            htWords = new Hashtable();
            Hashtable father = htWords;
            Hashtable forfather = htWords;

            string strChar1;
            string strChar2;

            StreamReader reader = new StreamReader(DicPath, System.Text.Encoding.UTF8);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -