📄 seglist.cs
字号:
using System;
using System.Collections;
using System.IO;
using System.Text.RegularExpressions;
/// <summary>
/// 分词辅助类
/// </summary>
public class SegList
{
public int MaxLength;
private ArrayList m_seg;
public int Count
{
get
{
return m_seg.Count;
}
}
public SegList()
{
m_seg = new ArrayList();
MaxLength = 0;
}
public void Add(object obj)
{
m_seg.Add(obj);
if (MaxLength < obj.ToString().Length)
{
MaxLength = obj.ToString().Length;
}
}
public object GetElem(int i)
{
if (i < this.Count)
return m_seg[i];
else
return null;
}
public void SetElem(int i, object obj)
{
m_seg[i] = obj;
}
public bool Contains(object obj)
{
return m_seg.Contains(obj);
}
/// <summary>
/// 按长度排序
/// </summary>
public void Sort()
{
Sort(this);
}
/// <summary>
/// 按长度排序
/// </summary>
public void Sort(SegList list)
{
int max = 0;
for (int i = 0; i < list.Count - 1; ++i)
{
max = i;
for (int j = i + 1; j < list.Count; ++j)
{
string str1 = list.GetElem(j).ToString();
string str2 = list.GetElem(max).ToString();
int l1;
int l2;
if (str1 == "null")
l1 = 0;
else
l1 = str1.Length;
if (str2 == "null")
l2 = 0;
else
l2 = str2.Length;
if (l1 > l2)
max = j;
}
object o = list.GetElem(max);
list.SetElem(max, list.GetElem(i));
list.SetElem(i, o);
}
}
}
/// <summary>
/// 分词类
/// </summary>
//----------------调用----------------------
//Segment seg = new Segment();
//seg.InitWordDics();
//seg.EnablePrefix = true;
//seg.Separator =" ";
//seg.SegmentText("字符串", false).Trim();
//-------------------------------------------
public class Segment
{
#region 私有字段
private string m_DicPath = System.Web.HttpContext.Current.Server.MapPath("bin/ShootSeg/sDict.dic");
private string m_NoisePath = System.Web.HttpContext.Current.Server.MapPath("bin/ShootSeg/sNoise.dic");
private string m_NumberPath = System.Web.HttpContext.Current.Server.MapPath("bin/ShootSeg/sNumber.dic");
private string m_WordPath = System.Web.HttpContext.Current.Server.MapPath("bin/ShootSeg/sWord.dic");
private string m_PrefixPath = System.Web.HttpContext.Current.Server.MapPath("bin/ShootSeg/sPrefix.dic");
private Hashtable htWords;
private ArrayList alNoise;
private ArrayList alNumber;
private ArrayList alWord;
private ArrayList alPrefix;
private double m_EventTime = 0;
/// <summary>
/// 分隔符
/// </summary>
private string m_Separator = " ";
/// <summary>
/// 用于验证汉字的正则表达式
/// </summary>
private string strChinese = "[\u4e00-\u9fa5]";
#endregion
#region 公有属性
/// <summary>
/// 基本词典路径
/// </summary>
public string DicPath
{
get
{
return m_DicPath;
}
set
{
m_DicPath = value;
}
}
/// <summary>
/// 数据缓存函数
/// </summary>
/// <param name="key">索引键</param>
/// <param name="val">缓存的数据</param>
private static void SetCache(string key, object val)
{
if (val == null) val = " ";
System.Web.HttpContext.Current.Application.Lock();
System.Web.HttpContext.Current.Application.Set(key, val);
System.Web.HttpContext.Current.Application.UnLock();
}
/// <summary>
/// 读取缓存
/// </summary>
private static object GetCache(string key)
{
return System.Web.HttpContext.Current.Application.Get(key);
}
/// <summary>
/// 暂时无用
/// </summary>
public string NoisePath
{
get
{
return m_NoisePath;
}
set
{
m_NoisePath = value;
}
}
/// <summary>
/// 数字词典路径
/// </summary>
public string NumberPath
{
get
{
return m_NumberPath;
}
set
{
m_NumberPath = value;
}
}
/// <summary>
/// 字母词典路径
/// </summary>
public string WordPath
{
get
{
return m_WordPath;
}
set
{
m_WordPath = value;
}
}
/// <summary>
/// 姓名前缀字典 用于纠错姓名
/// </summary>
public string PrefixPath
{
get
{
return m_PrefixPath;
}
set
{
m_PrefixPath = value;
}
}
/// <summary>
/// 是否开启姓名纠错功能
/// </summary>
public bool EnablePrefix
{
get
{
if (alPrefix.Count == 0)
return false;
else
return true;
}
set
{
if (value)
alPrefix = LoadWords(PrefixPath, alPrefix);
else
alPrefix = new ArrayList();
}
}
/// <summary>
/// 用时每次进行加载或分词动作后改属性表示为上一次动作所用时间
/// 已精确到毫秒但分词操作在字符串较短时可能为0
/// </summary>
public double EventTime
{
get
{
return m_EventTime;
}
}
/// <summary>
/// 分隔符,默认为空格
/// </summary>
public string Separator
{
get
{
return m_Separator;
}
set
{
if (value != "" && value != null) m_Separator = value;
}
}
#endregion
#region 构造方法
/// <summary>
/// 构造方法
/// </summary>
public Segment()
{ }
/// <summary>
/// 构造方法
/// </summary>
public Segment(string p_DicPath, string p_NoisePath, string p_NumberPath, string p_WordPath)
{
m_WordPath = p_DicPath;
m_WordPath = p_NoisePath;
m_WordPath = p_NumberPath;
m_WordPath = p_WordPath;
this.InitWordDics();
}
#endregion
#region 公有方法
/// <summary>
/// 加载词列表
/// </summary>
public void InitWordDics()
{
DateTime start = DateTime.Now;
if (GetCache("jcms_dict") == null)
{
htWords = new Hashtable();
Hashtable father = htWords;
Hashtable forfather = htWords;
string strChar1;
string strChar2;
StreamReader reader = new StreamReader(DicPath, System.Text.Encoding.UTF8);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -