// sentencesplitter.cs
// (code-viewer residue, not part of the program) Font size:
using System;
/*
模块名称:补天在线中文分词组件
* 程序:温泉
* 日期:2008-1-24
*/
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Data.SqlClient;
using System.Text.RegularExpressions;
using System.Collections;
namespace FillSkynet.WordStock
{
/// <summary>
/// Dictionary-backed word segmenter. Words are looked up in the SQL Server
/// table <c>sky_wordstock</c> (column <c>word</c>); a sentence is cut by
/// repeatedly taking the longest leading substring that is a run of digits,
/// a run of ASCII letters, or a dictionary word.
/// </summary>
public class SentenceSplitter
{
    // SECURITY NOTE(review): credentials embedded in source. Kept only so that
    // existing callers that never set ConnetionString keep working; move this to
    // configuration and rotate the password.
    private const string LegacyConnectionString =
        "server=220.162.244.144;database=fillsky_skill;user id=wenquan;password=12053519";

    // ASCII and CJK punctuation treated as separators / never part of a word.
    private static readonly Regex PunctuationRegex =
        new Regex("[\\n,/\\.\"'!\\?;:\\(\\)<=>\\+\\*\\-@\\[\\]\\{\\},\\\\。;:‘“”、!?《》()【】——…]");

    // Matches when the input contains anything other than ASCII letters and digits.
    private static readonly Regex NonAlphanumericRegex =
        new Regex("[^a-z0-9]+", RegexOptions.IgnoreCase);

    private static readonly Regex DigitsOnlyRegex = new Regex("^\\d+$");
    private static readonly Regex LettersOnlyRegex = new Regex("^[a-zA-Z]+$");

    private SqlConnection _con;

    /// <summary>Creates a splitter with no sentence assigned.</summary>
    public SentenceSplitter()
    {
    }

    /// <summary>Creates a splitter and stores <paramref name="sentence"/> in <see cref="Sentence"/>.</summary>
    /// <param name="sentence">Initial value of <see cref="Sentence"/>.</param>
    public SentenceSplitter(string sentence)
    {
        // The original constructor ignored its argument.
        _Sentence = sentence;
    }

    #region private Member
    private string _SplitString;
    private string _connetionString;
    private string _Sentence;
    #endregion

    #region Propertys
    /// <summary>Sentence associated with this instance. Not read by the split methods, which take their input as a parameter.</summary>
    public string Sentence
    {
        get { return _Sentence; }
        set { _Sentence = value; }
    }

    /// <summary>
    /// Connection string for the dictionary database. When null or empty, the
    /// legacy built-in connection string is used.
    /// </summary>
    public string ConnetionString
    {
        get { return _connetionString; }
        set
        {
            if (_connetionString != value && _con != null)
            {
                // Drop the cached connection so the next query uses the new string.
                _con.Dispose();
                _con = null;
            }
            _connetionString = value;
        }
    }

    /// <summary>Separator string. Stored only; no method in this class reads it yet.</summary>
    public string SplitString
    {
        get { return _SplitString; }
        set { _SplitString = value; }
    }
    #endregion

    /// <summary>
    /// Unfinished: normalizes punctuation to spaces and tokenizes the sentence,
    /// but produces no output yet.
    /// </summary>
    /// <param name="sentence">Text to split; must not be null.</param>
    /// <returns>Always null (behavior preserved from the original implementation).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="sentence"/> is null.</exception>
    public string SplitSentence(string sentence)
    {
        if (sentence == null)
        {
            throw new ArgumentNullException("sentence");
        }

        // Replace punctuation with spaces, then collapse whitespace runs.
        string normalized = PunctuationRegex.Replace(sentence, " ");
        normalized = Regex.Replace(normalized, "\\s{2,}", " ");
        string[] fragments = normalized.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

        for (int i = 0; i < fragments.Length; i++)
        {
            // The original tested the whole sentence here instead of the fragment.
            if (!NonAlphanumericRegex.IsMatch(fragments[i]))
            {
                continue; // fragment consists only of ASCII letters/digits
            }

            // NOTE(review): the original did nothing with the remaining fragments
            // except build an unused StringBuilder. The intended output format is
            // not recoverable from this file, so no result is produced.
        }

        return null;
    }

    /// <summary>
    /// Segments a single punctuation-free sentence into words.
    /// </summary>
    /// <remarks>
    /// A one-character input, or an input that is itself a dictionary word, is
    /// returned as the only element. Otherwise the longest leading candidate
    /// (capped at the longest dictionary word length) is shortened from the right
    /// until it is a word; characters for which no candidate of length &gt;= 2
    /// matches are skipped and do not appear in the result.
    /// NOTE(review): dropping unmatched single characters is preserved from the
    /// original; confirm it is intended.
    /// </remarks>
    /// <param name="centence">Sentence to segment.</param>
    /// <returns>List of word strings, or null when <paramref name="centence"/> is null or empty.</returns>
    public ArrayList SplitSingleSenetence(string centence)
    {
        if (string.IsNullOrEmpty(centence))
        {
            return null;
        }

        ArrayList results = new ArrayList();

        // A single character is always returned as-is; no database access needed.
        if (centence.Length == 1)
        {
            results.Add(centence);
            return results;
        }

        OpenConnection();
        try
        {
            // Whole-sentence match.
            if (Exist(centence))
            {
                results.Add(centence);
                return results;
            }

            // Clamp to 1 so every iteration consumes at least one character even
            // when the dictionary is empty (a cap of 0 would loop forever).
            int maxLength = Math.Max(1, GetWordMaxLength());
            string remaining = centence;

            while (remaining.Length > 1)
            {
                string candidate = remaining.Length > maxLength
                    ? remaining.Substring(0, maxLength)
                    : remaining;

                while (candidate.Length > 1)
                {
                    if (IsWord(candidate))
                    {
                        results.Add(candidate);
                        break;
                    }
                    candidate = candidate.Substring(0, candidate.Length - 1);
                }

                // Advance past the matched word, or past one unmatched character.
                remaining = remaining.Substring(candidate.Length);
            }

            return results;
        }
        finally
        {
            // Release the connection even when a query throws.
            CloseConnection();
        }
    }

    /// <summary>
    /// Tells whether a candidate is accepted as a word: it contains no
    /// punctuation and is all digits, all ASCII letters, or a dictionary entry.
    /// </summary>
    private bool IsWord(string candidate)
    {
        if (PunctuationRegex.IsMatch(candidate))
        {
            return false;
        }

        return DigitsOnlyRegex.IsMatch(candidate)
            || LettersOnlyRegex.IsMatch(candidate)
            || Exist(candidate);
    }

    /// <summary>
    /// Checks whether <paramref name="word"/> is present in <c>sky_wordstock</c>.
    /// Requires an open connection.
    /// </summary>
    /// <param name="word">Word to look up.</param>
    /// <returns>true when the query returns at least one row; otherwise false.</returns>
    private bool Exist(string word)
    {
        // The original compared a bool's ToString() with "0" and therefore
        // returned true for every input.
        using (SqlCommand cmd = new SqlCommand("select word from sky_wordstock where word=@word", _con))
        {
            cmd.Parameters.Add(new SqlParameter("@word", word));
            using (SqlDataReader reader = cmd.ExecuteReader())
            {
                return reader.HasRows;
            }
        }
    }

    /// <summary>
    /// Returns the length of the longest word in <c>sky_wordstock</c>.
    /// Requires an open connection.
    /// </summary>
    /// <returns>The maximum word length, or 0 when the query yields NULL (no rows).</returns>
    private int GetWordMaxLength()
    {
        using (SqlCommand cmd = new SqlCommand("select max(leng) from (select len(word) as leng from sky_wordstock) aaa", _con))
        {
            object value = cmd.ExecuteScalar();
            if (value == null || value == DBNull.Value)
            {
                return 0;
            }
            return Convert.ToInt32(value);
        }
    }

    /// <summary>
    /// Creates the connection on first use (from <see cref="ConnetionString"/>,
    /// falling back to the legacy built-in string) and opens it if it is not open.
    /// </summary>
    private void OpenConnection()
    {
        if (_con == null)
        {
            string connectionString = string.IsNullOrEmpty(_connetionString)
                ? LegacyConnectionString
                : _connetionString;
            _con = new SqlConnection(connectionString);
        }

        if (_con.State != ConnectionState.Open)
        {
            _con.Open();
        }
    }

    /// <summary>Closes the connection if one has been created.</summary>
    private void CloseConnection()
    {
        if (_con != null)
        {
            _con.Close();
        }
    }
}
}
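// --- Code-viewer page residue (not part of the program) ---
// Keyboard shortcuts
// Copy code: Ctrl + C
// Search code: Ctrl + F
// Full-screen mode: F11
// Toggle theme: Ctrl + Shift + D
// Show shortcuts: ?
// Increase font size: Ctrl + =
// Decrease font size: Ctrl + -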