⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sentencesplitter.cs

📁 .NET写的中文分词组件
💻 CS
字号:
using System;
/*
 模块名称:补天在线中文分词组件
 *   程序:温泉
 *   日期:2008-1-24
 */

using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Data.SqlClient;
using System.Text.RegularExpressions;
using System.Collections;

namespace FillSkynet.WordStock
{
     /// <summary>
     /// Dictionary-based word segmenter. Text is split with a forward
     /// maximum-matching strategy against the <c>sky_wordstock</c> table:
     /// at each position the longest candidate that is a run of digits, a run of
     /// ASCII letters or a dictionary word is emitted.
     /// </summary>
     public class SentenceSplitter
     {
         // SECURITY: legacy fallback kept only so existing callers that never set
         // ConnetionString keep working. Credentials do not belong in source code;
         // set ConnetionString from configuration and retire this constant.
         private const string DefaultConnectionString = "server=220.162.244.144;database=fillsky_skill;user id=wenquan;password=12053519";

         // Separator used by SplitSentence when SplitString has not been set.
         private const string DefaultSeparator = " ";

         // Characters treated as punctuation / separators.
         // NOTE(review): several ASCII marks appear twice in this class; the second
         // occurrences may originally have been full-width forms - confirm against
         // the original source before changing the pattern.
         private static readonly Regex PunctuationRegex = new Regex("[\\n,/\\.\"'!\\?;:\\(\\)<=>\\+\\*\\-@\\[\\]\\{\\},\\\\。;:‘“”、!?《》()【】——…]", RegexOptions.Compiled);
         private static readonly Regex DigitsRegex = new Regex("^\\d+$", RegexOptions.Compiled);
         private static readonly Regex LettersRegex = new Regex("^[a-zA-Z]+$", RegexOptions.Compiled);
         private static readonly Regex AsciiAlphanumericRegex = new Regex("^[a-zA-Z0-9]+$", RegexOptions.Compiled);

         private SqlConnection _con;
         private string _SplitString;
         private string _connetionString;
         private string _Sentence;

         /// <summary>Creates a splitter with no sentence assigned.</summary>
         public SentenceSplitter()
         {
         }

         /// <summary>Creates a splitter and stores <paramref name="sentence"/> in <see cref="Sentence"/>.</summary>
         /// <param name="sentence">Text to remember; may be null.</param>
         public SentenceSplitter(string sentence)
         {
             // The original constructor silently discarded its argument.
             _Sentence = sentence;
         }

         /// <summary>Sentence associated with this instance.</summary>
         public string Sentence
         {
             get { return _Sentence; }
             set { _Sentence = value; }
         }

         /// <summary>
         /// Connection string of the dictionary database. When null or empty the
         /// built-in legacy connection string is used.
         /// </summary>
         public string ConnetionString
         {
             get { return _connetionString; }
             set
             {
                 if (_connetionString != value)
                 {
                     _connetionString = value;

                     // Drop the cached connection so the next query uses the new string.
                     if (_con != null)
                     {
                         _con.Dispose();
                         _con = null;
                     }
                 }
             }
         }

         /// <summary>
         /// Separator placed between words by <see cref="SplitSentence"/>.
         /// A single space is used when this is null or empty.
         /// </summary>
         public string SplitString
         {
             get { return _SplitString; }
             set { _SplitString = value; }
         }

         /// <summary>
         /// Segments a whole sentence: punctuation is replaced by spaces, the text is
         /// broken on whitespace, purely ASCII alphanumeric fragments are kept whole
         /// and every other fragment is segmented with
         /// <see cref="SplitSingleSenetence"/>.
         /// </summary>
         /// <param name="sentence">Text to segment.</param>
         /// <returns>
         /// The words joined by <see cref="SplitString"/>; an empty string when the
         /// input contains no words.
         /// </returns>
         /// <exception cref="ArgumentNullException"><paramref name="sentence"/> is null.</exception>
         public string SplitSentence(string sentence)
         {
             if (sentence == null)
             {
                 throw new ArgumentNullException("sentence");
             }

             string cleaned = PunctuationRegex.Replace(sentence, " ");

             // A null separator array splits on any white-space character.
             string[] fragments = cleaned.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

             List<string> words = new List<string>();
             foreach (string fragment in fragments)
             {
                 // Plain ASCII words and numbers need no dictionary lookup.
                 if (AsciiAlphanumericRegex.IsMatch(fragment))
                 {
                     words.Add(fragment);
                     continue;
                 }

                 ArrayList pieces = SplitSingleSenetence(fragment);
                 if (pieces != null)
                 {
                     foreach (string piece in pieces)
                     {
                         words.Add(piece);
                     }
                 }
             }

             string separator = string.IsNullOrEmpty(_SplitString) ? DefaultSeparator : _SplitString;
             return string.Join(separator, words.ToArray());
         }

         /// <summary>
         /// Segments one fragment of text by forward maximum matching.
         /// </summary>
         /// <param name="centence">Text to segment.</param>
         /// <returns>
         /// The words in order of appearance (each item is a <see cref="string"/>),
         /// or null when <paramref name="centence"/> is null or empty.
         /// </returns>
         public ArrayList SplitSingleSenetence(string centence)
         {
             if (string.IsNullOrEmpty(centence))
             {
                 return null;
             }

             ArrayList results = new ArrayList();

             OpenConnection();
             try
             {
                 // Whole-text match: a single character or a dictionary entry.
                 if (centence.Length == 1 || Exist(centence))
                 {
                     results.Add(centence);
                     return results;
                 }

                 // Clamp to 1 so an empty dictionary cannot produce zero-length
                 // candidates, which would never consume any input.
                 int maxLength = Math.Max(1, GetWordMaxLength());
                 string remaining = centence;

                 while (remaining.Length > 0)
                 {
                     string candidate = remaining.Length > maxLength
                         ? remaining.Substring(0, maxLength)
                         : remaining;

                     // Shrink from the right until the candidate is a word or a single character.
                     while (candidate.Length > 1 && !IsWord(candidate))
                     {
                         candidate = candidate.Substring(0, candidate.Length - 1);
                     }

                     // Unmatched single characters are emitted as their own token;
                     // punctuation and white space are consumed without being emitted.
                     if (candidate.Length > 1 || IsStandaloneCharacter(candidate[0]))
                     {
                         results.Add(candidate);
                     }

                     remaining = remaining.Substring(candidate.Length);
                 }

                 return results;
             }
             finally
             {
                 // Close even when a query throws.
                 CloseConnection();
             }
         }

         /// <summary>
         /// Tells whether a multi-character candidate may be emitted as one word:
         /// it contains no punctuation and is all digits, all ASCII letters or a
         /// dictionary entry.
         /// </summary>
         private bool IsWord(string candidate)
         {
             if (PunctuationRegex.IsMatch(candidate))
             {
                 return false;
             }

             return DigitsRegex.IsMatch(candidate)
                 || LettersRegex.IsMatch(candidate)
                 || Exist(candidate);
         }

         /// <summary>Tells whether a lone character is worth emitting as a token.</summary>
         private static bool IsStandaloneCharacter(char value)
         {
             return !char.IsWhiteSpace(value) && !PunctuationRegex.IsMatch(value.ToString());
         }

         /// <summary>
         /// Checks whether <paramref name="word"/> is present in the dictionary table.
         /// Requires an open connection.
         /// </summary>
         /// <param name="word">Candidate word.</param>
         /// <returns>true when at least one matching row exists; otherwise false.</returns>
         private bool Exist(string word)
         {
             using (SqlCommand cmd = new SqlCommand("select word from sky_wordstock where word=@word", _con))
             {
                 cmd.Parameters.Add(new SqlParameter("@word", word));
                 using (SqlDataReader reader = cmd.ExecuteReader())
                 {
                     // The original compared bool.ToString() with "0" and therefore
                     // reported every word as present.
                     return reader.HasRows;
                 }
             }
         }

         /// <summary>
         /// Reads the length of the longest dictionary word. Requires an open connection.
         /// </summary>
         /// <returns>The maximum word length, or 0 when the table has no rows.</returns>
         private int GetWordMaxLength()
         {
             using (SqlCommand cmd = new SqlCommand("select max(leng) from (select len(word) as leng from sky_wordstock) aaa", _con))
             {
                 object value = cmd.ExecuteScalar();

                 // max() over an empty set yields NULL.
                 if (value == null || value == DBNull.Value)
                 {
                     return 0;
                 }

                 return Convert.ToInt32(value);
             }
         }

         /// <summary>Creates the connection on first use and opens it if it is not open.</summary>
         private void OpenConnection()
         {
             if (_con == null)
             {
                 string connectionString = string.IsNullOrEmpty(_connetionString)
                     ? DefaultConnectionString
                     : _connetionString;
                 _con = new SqlConnection(connectionString);
             }

             if (_con.State != ConnectionState.Open)
             {
                 _con.Open();
             }
         }

         /// <summary>Closes the connection if one has been created.</summary>
         private void CloseConnection()
         {
             if (_con != null)
             {
                 _con.Close();
             }
         }
     }
 }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -