⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 formdemo.cs

📁 KTDictSeg 简介: KTDictSeg 是由KaiToo搜索开发的一款基于字典的简单中英文分词算法 * 主要功能: 中英文分词
💻 CS
字号:
/***************************************************************************************
 * KTDictSeg 简介: KTDictSeg 是由KaiToo搜索开发的一款基于字典的简单中英文分词算法
 * 主要功能: 中英文分词,未登录词识别,多元歧义自动识别,全角字符识别能力
 * 主要性能指标:
 * 分词准确度:90%以上(有待专家的权威评测)
 * 处理速度: 600KBytes/s
 * 
 * 版本: V1.2.02 
 * Copyright(c) 2007 http://www.kaitoo.com 
 * 作者:肖波
 * 授权: 开源GPL
 * 公司网站: http://www.kaitoo.com
 * 个人博客: http://blog.csdn.net/eaglet; http://www.cnblogs.com/eaglet
 * 联系方式: blog.eaglet@gmail.com
 * ***************************************************************************************/

using System;
using System.Collections.Generic;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.IO;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.Diagnostics;
using KTDictSeg;
using FTAlgorithm;

namespace Demo
{
    /// <summary>
    /// Demo form for the KTDictSeg segmenter: segments the text in the source
    /// box, shows the resulting words (optionally with their positions) and
    /// displays the source length, elapsed time and throughput.
    /// </summary>
    public partial class FormDemo : Form
    {
        /// <summary>
        /// Segmenter instance used by this form. It stays <c>null</c> until the
        /// configuration and dictionary have been loaded successfully, and is
        /// reset to <c>null</c> when loading fails.
        /// </summary>
        public static CSimpleDictSeg m_SimpleDictSeg;

        /// <summary>Configuration file read on first use and written by the save button.</summary>
        private const String ConfigFileName = "KTDictSeg.xml";

        /// <summary>Sample text placed into the source box when the form loads.</summary>
        String m_InitSource = "KTDictSeg 简介: KTDictSeg 是由KaiToo搜索开发的一款基于字典的简单中英文分词算法\r\n"+
            "主要功能: 中英文分词,未登录词识别,多元歧义自动识别,全角字符识别能力\r\n" +
            "主要性能指标:\r\n" +
            "分词准确度:90%以上(有待专家的权威评测)\r\n" +
            "处理速度: 600KBytes/s\r\n" +
            "用于测试的句子:\r\n" +
            "长春市长春节致词\r\n" +
            "长春市长春药店\r\n" +
            "IBM的技术和服务都不错\r\n" +
            "张三在一月份工作会议上说的确实在理\r\n" +
            "于北京时间5月10日举行运动会\r\n"+
            "我的和服务必在明天做好" ;


        /// <summary>Creates the form and its designer-generated controls.</summary>
        public FormDemo()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Copies the option values currently shown in the form's controls
        /// onto the segmenter. The segmenter must already be loaded.
        /// </summary>
        private void ApplyOptionsFromControls()
        {
            m_SimpleDictSeg.FreqFirst = checkBoxFreqFirst.Checked;
            m_SimpleDictSeg.AutoStudy = checkBoxAutoStudy.Checked;
            m_SimpleDictSeg.FilterStopWords = checkBoxFilterStopWords.Checked;
            m_SimpleDictSeg.MatchName = checkBoxMatchName.Checked;
            m_SimpleDictSeg.AutoSaveInterval = (int)numericUpDownAutoSaveInterval.Value;
            m_SimpleDictSeg.UnknownWordsThreshold = (int)numericUpDownUnknownWordsThreshold.Value;
        }

        /// <summary>
        /// Makes sure a usable segmenter exists. On first use it loads the
        /// configuration, mirrors the loaded options into the controls and
        /// loads the dictionary; on later calls it pushes the control values
        /// back onto the existing segmenter.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the segmenter is ready; <c>false</c> when loading
        /// failed (an error box has already been shown in that case).
        /// </returns>
        private bool PrepareSegmenter()
        {
            if (m_SimpleDictSeg != null)
            {
                ApplyOptionsFromControls();
                return true;
            }

            try
            {
                m_SimpleDictSeg = new CSimpleDictSeg();
                m_SimpleDictSeg.LoadConfig(ConfigFileName);

                // Show the options that came from the configuration file.
                checkBoxFreqFirst.Checked = m_SimpleDictSeg.FreqFirst;
                checkBoxAutoStudy.Checked = m_SimpleDictSeg.AutoStudy;
                checkBoxFilterStopWords.Checked = m_SimpleDictSeg.FilterStopWords;
                checkBoxMatchName.Checked = m_SimpleDictSeg.MatchName;
                numericUpDownAutoSaveInterval.Value = m_SimpleDictSeg.AutoSaveInterval;
                numericUpDownUnknownWordsThreshold.Value = m_SimpleDictSeg.UnknownWordsThreshold;

                m_SimpleDictSeg.LoadDict();
                return true;
            }
            catch (Exception e1)
            {
                // Drop the half-initialised instance so the next attempt starts over.
                m_SimpleDictSeg = null;
                MessageBox.Show(String.Format("Load Dict Fail! ErrMsg:{0}", e1.Message),
                    "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
                return false;
            }
        }

        /// <summary>
        /// Formats the throughput in characters per second.
        /// </summary>
        /// <param name="sourceLength">Number of characters that were segmented.</param>
        /// <param name="elapsedMilliseconds">Elapsed time in whole milliseconds.</param>
        /// <returns>
        /// The rate rounded to a whole number, or the "infinite" label when the
        /// elapsed time is below one millisecond.
        /// </returns>
        private static String FormatRate(int sourceLength, long elapsedMilliseconds)
        {
            if (elapsedMilliseconds == 0)
            {
                return "无穷大";
            }

            // Divide in floating point: integer division here would truncate
            // the quotient before scaling and lose most of the precision.
            double charsPerSecond = sourceLength * 1000.0 / elapsedMilliseconds;
            return Math.Round(charsPerSecond).ToString();
        }

        /// <summary>
        /// Updates the source-length, elapsed-time and rate labels.
        /// </summary>
        /// <param name="sourceLength">Number of characters that were segmented.</param>
        /// <param name="watch">Stopped stopwatch that timed the segmentation.</param>
        private void ShowStatistics(int sourceLength, Stopwatch watch)
        {
            labelSrcLength.Text = sourceLength.ToString();
            labelSegTime.Text = watch.Elapsed.ToString();
            labelRegRate.Text = FormatRate(sourceLength, watch.ElapsedMilliseconds);
        }

        /// <summary>
        /// Segments the source text and shows every word followed by its
        /// position, as <c>word(position)/</c>.
        /// </summary>
        private void DisplaySegmentAndPostion()
        {
            if (!PrepareSegmenter())
            {
                return;
            }

            String source = textBoxSource.Text;

            Stopwatch watch = Stopwatch.StartNew();
            List<T_WordInfo> words = m_SimpleDictSeg.SegmentToWordInfos(source);
            watch.Stop();

            ShowStatistics(source.Length, watch);

            StringBuilder wordsString = new StringBuilder();
            foreach (T_WordInfo wordInfo in words)
            {
                wordsString.AppendFormat("{0}({1})/", wordInfo.Word, wordInfo.Position);
            }

            textBoxSegwords.Text = wordsString.ToString();
        }

        /// <summary>
        /// Segments the source text and shows the words separated by <c>/</c>.
        /// </summary>
        private void DisplaySegment()
        {
            if (!PrepareSegmenter())
            {
                return;
            }

            String source = textBoxSource.Text;

            Stopwatch watch = Stopwatch.StartNew();
            List<String> words = m_SimpleDictSeg.Segment(source);
            watch.Stop();

            ShowStatistics(source.Length, watch);

            StringBuilder wordsString = new StringBuilder();
            foreach (String str in words)
            {
                wordsString.AppendFormat("{0}/", str);
            }

            textBoxSegwords.Text = wordsString.ToString();
        }

        /// <summary>
        /// Runs the segmentation, with or without positions depending on the
        /// "display position" check box.
        /// </summary>
        private void buttonSegment_Click(object sender, EventArgs e)
        {
            if (checkBoxDisplayPosition.Checked)
            {
                DisplaySegmentAndPostion();
            }
            else
            {
                DisplaySegment();
            }
        }

        /// <summary>
        /// Fills the source box with the sample text and segments it once.
        /// </summary>
        private void FormDemo_Load(object sender, EventArgs e)
        {
            textBoxSource.Text = m_InitSource;
            DisplaySegment();
        }

        /// <summary>
        /// Starts a process using the clicked link label's text as the target.
        /// </summary>
        private void linkLabel_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e)
        {
            System.Diagnostics.Process.Start(((LinkLabel)sender).Text);
        }

        /// <summary>
        /// Marks every key press as handled.
        /// NOTE(review): presumably this keeps the combo box from accepting
        /// typed text - confirm against the designer settings.
        /// </summary>
        private void comboBoxMatchDir_KeyPress(object sender, KeyPressEventArgs e)
        {
            e.Handled = true;
        }

        /// <summary>
        /// Opens the <see cref="FormTrafficPos"/> window; does nothing while
        /// the segmenter is not loaded.
        /// </summary>
        private void buttonTrafficPos_Click(object sender, EventArgs e)
        {
            if (m_SimpleDictSeg == null)
            {
                return;
            }

            FormTrafficPos frmTrafficPos = new FormTrafficPos();
            frmTrafficPos.Show();
        }

        /// <summary>
        /// Writes the options currently shown in the form to the configuration
        /// file. Does nothing while the segmenter is not loaded; a failure to
        /// save is reported in an error box.
        /// </summary>
        private void buttonSaveConfig_Click(object sender, EventArgs e)
        {
            // The segmenter is null when the initial load failed; there is
            // nothing to save then, and dereferencing it would throw.
            if (m_SimpleDictSeg == null)
            {
                return;
            }

            // Options are otherwise only pushed to the segmenter when a
            // segmentation runs, so sync them first to save what is on screen.
            ApplyOptionsFromControls();

            try
            {
                m_SimpleDictSeg.SaveConfig(ConfigFileName);
            }
            catch (Exception e1)
            {
                MessageBox.Show(String.Format("Save Config Fail! ErrMsg:{0}", e1.Message),
                    "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
            }
        }
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -