⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 form1.cs

📁 参考詹卫东写的分词程序
💻 CS
📖 第 1 页 / 共 2 页
字号:
using System;
using System.Drawing;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.Data;
using System.Data.OleDb;
using System.IO;
using System.Collections.Specialized;
using System.Text;
using wordLibrary_keqian070507;

namespace NLP_WordSeg
{
	
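	/// <summary>
	/// Main window of the word-segmentation tool: it lets the user load the
	/// Access lexicon database, pick the text file to process and trigger
	/// the maximum-matching segmentation.
	/// </summary>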
	public class Form1 : System.Windows.Forms.Form
	{
		// Button that loads the lexicon database (wired to button1_Click).
		private System.Windows.Forms.Button button1;

		/// <summary>
		/// Connection to the Access lexicon database; assigned in button1_Click.
		/// </summary>
		private System.Data.OleDb.OleDbConnection _connOle;
		// Staging DataSet the lexicon tables are read into; created in the
		// constructor and cleared again once the lexicon has been loaded.
		private System.Data.DataSet _dataSet;
		// Path of the text file chosen in btOpenFile_Click; "" means none chosen.
		private string _fileNameStr;
		// Set to true by button1_Click after the lexicon was loaded;
		// checked by btMMMethod_Click before segmenting.
		private bool _openDBb;
		// NOTE(review): presumably the sentence delimiters used to split the
		// input text - the code using them is not visible here; confirm.
		private char[] _separatorsC = {'!','。','?'};
		// NOTE(review): presumably the suffix appended to the output file name - confirm.
		private const string _strExt = "_seg";
		// NOTE(review): presumably the path separator used when building file names - confirm.
		private const string _Separator = @"\";
		// NOTE(review): presumably the maximum word length tried by the matcher - confirm.
		private const int _iNumberMax = 4;

		// Lexicon index: one ArrayList per code point in U+4E00..U+9FA5.
		// button1_Click stores each word in the slot (first character - 0x4E00).
		private  ArrayList[] LibArray = new ArrayList[0x9FA5-0x4E00+1];


		// Button that selects the text file to process (wired to btOpenFile_Click).
		private System.Windows.Forms.Button btOpenFile;
		// Group box "打开文件" holding button1 and btOpenFile.
		private System.Windows.Forms.GroupBox groupBox1;
		// Group box "分词方法" holding label1, textBox1 and btMMMethod.
		private System.Windows.Forms.GroupBox groupBox2;
		// Button that starts maximum-matching segmentation (wired to btMMMethod_Click).
		private System.Windows.Forms.Button btMMMethod;
		// Text box placed next to label1 ("所用时间").
		// NOTE(review): presumably shows the elapsed time - confirm.
		private System.Windows.Forms.TextBox textBox1;
		private System.Windows.Forms.Label label1;

		/// <summary>
		/// Required designer variable.
		/// </summary>
		private System.ComponentModel.Container components = null;

		/// <summary>
		/// Creates the form, builds its controls and puts the lexicon state
		/// into its initial "nothing loaded" configuration.
		/// </summary>
		public Form1()
		{
			// Required for Windows Forms designer support.
			InitializeComponent();

			// Nothing has been loaded or selected yet.
			_openDBb = false;
			_fileNameStr = "";
			_connOle = null;
			_dataSet = new DataSet();

			// Give every slot of the lexicon index its own (empty) word list.
			int slot = 0;
			while (slot < LibArray.Length)
			{
				LibArray[slot] = new ArrayList();
				slot++;
			}
		}

		/// <summary>
		/// Cleans up the resources used by the form.
		/// </summary>
		/// <param name="disposing">
		/// When true, the designer component container (if one exists) is
		/// disposed before the base class clean-up runs.
		/// </param>
		protected override void Dispose( bool disposing )
		{
			bool hasComponents = components != null;
			if (disposing && hasComponents)
			{
				components.Dispose();
			}

			base.Dispose(disposing);
		}


		#region Windows 窗体设计器生成的代码
		/// <summary>
		/// Required method for designer support - do not modify the contents
		/// of this method with the code editor.
		/// Creates every control, sets its properties, wires the three button
		/// Click handlers and adds the controls to the form.
		/// </summary>
		private void InitializeComponent()
		{
			this.button1 = new System.Windows.Forms.Button();
			this.btOpenFile = new System.Windows.Forms.Button();
			this.groupBox1 = new System.Windows.Forms.GroupBox();
			this.groupBox2 = new System.Windows.Forms.GroupBox();
			this.label1 = new System.Windows.Forms.Label();
			this.textBox1 = new System.Windows.Forms.TextBox();
			this.btMMMethod = new System.Windows.Forms.Button();
			// Layout is suspended while properties are assigned and resumed at the end.
			this.groupBox1.SuspendLayout();
			this.groupBox2.SuspendLayout();
			this.SuspendLayout();
			// 
			// button1 - "load corpus" button, handled by button1_Click
			// 
			this.button1.FlatStyle = System.Windows.Forms.FlatStyle.Flat;
			this.button1.Location = new System.Drawing.Point(16, 40);
			this.button1.Name = "button1";
			this.button1.Size = new System.Drawing.Size(96, 32);
			this.button1.TabIndex = 0;
			this.button1.Text = "载入语料库";
			this.button1.Click += new System.EventHandler(this.button1_Click);
			// 
			// btOpenFile - "open file to process" button, handled by btOpenFile_Click
			// 
			this.btOpenFile.FlatStyle = System.Windows.Forms.FlatStyle.Flat;
			this.btOpenFile.Location = new System.Drawing.Point(16, 80);
			this.btOpenFile.Name = "btOpenFile";
			this.btOpenFile.Size = new System.Drawing.Size(96, 32);
			this.btOpenFile.TabIndex = 1;
			this.btOpenFile.Text = "打开处理文件";
			this.btOpenFile.Click += new System.EventHandler(this.btOpenFile_Click);
			// 
			// groupBox1 - "open file" group containing button1 and btOpenFile
			// 
			this.groupBox1.Controls.Add(this.button1);
			this.groupBox1.Controls.Add(this.btOpenFile);
			this.groupBox1.Location = new System.Drawing.Point(16, 32);
			this.groupBox1.Name = "groupBox1";
			this.groupBox1.Size = new System.Drawing.Size(224, 280);
			this.groupBox1.TabIndex = 2;
			this.groupBox1.TabStop = false;
			this.groupBox1.Text = "打开文件";
			// 
			// groupBox2 - "segmentation method" group containing label1, textBox1 and btMMMethod
			// 
			this.groupBox2.Controls.Add(this.label1);
			this.groupBox2.Controls.Add(this.textBox1);
			this.groupBox2.Controls.Add(this.btMMMethod);
			this.groupBox2.Location = new System.Drawing.Point(256, 32);
			this.groupBox2.Name = "groupBox2";
			this.groupBox2.Size = new System.Drawing.Size(400, 272);
			this.groupBox2.TabIndex = 3;
			this.groupBox2.TabStop = false;
			this.groupBox2.Text = "分词方法";
			// 
			// label1 - "elapsed time" caption
			// 
			this.label1.Font = new System.Drawing.Font("宋体", 10.5F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((System.Byte)(134)));
			this.label1.Location = new System.Drawing.Point(120, 48);
			this.label1.Name = "label1";
			this.label1.Size = new System.Drawing.Size(64, 16);
			this.label1.TabIndex = 4;
			this.label1.Text = "所用时间";
			// 
			// textBox1 - text box next to the "elapsed time" caption
			// 
			this.textBox1.AcceptsReturn = true;
			this.textBox1.BorderStyle = System.Windows.Forms.BorderStyle.FixedSingle;
			this.textBox1.ImeMode = System.Windows.Forms.ImeMode.On;
			this.textBox1.Location = new System.Drawing.Point(192, 48);
			this.textBox1.Multiline = true;
			this.textBox1.Name = "textBox1";
			this.textBox1.Size = new System.Drawing.Size(192, 24);
			this.textBox1.TabIndex = 3;
			this.textBox1.Text = "";
			// 
			// btMMMethod - "maximum matching" button, handled by btMMMethod_Click
			// 
			this.btMMMethod.FlatStyle = System.Windows.Forms.FlatStyle.Flat;
			this.btMMMethod.Location = new System.Drawing.Point(16, 40);
			this.btMMMethod.Name = "btMMMethod";
			this.btMMMethod.Size = new System.Drawing.Size(96, 32);
			this.btMMMethod.TabIndex = 2;
			this.btMMMethod.Text = "最大匹配法";
			this.btMMMethod.Click += new System.EventHandler(this.btMMMethod_Click);
			// 
			// Form1 - the window itself; hosts the two group boxes
			// 
			this.AutoScaleBaseSize = new System.Drawing.Size(6, 14);
			this.ClientSize = new System.Drawing.Size(696, 342);
			this.Controls.Add(this.groupBox2);
			this.Controls.Add(this.groupBox1);
			this.Name = "Form1";
			this.Text = "自动分词和词性标注系统";
			this.groupBox1.ResumeLayout(false);
			this.groupBox2.ResumeLayout(false);
			this.ResumeLayout(false);

		}
		#endregion

		/// <summary>
		/// Main entry point of the application: creates the main window and
		/// runs the Windows Forms message loop on it.
		/// </summary>
		[STAThread]
		static void Main() 
		{
			Form1 mainForm = new Form1();
			Application.Run(mainForm);
		}


		#region ==选择语料库
		/// <summary>
		/// Handles the "load corpus" button. Lets the user pick an Access
		/// (.mdb) lexicon, reads the tables "words", "poss", "hanzixm" and
		/// "sense" into <c>_dataSet</c>, and rebuilds <c>LibArray</c> from the
		/// "words" table: every word whose first character lies in
		/// U+4E00..U+9FA5 is stored in the slot of that character, and every
		/// slot left empty receives a single-character entry with frequency 1.
		/// On success <c>_openDBb</c> is set to true; on failure the lexicon
		/// is emptied, <c>_openDBb</c> is set to false and the error is shown.
		/// </summary>
		/// <param name="sender">The button that raised the event (unused).</param>
		/// <param name="e">Event data (unused).</param>
		private void button1_Click(object sender, System.EventArgs e)
		{
			// Ask for the database file; the dialog is disposed as soon as
			// the chosen path has been copied out of it.
			string dbPath = null;
			using (OpenFileDialog oF = new OpenFileDialog())
			{
				oF.Title = "选择语料库.Access";
				oF.InitialDirectory = @"E:\";
				oF.Filter = "access(*.mdb)|*.mdb";
				oF.RestoreDirectory = true;

				if (oF.ShowDialog() == DialogResult.OK)
				{
					dbPath = oF.FileName;
				}
			}

			if (dbPath == null)
			{
				MessageBox.Show("没有选择数据库文件!","错误!");
				return;
			}

			string _connStrUsed = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + dbPath;

			// Tables read from the database. Only "words" is consumed below.
			// The original author had disabled the "names" table because of an
			// unexplained error, and a words.wid -> poss.wid relation because
			// the data of the two tables did not correspond.
			string[] tableNames = {"words", "poss", "hanzixm", "sense"};

			string failure = null;
			try
			{
				// (1) Read the tables into the staging DataSet.
				// The connection stays in the _connOle field as before; it is
				// closed and disposed when the using block ends.
				using (_connOle = new OleDbConnection(_connStrUsed))
				{
					_connOle.Open();

					using (OleDbCommand _commandOle = new OleDbCommand())
					{
						_commandOle.Connection = _connOle;

						foreach (string tableName in tableNames)
						{
							_commandOle.CommandText = "select * from " + tableName;
							using (OleDbDataAdapter _dataAdapterOle = new OleDbDataAdapter(_commandOle))
							{
								_dataAdapterOle.Fill(_dataSet, tableName);
							}
						}
					}
				}

				// (2) Rebuild the lexicon index. Start from empty lists so
				// that loading a second time does not duplicate every word.
				for (int i = 0; i < LibArray.Length; i++)
				{
					LibArray[i].Clear();
				}

				foreach (DataRow dr in _dataSet.Tables["words"].Rows)
				{
					object rawWord = dr["word"];
					if (rawWord == DBNull.Value)
					{
						continue;	// NULL entry: nothing to index
					}

					string text = (string)rawWord;
					if (text.Length == 0)
					{
						continue;	// empty entry has no first character
					}

					char first = text[0];
					if (first < '\u4E00' || first > '\u9FA5')
					{
						continue;	// only words starting inside the indexed range are kept
					}

					// A fresh instance per word: sharing one instance would make
					// all entries alias the same object if wordsStr is a class.
					wordsStr word = new wordsStr();
					word.wordPrase = text;
					word.eNumPoW = 0;	// part of speech defaults to 0 for now
					word.wFreq = (int)dr["wfreq"];

					LibArray[first - 0x4E00].Add(word);
				}

				// (3) Every character without any word gets itself as a
				// single-character entry with frequency 1.
				for (int i = 0; i < LibArray.Length; i++)
				{
					if (LibArray[i].Count == 0)
					{
						char a = (char)(i + 0x4E00);

						wordsStr filler = new wordsStr();
						filler.wordPrase = a.ToString();
						filler.eNumPoW = 0;
						filler.wFreq = 1;
						LibArray[i].Add(filler);
					}
				}
			}
			catch (OleDbException ex)
			{
				// Database-level failure, e.g. a missing table or unreadable file.
				failure = ex.Message;
			}
			catch (InvalidOperationException ex)
			{
				// E.g. the Jet OLE DB provider is not available on this machine.
				failure = ex.Message;
			}
			catch (InvalidCastException ex)
			{
				// A "word"/"wfreq" column holds a value of an unexpected type.
				failure = ex.Message;
			}
			finally
			{
				// The staging rows are no longer needed, whatever the outcome.
				_dataSet.Clear();
			}

			if (failure != null)
			{
				// Never leave a half-built lexicon marked as usable.
				_openDBb = false;
				for (int i = 0; i < LibArray.Length; i++)
				{
					LibArray[i].Clear();
				}
				MessageBox.Show("载入失败: " + failure,"错误!");
				return;
			}

			_openDBb = true;	// the lexicon is completely loaded
			MessageBox.Show("载入成功!","操作结果");
		}

		#endregion

		#region == 打开待处理文件==
		/// <summary>
		/// Handles the "open file to process" button: lets the user pick a
		/// text file and remembers its path in <c>_fileNameStr</c>. The file
		/// itself is not read here. Nothing changes when the dialog is
		/// cancelled.
		/// </summary>
		/// <param name="sender">The button that raised the event (unused).</param>
		/// <param name="e">Event data (unused).</param>
		private void btOpenFile_Click(object sender, System.EventArgs e)
		{
			// The dialog is IDisposable; release it once the path is copied.
			using (OpenFileDialog _openFileDlg = new OpenFileDialog())
			{
				_openFileDlg.Title = "打开待处理文本文件";
				_openFileDlg.Filter = "文本文件(*.txt)|*.txt";

				if (_openFileDlg.ShowDialog() != DialogResult.OK)
				{
					return;
				}

				_fileNameStr = _openFileDlg.FileName;
			}

			MessageBox.Show("打开成功","操作结果");
		}

		#endregion

		#region ==进行分词处理==
		private void btMMMethod_Click(object sender, System.EventArgs e)
		{
			
			//(1)确定是否已经打开
			if(!_openDBb)
			{
				MessageBox.Show("请先打开词库!","结果");
				return;
			}
			if (_fileNameStr == "")
			{
				MessageBox.Show("没有选择待处理文件","结果");
				return;
			}


			//
			DateTime dtStart = DateTime.Now;
			
			//(2)打开文件,并对文件进行读取,按照一句话一句话进行处理。

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -