📄 form1.cs
字号:
using System;
using System.Drawing;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.Data;
using System.Data.OleDb;
using System.IO;
using System.Collections.Specialized;
using System.Text;
using wordLibrary_keqian070507;
namespace NLP_WordSeg
{
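/// <summary>
/// Main window of the word segmentation tool. Its buttons load a lexicon
/// from an Access database, select a text file to process, and start
/// maximum-matching segmentation.
/// </summary>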
public class Form1 : System.Windows.Forms.Form
{
/// <summary>Button whose Click handler (button1_Click) loads the lexicon database.</summary>
private System.Windows.Forms.Button button1;
/// <summary>
/// OLE DB connection used while reading the lexicon database. It is assigned
/// inside a using statement in button1_Click, so after that handler returns
/// the field refers to an already disposed connection.
/// </summary>
private System.Data.OleDb.OleDbConnection _connOle;
/// <summary>Scratch DataSet that receives the database tables during loading; cleared afterwards.</summary>
private System.Data.DataSet _dataSet;
/// <summary>Full path of the text file chosen in btOpenFile_Click; empty until a file is chosen.</summary>
private string _fileNameStr;
/// <summary>True once the lexicon database has been opened and read by button1_Click.</summary>
private bool _openDBb;
// Presumably the punctuation used to split the input text into sentences;
// the code that uses it is not visible in this part of the file - TODO confirm.
private char[] _separatorsC = {'!','。','?'};
// Presumably a suffix appended to the output file name - TODO confirm against its use.
private const string _strExt = "_seg";
// A single backslash; presumably used as a path separator - TODO confirm.
private const string _Separator = @"\";
// Presumably the maximum word length tried by the matching algorithm - TODO confirm.
private const int _iNumberMax = 4;
/// <summary>
/// Lexicon index with one ArrayList per code point in the range 0x4E00..0x9FA5.
/// button1_Click stores each word in the slot given by its first character minus 0x4E00.
/// </summary>
private ArrayList[] LibArray = new ArrayList[0x9FA5-0x4E00+1];// stores the lexicon data
/// <summary>Button whose Click handler (btOpenFile_Click) selects the text file to process.</summary>
private System.Windows.Forms.Button btOpenFile;
/// <summary>Group box that contains button1 and btOpenFile.</summary>
private System.Windows.Forms.GroupBox groupBox1;
/// <summary>Group box that contains label1, textBox1 and btMMMethod.</summary>
private System.Windows.Forms.GroupBox groupBox2;
/// <summary>Button whose Click handler (btMMMethod_Click) starts the segmentation.</summary>
private System.Windows.Forms.Button btMMMethod;
/// <summary>Text box placed next to the "time used" label.</summary>
private System.Windows.Forms.TextBox textBox1;
/// <summary>Caption label for textBox1.</summary>
private System.Windows.Forms.Label label1;
/// <summary>
/// Required designer variable.
/// </summary>
private System.ComponentModel.Container components = null;
/// <summary>
/// Creates the form: builds the designer-defined controls, resets the
/// loading state, and allocates one empty word list per lexicon slot.
/// </summary>
public Form1()
{
// Required for Windows Forms designer support.
InitializeComponent();

// Nothing has been loaded or selected yet.
_openDBb = false;
_fileNameStr = "";
_dataSet = new DataSet();
_connOle = null;

// Give every slot of the lexicon index its own empty list.
for (int slot = LibArray.Length - 1; slot >= 0; slot--)
{
LibArray[slot] = new ArrayList();
}
}
/// <summary>
/// Cleans up any resources being used.
/// </summary>
/// <param name="disposing">
/// True when called from Dispose(); the designer component container is
/// disposed only in that case.
/// </param>
protected override void Dispose( bool disposing )
{
bool ownsComponents = disposing && components != null;
if (ownsComponents)
{
components.Dispose();
}
base.Dispose( disposing );
}
#region Windows 窗体设计器生成的代码
/// <summary>
/// Required method for designer support - do not modify
/// the contents of this method with the code editor.
/// Creates the controls, sets their properties, wires the three button
/// Click handlers, and adds the two group boxes to the form.
/// </summary>
private void InitializeComponent()
{
this.button1 = new System.Windows.Forms.Button();
this.btOpenFile = new System.Windows.Forms.Button();
this.groupBox1 = new System.Windows.Forms.GroupBox();
this.groupBox2 = new System.Windows.Forms.GroupBox();
this.label1 = new System.Windows.Forms.Label();
this.textBox1 = new System.Windows.Forms.TextBox();
this.btMMMethod = new System.Windows.Forms.Button();
// Layout is suspended while the controls are configured and resumed at the end.
this.groupBox1.SuspendLayout();
this.groupBox2.SuspendLayout();
this.SuspendLayout();
//
// button1 ("load corpus" button)
//
this.button1.FlatStyle = System.Windows.Forms.FlatStyle.Flat;
this.button1.Location = new System.Drawing.Point(16, 40);
this.button1.Name = "button1";
this.button1.Size = new System.Drawing.Size(96, 32);
this.button1.TabIndex = 0;
this.button1.Text = "载入语料库";
this.button1.Click += new System.EventHandler(this.button1_Click);
//
// btOpenFile ("open file to process" button)
//
this.btOpenFile.FlatStyle = System.Windows.Forms.FlatStyle.Flat;
this.btOpenFile.Location = new System.Drawing.Point(16, 80);
this.btOpenFile.Name = "btOpenFile";
this.btOpenFile.Size = new System.Drawing.Size(96, 32);
this.btOpenFile.TabIndex = 1;
this.btOpenFile.Text = "打开处理文件";
this.btOpenFile.Click += new System.EventHandler(this.btOpenFile_Click);
//
// groupBox1 ("open file" group: holds button1 and btOpenFile)
//
this.groupBox1.Controls.Add(this.button1);
this.groupBox1.Controls.Add(this.btOpenFile);
this.groupBox1.Location = new System.Drawing.Point(16, 32);
this.groupBox1.Name = "groupBox1";
this.groupBox1.Size = new System.Drawing.Size(224, 280);
this.groupBox1.TabIndex = 2;
this.groupBox1.TabStop = false;
this.groupBox1.Text = "打开文件";
//
// groupBox2 ("segmentation method" group: holds label1, textBox1 and btMMMethod)
//
this.groupBox2.Controls.Add(this.label1);
this.groupBox2.Controls.Add(this.textBox1);
this.groupBox2.Controls.Add(this.btMMMethod);
this.groupBox2.Location = new System.Drawing.Point(256, 32);
this.groupBox2.Name = "groupBox2";
this.groupBox2.Size = new System.Drawing.Size(400, 272);
this.groupBox2.TabIndex = 3;
this.groupBox2.TabStop = false;
this.groupBox2.Text = "分词方法";
//
// label1 ("time used" caption)
//
this.label1.Font = new System.Drawing.Font("宋体", 10.5F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((System.Byte)(134)));
this.label1.Location = new System.Drawing.Point(120, 48);
this.label1.Name = "label1";
this.label1.Size = new System.Drawing.Size(64, 16);
this.label1.TabIndex = 4;
this.label1.Text = "所用时间";
//
// textBox1 (text box next to the "time used" caption)
//
this.textBox1.AcceptsReturn = true;
this.textBox1.BorderStyle = System.Windows.Forms.BorderStyle.FixedSingle;
this.textBox1.ImeMode = System.Windows.Forms.ImeMode.On;
this.textBox1.Location = new System.Drawing.Point(192, 48);
this.textBox1.Multiline = true;
this.textBox1.Name = "textBox1";
this.textBox1.Size = new System.Drawing.Size(192, 24);
this.textBox1.TabIndex = 3;
this.textBox1.Text = "";
//
// btMMMethod ("maximum matching method" button)
//
this.btMMMethod.FlatStyle = System.Windows.Forms.FlatStyle.Flat;
this.btMMMethod.Location = new System.Drawing.Point(16, 40);
this.btMMMethod.Name = "btMMMethod";
this.btMMMethod.Size = new System.Drawing.Size(96, 32);
this.btMMMethod.TabIndex = 2;
this.btMMMethod.Text = "最大匹配法";
this.btMMMethod.Click += new System.EventHandler(this.btMMMethod_Click);
//
// Form1 (window title: "automatic word segmentation and part-of-speech tagging system")
//
this.AutoScaleBaseSize = new System.Drawing.Size(6, 14);
this.ClientSize = new System.Drawing.Size(696, 342);
this.Controls.Add(this.groupBox2);
this.Controls.Add(this.groupBox1);
this.Name = "Form1";
this.Text = "自动分词和词性标注系统";
this.groupBox1.ResumeLayout(false);
this.groupBox2.ResumeLayout(false);
this.ResumeLayout(false);
}
#endregion
/// <summary>
/// The main entry point for the application: creates the main form and
/// runs the message loop on it.
/// </summary>
[STAThread]
static void Main()
{
Form1 mainForm = new Form1();
Application.Run(mainForm);
}
#region ==选择语料库
/// <summary>
/// Click handler of the "load corpus" button. Asks the user for an Access
/// (.mdb) lexicon database, reads its tables, and rebuilds the in-memory
/// lexicon index (LibArray) from the "words" table.
/// </summary>
/// <remarks>
/// The index is built into a temporary array and copied into LibArray only
/// after everything succeeded, so a failed or repeated load neither leaves a
/// half-filled index nor appends duplicate entries. _openDBb becomes true
/// only after the index is in place.
/// </remarks>
/// <param name="sender">The control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button1_Click(object sender, System.EventArgs e)
{
string dbPath = null;
// The dialog is a disposable component; release it as soon as the path is known.
using (OpenFileDialog oF = new OpenFileDialog())
{
oF.Title = "选择语料库.Access";
oF.InitialDirectory = @"E:\";
oF.Filter = "access(*.mdb)|*.mdb";
oF.RestoreDirectory = true;
if (oF.ShowDialog() == DialogResult.OK)
{
dbPath = oF.FileName;
}
}
if (dbPath == null)
{
MessageBox.Show("没有选择数据库文件!","错误!");
return;
}

string failure = null;
try
{
// Drop rows that an earlier, failed attempt may have left behind.
_dataSet.Clear();
FillLexiconTables(dbPath);
ArrayList[] index = BuildLexiconIndex(_dataSet.Tables["words"], LibArray.Length);
// Replace the previous index slot by slot; the LibArray object itself is kept.
Array.Copy(index, LibArray, index.Length);
_openDBb = true;
}
catch (OleDbException ex)
{
// Raised by the provider, e.g. unreadable file or missing table.
failure = ex.Message;
}
catch (InvalidOperationException ex)
{
// Raised e.g. when the Jet OLE DB provider is not available.
failure = ex.Message;
}
finally
{
// The DataSet is only a staging area; its rows are not needed afterwards.
_dataSet.Clear();
}

if (failure == null)
{
MessageBox.Show("载入成功!","操作结果");
}
else
{
MessageBox.Show("载入语料库失败:" + failure, "错误!");
}
}

/// <summary>
/// Opens the Access database at <paramref name="dbPath"/> and fills _dataSet
/// with the tables "words", "poss", "hanzixm" and "sense".
/// </summary>
/// <param name="dbPath">Full path of the .mdb file.</param>
private void FillLexiconTables(string dbPath)
{
string connStr = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + dbPath;
// The "names" table is deliberately not loaded: the original author noted
// an unexplained error when filling it. No DataRelation is added between
// words.wid and poss.wid either, because per the author's note the data
// does not correspond and adding the constraint failed.
// NOTE(review): only "words" is read afterwards in the visible code; the
// other tables are filled and then cleared - confirm whether they are needed.
string[] tableNames = new string[] { "words", "poss", "hanzixm", "sense" };
using (OleDbConnection conn = new OleDbConnection(connStr))
{
// The field is still assigned for code outside this method that may look
// at it; the connection is closed and disposed when this block ends.
_connOle = conn;
conn.Open();
for (int i = 0; i < tableNames.Length; i++)
{
using (OleDbCommand cmd = new OleDbCommand("select * from " + tableNames[i], conn))
using (OleDbDataAdapter adapter = new OleDbDataAdapter(cmd))
{
adapter.Fill(_dataSet, tableNames[i]);
}
}
}
}

/// <summary>
/// Builds a lexicon index with <paramref name="slotCount"/> lists, one per
/// code point starting at 0x4E00. Each row of <paramref name="words"/> is
/// stored in the slot of its first character; rows with a missing or empty
/// word, or whose first character is outside the range, are skipped. Every
/// slot that stays empty receives a one-character entry with frequency 1.
/// </summary>
/// <param name="words">Table with the columns "word" and "wfreq".</param>
/// <param name="slotCount">Number of slots in the index.</param>
/// <returns>The new index; every element is a non-empty ArrayList of wordsStr.</returns>
private static ArrayList[] BuildLexiconIndex(DataTable words, int slotCount)
{
const int firstCode = 0x4E00;
ArrayList[] index = new ArrayList[slotCount];
for (int i = 0; i < index.Length; i++)
{
index[i] = new ArrayList();
}

foreach (DataRow dr in words.Rows)
{
if (dr.IsNull("word"))
{
continue;
}
string text = Convert.ToString(dr["word"]);
if (text.Length == 0)
{
continue; // indexing text[0] would throw
}
int slot = text[0] - firstCode;
if (slot < 0 || slot >= index.Length)
{
continue;
}
// A fresh instance per entry: the definition of wordsStr is not visible
// here, and if it is a reference type, reusing one instance would make
// every list element point to the same object.
wordsStr entry = new wordsStr();
entry.wordPrase = text;
entry.eNumPoW = 0; // part of speech defaults to 0 for now
// Convert instead of unboxing so that any integral column type is accepted.
entry.wFreq = Convert.ToInt32(dr["wfreq"]);
index[slot].Add(entry);
}

for (int i = 0; i < index.Length; i++)
{
if (index[i].Count == 0)
{
wordsStr single = new wordsStr();
single.wordPrase = ((char)(i + firstCode)).ToString();
single.eNumPoW = 0;
single.wFreq = 1;
index[i].Add(single);
}
}
return index;
}
#endregion
#region == 打开待处理文件==
/// <summary>
/// Click handler of the "open file to process" button. Lets the user pick a
/// .txt file and remembers its path in _fileNameStr; nothing changes when
/// the dialog is cancelled. The file itself is not opened here.
/// </summary>
/// <param name="sender">The control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void btOpenFile_Click(object sender, System.EventArgs e)
{
// The dialog is a disposable component, so it is released deterministically.
using (OpenFileDialog openFileDlg = new OpenFileDialog())
{
openFileDlg.Title = "打开待处理文本文件";
openFileDlg.Filter = "文本文件(*.txt)|*.txt";
if (openFileDlg.ShowDialog() == DialogResult.OK)
{
_fileNameStr = openFileDlg.FileName;
MessageBox.Show("打开成功","操作结果");
}
}
}
#endregion
#region ==进行分词处理==
private void btMMMethod_Click(object sender, System.EventArgs e)
{
//(1)确定是否已经打开
if(!_openDBb)
{
MessageBox.Show("请先打开词库!","结果");
return;
}
if (_fileNameStr == "")
{
MessageBox.Show("没有选择待处理文件","结果");
return;
}
//
DateTime dtStart = DateTime.Now;
//(2)打开文件,并对文件进行读取,按照一句话一句话进行处理。
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -