⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 form1.cs

📁 中文文本分类中对文本进行分词处理
💻 CS
字号:
using System;
using System.Drawing;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.Data;

namespace wordseg
{
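	/// <summary>
	/// Main window of the word-segmentation tool: loads a text file and a
	/// dictionary file, segments the text against the dictionary, and lists
	/// the keywords found together with their counts.
	/// </summary>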
	public class form1 : System.Windows.Forms.Form
	{
		// Controls of the form; they are created and configured in InitializeComponent.
		private System.Windows.Forms.TextBox textBox1;
		private System.Windows.Forms.Label label2;
		private System.Windows.Forms.Button btnSegment;
		private System.Windows.Forms.Button btnLoadPaper;
		private System.Windows.Forms.OpenFileDialog openFileDialog1;
		private System.Windows.Forms.Button button1;
		private System.Windows.Forms.TextBox txtKeyWord2;
		private System.Windows.Forms.Label label1;
		private System.Windows.Forms.TextBox textBox2;
		private System.Windows.Forms.TextBox textBox3;

		/// <summary>
		/// One entry of the keyword table: a matched dictionary word and the
		/// number of times it has been recorded.
		/// </summary>
		public struct keyWord
		{
			// The keyword text; a null value marks an unused slot.
			public string array;
			// Number of occurrences recorded for this keyword.
			public int count;
		}
		/// <summary>
		/// Keyword table filled by segment(). Entries are stored contiguously
		/// from index 0; the first entry whose text is null marks the end.
		/// </summary>
		public keyWord[] keyword1=new keyWord[100];
		
		// Initial number of slots in the dictionary array.
		const int MAXLEN=79120;
		/// <summary>
		/// Dictionary words, one per element, filled by button2_Click.
		/// Slots that have not been assigned are null.
		/// </summary>
		public string[] dictionary=new string[MAXLEN];
        
		// Dialog and button used to load the dictionary file.
		private System.Windows.Forms.OpenFileDialog openFileDialog2;
		private System.Windows.Forms.Button button2;

		/// <summary>
		/// Required designer variable.
		/// </summary>
		private System.ComponentModel.Container components = null;

		/// <summary>
		/// Creates the form and builds its controls.
		/// </summary>
		public form1()
		{
			// Required for Windows Forms designer support: creates the controls
			// and wires their event handlers.
			this.InitializeComponent();
		}

		/// <summary>
		/// Cleans up the resources used by the form.
		/// </summary>
		/// <param name="disposing">
		/// true to also dispose the designer component container (when one exists).
		/// </param>
		protected override void Dispose( bool disposing )
		{
			// The component container is released only on an explicit dispose.
			bool releaseComponents = disposing && components != null;
			if (releaseComponents)
			{
				components.Dispose();
			}

			base.Dispose(disposing);
		}

		#region Windows 窗体设计器生成的代码
		/// <summary>
		/// Required method for designer support - do not modify the contents
		/// of this method with the code editor.
		/// Creates every control of the form, sets its layout properties and
		/// subscribes the event handlers defined in this class.
		/// </summary>
		private void InitializeComponent()
		{
			this.textBox1 = new System.Windows.Forms.TextBox();
			this.textBox3 = new System.Windows.Forms.TextBox();
			this.label2 = new System.Windows.Forms.Label();
			this.btnSegment = new System.Windows.Forms.Button();
			this.btnLoadPaper = new System.Windows.Forms.Button();
			this.openFileDialog1 = new System.Windows.Forms.OpenFileDialog();
			this.button1 = new System.Windows.Forms.Button();
			this.txtKeyWord2 = new System.Windows.Forms.TextBox();
			this.label1 = new System.Windows.Forms.Label();
			this.textBox2 = new System.Windows.Forms.TextBox();
			this.openFileDialog2 = new System.Windows.Forms.OpenFileDialog();
			this.button2 = new System.Windows.Forms.Button();
			this.SuspendLayout();
			// 
			// textBox1 - holds the text to segment (filled by btnLoadPaper_Click)
			// 
			this.textBox1.Location = new System.Drawing.Point(104, 16);
			this.textBox1.Multiline = true;
			this.textBox1.Name = "textBox1";
			this.textBox1.ScrollBars = System.Windows.Forms.ScrollBars.Both;
			this.textBox1.Size = new System.Drawing.Size(248, 128);
			this.textBox1.TabIndex = 0;
			this.textBox1.Text = "";
			this.textBox1.TextChanged += new System.EventHandler(this.textBox1_TextChanged);
			// 
			// textBox3 - receives the keyword/count listing written by btnSegment_Click
			// 
			this.textBox3.Location = new System.Drawing.Point(104, 320);
			this.textBox3.Multiline = true;
			this.textBox3.Name = "textBox3";
			this.textBox3.ScrollBars = System.Windows.Forms.ScrollBars.Both;
			this.textBox3.Size = new System.Drawing.Size(248, 104);
			this.textBox3.TabIndex = 2;
			this.textBox3.Text = "";
			this.textBox3.TextChanged += new System.EventHandler(this.textBox3_TextChanged);
			// 
			// label2 - caption reads "segmentation result"
			// 
			this.label2.Font = new System.Drawing.Font("宋体", 9F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((System.Byte)(134)));
			this.label2.Location = new System.Drawing.Point(248, 176);
			this.label2.Name = "label2";
			this.label2.Size = new System.Drawing.Size(104, 23);
			this.label2.TabIndex = 3;
			this.label2.Text = "分词结果";
			// 
			// btnSegment - caption reads "segment"; runs btnSegment_Click
			// 
			this.btnSegment.Location = new System.Drawing.Point(48, 328);
			this.btnSegment.Name = "btnSegment";
			this.btnSegment.Size = new System.Drawing.Size(24, 72);
			this.btnSegment.TabIndex = 5;
			this.btnSegment.Text = "分词";
			this.btnSegment.Click += new System.EventHandler(this.btnSegment_Click);
			// 
			// btnLoadPaper - caption reads "load paper"; runs btnLoadPaper_Click
			// 
			this.btnLoadPaper.Location = new System.Drawing.Point(24, 32);
			this.btnLoadPaper.Name = "btnLoadPaper";
			this.btnLoadPaper.Size = new System.Drawing.Size(64, 23);
			this.btnLoadPaper.TabIndex = 6;
			this.btnLoadPaper.Text = "载入论文";
			this.btnLoadPaper.Click += new System.EventHandler(this.btnLoadPaper_Click);
			// 
			// openFileDialog1 - file picker used by btnLoadPaper_Click
			// 
			this.openFileDialog1.FileOk += new System.ComponentModel.CancelEventHandler(this.openFileDialog1_FileOk);
			// 
			// button1 - caption reads "clear"; runs button1_Click
			// 
			this.button1.Location = new System.Drawing.Point(24, 80);
			this.button1.Name = "button1";
			this.button1.Size = new System.Drawing.Size(64, 23);
			this.button1.TabIndex = 7;
			this.button1.Text = "清空";
			this.button1.Click += new System.EventHandler(this.button1_Click);
			// 
			// txtKeyWord2 - NOTE(review): no handler in this class writes to this box
			// 
			this.txtKeyWord2.Location = new System.Drawing.Point(376, 48);
			this.txtKeyWord2.Multiline = true;
			this.txtKeyWord2.Name = "txtKeyWord2";
			this.txtKeyWord2.Size = new System.Drawing.Size(104, 376);
			this.txtKeyWord2.TabIndex = 8;
			this.txtKeyWord2.Text = "";
			// 
			// label1 - caption reads "keywords and weights"
			// 
			this.label1.Location = new System.Drawing.Point(376, 24);
			this.label1.Name = "label1";
			this.label1.TabIndex = 9;
			this.label1.Text = "关键词及权重";
			// 
			// textBox2 - shows the content of the dictionary file (filled by button2_Click)
			// 
			this.textBox2.Location = new System.Drawing.Point(104, 176);
			this.textBox2.MaxLength = 79120;
			this.textBox2.Multiline = true;
			this.textBox2.Name = "textBox2";
			this.textBox2.ScrollBars = System.Windows.Forms.ScrollBars.Both;
			this.textBox2.Size = new System.Drawing.Size(248, 104);
			this.textBox2.TabIndex = 10;
			this.textBox2.Text = "";
			// 
			// openFileDialog2 - file picker used by button2_Click
			// 
			this.openFileDialog2.FileOk += new System.ComponentModel.CancelEventHandler(this.openFileDialog2_FileOk);
			// 
			// button2 - caption reads "load dictionary"; runs button2_Click
			// 
			this.button2.Location = new System.Drawing.Point(48, 192);
			this.button2.Name = "button2";
			this.button2.Size = new System.Drawing.Size(24, 72);
			this.button2.TabIndex = 11;
			this.button2.Text = "载入词典";
			this.button2.Click += new System.EventHandler(this.button2_Click);
			// 
			// form1 - window caption reads "segment"
			// 
			this.AutoScaleBaseSize = new System.Drawing.Size(6, 14);
			this.ClientSize = new System.Drawing.Size(492, 473);
			this.Controls.Add(this.button2);
			this.Controls.Add(this.textBox2);
			this.Controls.Add(this.label1);
			this.Controls.Add(this.txtKeyWord2);
			this.Controls.Add(this.button1);
			this.Controls.Add(this.btnLoadPaper);
			this.Controls.Add(this.btnSegment);
			this.Controls.Add(this.label2);
			this.Controls.Add(this.textBox3);
			this.Controls.Add(this.textBox1);
			this.Name = "form1";
			this.Text = "分词";
			this.Load += new System.EventHandler(this.form1_Load);
			this.ResumeLayout(false);

		}
		#endregion

		/// <summary>
		/// The main entry point for the application: creates the form and
		/// hands it to Application.Run.
		/// </summary>
		[STAThread]
		static void Main() 
		{
			form1 mainWindow = new form1();
			Application.Run(mainWindow);
		}

		/// <summary>
		/// Handles the "load paper" button: lets the user choose a file with
		/// openFileDialog1 and shows its whole content in textBox1.
		/// The file is decoded with the system default encoding.
		/// </summary>
		/// <param name="sender">The control that raised the event (unused).</param>
		/// <param name="e">Event data (unused).</param>
		private void btnLoadPaper_Click(object sender, System.EventArgs e)
		{
			// Nothing to do when the user cancels the dialog.
			if (openFileDialog1.ShowDialog() != DialogResult.OK)
			{
				return;
			}

			// "using" releases the reader and its file handle even when
			// ReadToEnd throws; the previous explicit Close() was skipped on error.
			using (System.IO.StreamReader sr = new System.IO.StreamReader(
				openFileDialog1.FileName, System.Text.Encoding.Default))
			{
				textBox1.Text = sr.ReadToEnd();
			}
		}

		// Handler subscribed to openFileDialog1.FileOk in InitializeComponent.
		// It is empty: no action is taken when the dialog confirms a file.
		private void openFileDialog1_FileOk(object sender, System.ComponentModel.CancelEventArgs e)
		{
		
		}

		// Handler subscribed to the form's Load event in InitializeComponent.
		// It is empty: no start-up work is performed here.
		private void form1_Load(object sender, System.EventArgs e)
		{
		
		}

		/// <summary>
		/// Handles the "clear" button: empties the text box that holds the
		/// text to segment (textBox1).
		/// </summary>
		private void button1_Click(object sender, System.EventArgs e)
		{
			textBox1.Text = string.Empty;
		}

//
		/// <summary>
		/// Handles the "segment" button: segments the text in textBox1 and
		/// lists every keyword found, followed by its count, in textBox3
		/// (one "keyword  count" line per keyword).
		/// </summary>
		/// <param name="sender">The control that raised the event (unused).</param>
		/// <param name="e">Event data (unused).</param>
		private void btnSegment_Click(object sender, System.EventArgs e)
		{
			// Start from an empty keyword table so the counts describe only the
			// current text; without this a second click doubled every count and
			// appended a second listing below the first one.
			Array.Clear(keyword1, 0, keyword1.Length);

			// Run the segmentation; it fills keyword1.
			segment(textBox1.Text);

			// Build the listing once instead of re-assigning textBox3.Text for
			// every keyword. The index is bounds-checked because a completely
			// full table has no null entry to stop on.
			System.Text.StringBuilder listing = new System.Text.StringBuilder();
			for (int k = 0; k < keyword1.Length && keyword1[k].array != null; k++)
			{
				listing.Append(keyword1[k].array);
				listing.Append("  ");
				listing.Append(keyword1[k].count.ToString());
				listing.Append("\r\n");
			}

			textBox3.Text = listing.ToString();
		}
		// Longest dictionary word, in characters, that segment() tries to match.
		private const int MAXWORDLEN = 5;

		/// <summary>
		/// Segments a text against the dictionary and records every word found
		/// in the keyword table (keyword1).
		/// The longest candidate length is tried first and, for each length, the
		/// left-most match wins. The matched word is recorded, the text to its
		/// left is segmented, and then the text to its right is segmented.
		/// </summary>
		/// <param name="str1">Text to segment; null or empty text records nothing.</param>
		public void segment(string str1)
		{
			string remaining = str1;

			// The right-hand remainder is handled by this loop instead of a
			// recursive call, so recursion only happens for the left-hand part.
			// That part can only contain words shorter than the match just found,
			// which bounds the recursion depth by MAXWORDLEN.
			while (remaining != null && remaining.Length > 0)
			{
				int matchStart = -1;
				int matchLength = 0;

				// Candidate lengths stop at 1: a zero-length candidate is not a
				// word, and matching it made the routine recurse forever on the
				// same text when the dictionary contained a blank line.
				for (int len = MAXWORDLEN; len >= 1 && matchStart < 0; len--)
				{
					for (int start = 0; start + len <= remaining.Length; start++)
					{
						if (compareArray(remaining.Substring(start, len)))
						{
							matchStart = start;
							matchLength = len;
							break;
						}
					}
				}

				if (matchStart < 0)
				{
					// No dictionary word occurs in what is left.
					return;
				}

				recordKeyword(remaining.Substring(matchStart, matchLength));

				if (matchStart > 0)
				{
					segment(remaining.Substring(0, matchStart));
				}

				remaining = remaining.Substring(matchStart + matchLength);
			}
		}

		// Adds one occurrence of word to keyword1: increments the count of an
		// existing entry or appends a new entry with count 1.
		private void recordKeyword(string word)
		{
			int slot = 0;
			while (slot < keyword1.Length && keyword1[slot].array != null)
			{
				if (keyword1[slot].array == word)
				{
					keyword1[slot].count += 1;
					return;
				}
				slot++;
			}

			// Grow the table before it becomes full so that at least one unused
			// (null) entry always remains after the last keyword; readers of
			// keyword1 stop at the first null entry.
			if (slot + 1 >= keyword1.Length)
			{
				keyWord[] larger = new keyWord[Math.Max(keyword1.Length * 2, slot + 2)];
				Array.Copy(keyword1, 0, larger, 0, keyword1.Length);
				keyword1 = larger;
			}

			keyword1[slot].array = word;
			keyword1[slot].count = 1;
		}
		
		/// <summary>
		/// Tells whether a candidate word is present in the dictionary array.
		/// </summary>
		/// <param name="str2">Candidate word to look up.</param>
		/// <returns>
		/// true when the dictionary contains an element equal to
		/// <paramref name="str2"/>; false otherwise, and always false for a null
		/// or empty candidate.
		/// </returns>
		public bool compareArray(string str2)
		{
			// A null candidate would otherwise "match" the first unused (null)
			// dictionary slot, and an empty one would match a blank dictionary
			// line; neither is a word.
			if (str2 == null || str2.Length == 0)
			{
				return false;
			}

			return Array.IndexOf(dictionary, str2) >= 0;
		}

		// Handler subscribed to textBox1.TextChanged in InitializeComponent.
		// It is empty: changes to the text trigger no action.
		private void textBox1_TextChanged(object sender, System.EventArgs e)
		{
		  
		}

		// Empty click handler.
		// NOTE(review): InitializeComponent does not subscribe this method to any
		// control, so it looks like a leftover - confirm before removing it.
		// The body used to hold commented-out code that read the file chosen in
		// openFileDialog1 into textBox2.
		private void btnLoadLib_Click(object sender, System.EventArgs e)
		{

			
		
		}

		// Handler subscribed to textBox3.TextChanged in InitializeComponent.
		// It is empty: changes to the text trigger no action.
		private void textBox3_TextChanged(object sender, System.EventArgs e)
		{
		
		}

		// Handler subscribed to openFileDialog2.FileOk in InitializeComponent.
		// It is empty: no action is taken when the dialog confirms a file.
		private void openFileDialog2_FileOk(object sender, System.ComponentModel.CancelEventArgs e)
		{
		
		}

		/// <summary>
		/// Handles the "load dictionary" button: lets the user choose a
		/// dictionary file with openFileDialog2, stores its non-empty lines in
		/// the dictionary array (one word per element) and shows the file
		/// content in textBox2. The file is decoded with the system default
		/// encoding.
		/// </summary>
		/// <param name="sender">The control that raised the event (unused).</param>
		/// <param name="e">Event data (unused).</param>
		private void button2_Click(object sender, System.EventArgs e)
		{
			// Nothing to do when the user cancels the dialog.
			if (openFileDialog2.ShowDialog() != DialogResult.OK)
			{
				return;
			}

			// Read the file once; "using" releases the file handle even when
			// reading fails.
			string dictionaryText;
			using (System.IO.StreamReader sr = new System.IO.StreamReader(
				openFileDialog2.FileName, System.Text.Encoding.Default))
			{
				dictionaryText = sr.ReadToEnd();
			}

			// Split the content into lines. Blank lines are skipped because an
			// empty string is not a word and must not become a dictionary entry.
			System.Collections.Specialized.StringCollection linesCollection =
				new System.Collections.Specialized.StringCollection();
			using (System.IO.StringReader lineReader = new System.IO.StringReader(dictionaryText))
			{
				string nextLine;
				while ((nextLine = lineReader.ReadLine()) != null)
				{
					if (nextLine.Length > 0)
					{
						linesCollection.Add(nextLine);
					}
				}
			}

			if (linesCollection.Count > dictionary.Length)
			{
				// The file holds more words than the array has slots; CopyTo
				// would throw, so allocate an array that fits.
				dictionary = new string[linesCollection.Count];
			}
			else
			{
				// Remove the words of a previously loaded dictionary so that a
				// shorter file does not leave stale entries behind.
				Array.Clear(dictionary, 0, dictionary.Length);
			}

			// Load the words into the dictionary array.
			linesCollection.CopyTo(dictionary, 0);

			// Show the dictionary file in the text box.
			textBox2.Text = dictionaryText;
		}

		
	}
}


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -