⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 form1.cs

📁 实现文本的特征提取
💻 CS
📖 第 1 页 / 共 2 页
字号:
			this.Controls.Add(this.btn_countTF_Test);
			this.Controls.Add(this.btn_calculateSimValue);
			this.Controls.Add(this.btn_createTermLib);
			this.Controls.Add(this.btn_Weight);
			this.Controls.Add(this.groupBox1);
			this.Controls.Add(this.btn_selectDoc);
			this.Controls.Add(this.btn_Serial);
			this.Controls.Add(this.btn_countTF);
			this.Controls.Add(this.panel1);
			this.Controls.Add(this.panel2);
			this.Controls.Add(this.label1);
			this.Name = "Form1";
			this.Text = "文档训练管理";
			this.groupBox1.ResumeLayout(false);
			this.ResumeLayout(false);

		}
		#endregion

		/// <summary>
		/// Main entry point of the application: creates the <see cref="Form1"/>
		/// window and runs the Windows Forms message loop on it.
		/// </summary>
		[STAThread]
		static void Main()
		{
			Form1 mainForm = new Form1();
			Application.Run(mainForm);
		}

		/// <summary>
		/// Click handler of the "select documents" button. Shows the folder
		/// browser dialog and, if the user confirms a folder, stores it in
		/// <c>folderPath</c> and moves the workflow on to the numbering step.
		/// </summary>
		/// <param name="sender">The control that raised the event.</param>
		/// <param name="e">Event data (unused).</param>
		private void btn_selectDoc_Click(object sender, System.EventArgs e)
		{
			// Do not offer the "New Folder" button in the dialog.
			folderBrowserDialog1.ShowNewFolderButton = false;

			if (folderBrowserDialog1.ShowDialog(this) != DialogResult.OK)
			{
				// Dialog cancelled: keep the current button states so the user
				// can open the dialog again. Previously the select button was
				// disabled here even though no folder had been chosen.
				return;
			}

			folderPath = folderBrowserDialog1.SelectedPath;

			// A folder is selected: only the numbering step is available now.
			this.btn_selectDoc.Enabled = false;
			this.btn_Serial.Enabled = true;
			this.btn_countTF.Enabled = false;
			this.btn_Weight.Enabled = false;
			this.btn_createTermLib.Enabled = false;
		}

		/// <summary>
		/// Click handler of the numbering button. Numbers every training
		/// document below <c>folderPath</c> into the "DocInforLib" table and,
		/// only when that succeeded, enables the term-frequency step.
		/// </summary>
		/// <param name="sender">The control that raised the event.</param>
		/// <param name="e">Event data (unused).</param>
		private void btn_Serial_Click(object sender, System.EventArgs e)
		{
			// Becomes true only after numbering ran to completion.
			bool numbered = false;

			try
			{
				if (folderPath != null)
				{
					GetFileListFromDirectory(folderPath, "DocInforLib");
					numbered = true;
				}
				else
				{
					MessageBox.Show("请先选择训练文档所在的根文件夹!");
				}
			}
			catch (Exception ep)
			{
				MessageBox.Show(ep.ToString());
			}
			finally
			{
				// Advance to the TF step only on success. On failure (no folder
				// selected or an exception) keep folder selection and numbering
				// available instead of leaving the user with no way to retry.
				this.btn_selectDoc.Enabled = !numbered;
				this.btn_Serial.Enabled = !numbered;
				this.btn_countTF.Enabled = numbered;
				this.btn_Weight.Enabled = false;
				this.btn_createTermLib.Enabled = false;
			}
		}

		/// <summary>
		/// Numbers the training documents found below the given directory.
		/// Directories that contain sub-directories are only recursed into;
		/// the files of each leaf directory (one without sub-directories) are
		/// stored through <c>docLib.AddDoc</c> with IDs of the form
		/// "cateID-n" (n counts from 1 per directory), and one summary row
		/// per leaf directory is appended to <c>listView1</c>.
		/// </summary>
		/// <param name="DirectoryName">Directory to process; expected to be <c>folderPath</c> or a directory below it.</param>
		/// <param name="tableName">Name of the table passed on to <c>docLib.AddDoc</c>.</param>
		private void GetFileListFromDirectory(string DirectoryName, string tableName)
		{
			listView1.CheckBoxes = true;
			DirectoryInfo directory = new DirectoryInfo(DirectoryName);

			DirectoryInfo[] subDirectoryList = directory.GetDirectories();
			if (subDirectoryList.Length > 0)
			{
				// Not a leaf: descend only. Files lying next to sub-directories
				// are not numbered (unchanged from the original behaviour).
				for (int i = 0; i < subDirectoryList.Length; i++)
				{
					GetFileListFromDirectory(subDirectoryList[i].FullName, tableName);
				}
				return;
			}

			FileInfo[] fileList = directory.GetFiles();

			// The category name is this directory's path relative to the
			// selected root folder, e.g. root "C:\docs" and directory
			// "C:\docs\school" give "school". Leading separators are trimmed
			// rather than blindly skipping one character, so a root path that
			// already ends with a separator no longer loses the first letter.
			string fileCateName = string.Empty;
			if (directory.FullName.Length > folderPath.Length)
			{
				fileCateName = directory.FullName.Substring(folderPath.Length);
			}
			fileCateName = fileCateName.TrimStart(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);
			if (fileCateName.Length == 0)
			{
				// Files sit directly in the root folder (this used to throw
				// ArgumentOutOfRangeException): fall back to the folder's own
				// name. NOTE(review): confirm this is the desired category.
				fileCateName = directory.Name;
			}

			int count = 0;
			if (fileList.Length > 0)
			{
				// All files of one directory share the category, so the ID is
				// looked up once instead of once per file.
				string cateID = new Njnu.DAL.Category().GetCateIDByCateName(fileCateName);

				for (int i = 0; i < fileList.Length; i++)
				{
					doc = new Common.Doc();
					doc.CateID = cateID;
					doc.DocID = cateID + "-" + (++count);
					doc.Path = fileList[i].FullName;

					docLib.AddDoc(doc, tableName);
				}
			}

			// Use the item returned by Add instead of assuming the new row is
			// the last one in the collection.
			ListViewItem summaryItem = listView1.Items.Add(fileCateName);
			summaryItem.Checked = true;
			summaryItem.SubItems.Add(count + "篇");
			summaryItem.SubItems.Add("编号成功");
		}

		/// <summary>
		/// Click handler of the term-frequency button. For every document
		/// returned by <c>docLib.GetAllDoc("DocInforLib")</c> it adds the
		/// document's words to the "WordLib" table, marks the document as
		/// done and runs the word filters; afterwards only the weight button
		/// stays enabled.
		/// </summary>
		/// <param name="sender">The control that raised the event.</param>
		/// <param name="e">Event data (unused).</param>
		/// <exception cref="Exception">
		/// A document failed to process; the message names the file and the
		/// original exception is available as <c>InnerException</c>.
		/// </exception>
		private void btn_countTF_Click(object sender, System.EventArgs e)
		{
			// Documents waiting to be trained.
			DataTable dt_doc = docLib.GetAllDoc("DocInforLib");

			foreach (DataRow row in dt_doc.Rows)
			{
				doc = new Common.Doc();
				doc.CateID = row["cateID"].ToString();
				doc.DocID = row["docID"].ToString();
				doc.Path = row["docPath"].ToString();

				try
				{
					wordLib.AddWord(doc, "WordLib");    // add this document's words to table WordLib
					docLib.SetDone(doc, "DocInforLib"); // set the IsDone flag of the document

					// Filter out noise terms and low-frequency terms.
					wordLib.DeleteSigleWord("WordLib");
					wordLib.DeleteRareWord("WordLib");
					wordLib.DeleteWordByTF(5, "WordLib"); // 5 = term-frequency threshold tf; words below it are removed
				}
				catch (Exception ex)
				{
					// Keep the original exception as InnerException so its type
					// and stack trace are not lost.
					throw new Exception(" 文件"+ doc.Path +"发生错误!" + ex.Message, ex);
				}
			}

			this.btn_selectDoc.Enabled = false;
			this.btn_Serial.Enabled = false;
			this.btn_countTF.Enabled = false;
			this.btn_Weight.Enabled = true;
			this.btn_createTermLib.Enabled = false;
		}

		/// <summary>
		/// Click handler of the weight button: calls
		/// <c>wordLib.CreateTfidfLib()</c> and then leaves only the
		/// "create term library" button enabled.
		/// </summary>
		/// <param name="sender">The control that raised the event.</param>
		/// <param name="e">Event data (unused).</param>
		private void btn_Weight_Click(object sender, System.EventArgs e)
		{
			wordLib.CreateTfidfLib();

			// Next workflow step: term library creation.
			btn_selectDoc.Enabled = false;
			btn_Serial.Enabled = false;
			btn_countTF.Enabled = false;
			btn_Weight.Enabled = false;
			btn_createTermLib.Enabled = true;
		}

		/// <summary>
		/// Click handler of the "create term library" button: creates the
		/// temporary term library, removes words by document frequency and
		/// creates the final term library, then disables every workflow
		/// button.
		/// </summary>
		/// <param name="sender">The control that raised the event.</param>
		/// <param name="e">Event data (unused).</param>
		private void btn_createTermLib_Click(object sender, System.EventArgs e)
		{
			// Temporary TermLib first, then the document-frequency filter
			// (3 = the specified document frequency df), then the real TermLib.
			wordLib.CreateTempTermLib();
			wordLib.DeleteWordByDF(3);
			wordLib.CreateTermLib();

			// Workflow finished: nothing remains enabled.
			btn_selectDoc.Enabled = false;
			btn_Serial.Enabled = false;
			btn_countTF.Enabled = false;
			btn_Weight.Enabled = false;
			btn_createTermLib.Enabled = false;
		}

		

		/// <summary>
		/// Click handler of the test numbering button: numbers the documents
		/// below <c>folderPath</c> into the "DocInforLib_test" table. Shows a
		/// message box when no folder has been selected or when numbering
		/// throws.
		/// </summary>
		/// <param name="sender">The control that raised the event.</param>
		/// <param name="e">Event data (unused).</param>
		private void btn_Serial_Test_Click(object sender, System.EventArgs e)
		{
			try
			{
				if (folderPath == null)
				{
					MessageBox.Show("请先选择训练文档所在的根文件夹!");
					return;
				}

				GetFileListFromDirectory(folderPath, "DocInforLib_test");
			}
			catch (Exception failure)
			{
				MessageBox.Show(failure.ToString());
			}
		}

		/// <summary>
		/// Click handler of the test term-frequency button. For every
		/// document returned by <c>docLib.GetAllDoc("DocInforLib_test")</c>
		/// it adds the document's words to the "WordLib_test" table, marks
		/// the document as done and runs the word filters.
		/// </summary>
		/// <param name="sender">The control that raised the event.</param>
		/// <param name="e">Event data (unused).</param>
		/// <exception cref="Exception">
		/// A document failed to process; the message names the file and the
		/// original exception is available as <c>InnerException</c>.
		/// </exception>
		private void btn_countTF_Test_Click(object sender, System.EventArgs e)
		{
			// Documents waiting to be tested.
			DataTable dt_doc = docLib.GetAllDoc("DocInforLib_test");

			foreach (DataRow row in dt_doc.Rows)
			{
				doc = new Common.Doc();
				doc.CateID = row["cateID"].ToString();
				doc.DocID = row["docID"].ToString();
				doc.Path = row["docPath"].ToString();

				try
				{
					wordLib.AddWord(doc, "WordLib_test");    // add this document's words to table WordLib_test
					docLib.SetDone(doc, "DocInforLib_test"); // set the IsDone flag of the document

					// Filter out noise terms and low-frequency terms.
					wordLib.DeleteSigleWord("WordLib_test");
					wordLib.DeleteRareWord("WordLib_test");
					wordLib.DeleteWordByTF(5, "WordLib_test"); // 5 = term-frequency threshold tf; words below it are removed
				}
				catch (Exception ex)
				{
					// Keep the original exception as InnerException so its type
					// and stack trace are not lost.
					throw new Exception(" 文件"+ doc.Path +"发生错误!" + ex.Message, ex);
				}
			}
		}

		/// <summary>
		/// Click handler of the test weight button; it only delegates to
		/// <c>wordLib.CreateTfidfLib_test()</c>.
		/// </summary>
		/// <param name="sender">The control that raised the event.</param>
		/// <param name="e">Event data (unused).</param>
		private void btn_Weight_Test_Click(object sender, System.EventArgs e)
		{
			wordLib.CreateTfidfLib_test();
		}

		/// <summary>
		/// Click handler of the similarity button. For every test document
		/// returned by <c>DocLib.GetAllTestDoc()</c> it loads the document's
		/// TF-IDF rows, passes them to <c>wordLib.CalculateSimValue</c> and
		/// collects the returned text; the combined text is then written to
		/// the result file, replacing any previous content.
		/// </summary>
		/// <param name="sender">The control that raised the event.</param>
		/// <param name="e">Event data (unused).</param>
		private void btn_calculateSimValue_Click(object sender, System.EventArgs e)
		{
			// StringBuilder avoids re-copying the whole result on every append.
			System.Text.StringBuilder test_result = new System.Text.StringBuilder();

			// Documents waiting to be tested.
			DataTable dt_docTest = new DocLib().GetAllTestDoc();

			TfidfLib tfidfLib = new TfidfLib();
			foreach (DataRow row in dt_docTest.Rows)
			{
				DataTable dt_tfidfTest = tfidfLib.GetTfidfLibBydocID(row["docID"].ToString());

				// Compute the similarity and collect the returned information.
				test_result.Append(wordLib.CalculateSimValue(dt_tfidfTest));
			}

			// FileMode.Create truncates an existing file; OpenOrCreate left the
			// tail of a longer previous result behind the new text. The using
			// statements close the writer and stream even if the write fails.
			// NOTE(review): the output path is hard-coded to a developer machine.
			using (FileStream file = new FileStream(@"D:\cq\实验室项目\特征提取部分\训练样本收集部分\test-result.txt", FileMode.Create))
			using (StreamWriter sw = new StreamWriter(file))
			{
				sw.WriteLine(test_result.ToString());
			}
		}

	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -