⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 form1.cs

📁 将HTML网页格式中的正文提取出来 主要是小说网上下载的打包小说
💻 CS
字号:
using System;
using System.Drawing;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.Data;
using System.IO;

namespace HTML转TXT
{
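	/// <summary>
	/// Main window of the HTML-to-TXT converter: holds a list of HTML files and
	/// extracts the title and body text of each one into a plain text file.
	/// </summary>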
	public class Form1 : System.Windows.Forms.Form
	{
		// Controls created and configured in InitializeComponent.
		private System.Windows.Forms.ListBox listBox1;
		private System.Windows.Forms.Label label1;
		private System.Windows.Forms.Button button1;
		private System.Windows.Forms.Button button2;
		private System.Windows.Forms.Button button3;
		private System.Windows.Forms.Button button4;
		private System.Windows.Forms.Button button5;
		private System.Windows.Forms.Button button6;
		// Input stream of the HTML file currently being converted; read byte by byte
		// by button1_Click and by the matcher helpers (titl, tit, endit, en, ex).
		private	FileStream srd;
		// Marker that precedes the title text: <div id="title">
		char[] title={'<','d','i','v',' ','i','d','=','"','t','i','t','l','e','"','>'};
		// Marker that ends both the title and the content section: </div>
		char[] ee={'<','/','d','i','v','>'};
		// Marker that precedes the body text: <div id="content">
		char[] ti={'<','d','i','v',' ','i','d','=','"','c','o','n','t','e','n','t','"','>'};
		// The &nbsp; entity; button1_Click writes a space in its place.
		char[] xx={'&','n','b','s','p',';'};
		// Tail of a "<br />" tag. The leading '<' is deliberately absent: in button1_Click
		// the endit() call that runs just before ex() has already consumed it.
		char[] an={'b','r',' ','/','>'};
		// Most recently read byte from srd (the value returned by ReadByte, -1 at end of stream).
		int a;
		private System.Windows.Forms.TextBox textBox1;
		private System.Windows.Forms.Button button7;
		/// <summary>
		/// Required designer variable.
		/// </summary>
		private System.ComponentModel.Container components = null;

		/// <summary>
		/// Creates the form and builds its controls.
		/// </summary>
		public Form1()
		{
			// Required for Windows Forms designer support.
			this.InitializeComponent();
		}

		/// <summary>
		/// Releases the resources used by the form.
		/// </summary>
		/// <param name="disposing">
		/// When true, the <c>components</c> container (if one exists) is disposed
		/// before the base class is asked to dispose.
		/// </param>
		protected override void Dispose( bool disposing )
		{
			bool hasComponents = components != null;
			if( disposing && hasComponents )
			{
				components.Dispose();
			}

			base.Dispose( disposing );
		}

		#region Windows 窗体设计器生成的代码
		/// <summary>
		/// Required method for Designer support - do not modify
		/// the contents of this method with the code editor.
		/// Creates every control, sets its position, size, caption and tab order,
		/// wires the button Click handlers, and adds the controls to the form.
		/// </summary>
		private void InitializeComponent()
		{
			this.button1 = new System.Windows.Forms.Button();
			this.listBox1 = new System.Windows.Forms.ListBox();
			this.label1 = new System.Windows.Forms.Label();
			this.button2 = new System.Windows.Forms.Button();
			this.button3 = new System.Windows.Forms.Button();
			this.button4 = new System.Windows.Forms.Button();
			this.button5 = new System.Windows.Forms.Button();
			this.button6 = new System.Windows.Forms.Button();
			this.textBox1 = new System.Windows.Forms.TextBox();
			this.button7 = new System.Windows.Forms.Button();
			this.SuspendLayout();
			// 
			// button1 - caption means "Convert"
			// 
			this.button1.Location = new System.Drawing.Point(304, 64);
			this.button1.Name = "button1";
			this.button1.Size = new System.Drawing.Size(88, 32);
			this.button1.TabIndex = 0;
			this.button1.Text = "转换";
			this.button1.Click += new System.EventHandler(this.button1_Click);
			// 
			// listBox1 - the list of HTML files to convert
			// 
			this.listBox1.ItemHeight = 12;
			this.listBox1.Location = new System.Drawing.Point(8, 24);
			this.listBox1.Name = "listBox1";
			this.listBox1.Size = new System.Drawing.Size(280, 208);
			this.listBox1.TabIndex = 1;
			// 
			// label1 - caption means "Conversion list"
			// 
			this.label1.Location = new System.Drawing.Point(8, 8);
			this.label1.Name = "label1";
			this.label1.Size = new System.Drawing.Size(56, 16);
			this.label1.TabIndex = 2;
			this.label1.Text = "转换列表";
			// 
			// button2 - caption means "Open"
			// 
			this.button2.Location = new System.Drawing.Point(304, 32);
			this.button2.Name = "button2";
			this.button2.Size = new System.Drawing.Size(88, 32);
			this.button2.TabIndex = 3;
			this.button2.Text = "打开";
			this.button2.Click += new System.EventHandler(this.button2_Click);
			// 
			// button3 - caption means "Add"
			// 
			this.button3.Location = new System.Drawing.Point(304, 96);
			this.button3.Name = "button3";
			this.button3.Size = new System.Drawing.Size(88, 32);
			this.button3.TabIndex = 4;
			this.button3.Text = "添加";
			this.button3.Click += new System.EventHandler(this.button3_Click);
			// 
			// button4 - caption means "Delete"
			// 
			this.button4.Location = new System.Drawing.Point(304, 160);
			this.button4.Name = "button4";
			this.button4.Size = new System.Drawing.Size(88, 32);
			this.button4.TabIndex = 5;
			this.button4.Text = "删除";
			this.button4.Click += new System.EventHandler(this.button4_Click);
			// 
			// button5 - caption means "Clear"
			// 
			this.button5.Location = new System.Drawing.Point(304, 128);
			this.button5.Name = "button5";
			this.button5.Size = new System.Drawing.Size(88, 32);
			this.button5.TabIndex = 6;
			this.button5.Text = "清空";
			this.button5.Click += new System.EventHandler(this.button5_Click);
			// 
			// button6 - caption means "Exit"
			// 
			this.button6.Location = new System.Drawing.Point(304, 192);
			this.button6.Name = "button6";
			this.button6.Size = new System.Drawing.Size(88, 32);
			this.button6.TabIndex = 7;
			this.button6.Text = "退出";
			this.button6.Click += new System.EventHandler(this.button6_Click);
			// 
			// textBox1 - output folder; button1_Click writes the converted files under it
			// 
			this.textBox1.Location = new System.Drawing.Point(8, 248);
			this.textBox1.Name = "textBox1";
			this.textBox1.Size = new System.Drawing.Size(280, 21);
			this.textBox1.TabIndex = 8;
			this.textBox1.Text = "D:\\";
			// 
			// button7 - caption means "Output path"
			// 
			this.button7.Location = new System.Drawing.Point(304, 240);
			this.button7.Name = "button7";
			this.button7.Size = new System.Drawing.Size(88, 32);
			this.button7.TabIndex = 9;
			this.button7.Text = "存放路径";
			this.button7.Click += new System.EventHandler(this.button7_Click);
			// 
			// Form1 - window title means "HTML to TXT conversion"
			// 
			this.AutoScaleBaseSize = new System.Drawing.Size(6, 14);
			this.ClientSize = new System.Drawing.Size(408, 286);
			this.Controls.Add(this.button7);
			this.Controls.Add(this.textBox1);
			this.Controls.Add(this.button6);
			this.Controls.Add(this.button5);
			this.Controls.Add(this.button4);
			this.Controls.Add(this.button3);
			this.Controls.Add(this.button2);
			this.Controls.Add(this.label1);
			this.Controls.Add(this.listBox1);
			this.Controls.Add(this.button1);
			this.Name = "Form1";
			this.Text = "HTML转换TXT";
			this.ResumeLayout(false);

		}
		#endregion

		/// <summary>
		/// Application entry point: creates the main form and hands it to
		/// <c>Application.Run</c>.
		/// </summary>
		[STAThread]
		static void Main() 
		{
			Form1 mainForm = new Form1();
			Application.Run(mainForm);
		}

		/// <summary>
		/// Tries to match the <c>title</c> marker starting at the current byte <c>a</c>.
		/// Every matching byte is consumed by reading the next byte from <c>srd</c> into <c>a</c>.
		/// </summary>
		/// <returns>
		/// true when the whole marker matched (<c>a</c> then holds the byte that follows it);
		/// false at the first mismatch, in which case the bytes matched so far stay consumed
		/// and <c>a</c> holds the mismatching byte.
		/// </returns>
		private bool titl()
		{
			for(int matched=0;matched<title.Length;matched++)
			{
				if((char)a!=title[matched])
				{
					return false;
				}
				a=srd.ReadByte();
			}
			return true;
		}


		/// <summary>
		/// Tries to match the <c>ti</c> marker starting at the current byte <c>a</c>.
		/// Every matching byte is consumed by reading the next byte from <c>srd</c> into <c>a</c>.
		/// </summary>
		/// <returns>
		/// true when the whole marker matched (<c>a</c> then holds the byte that follows it);
		/// false at the first mismatch, in which case the bytes matched so far stay consumed
		/// and <c>a</c> holds the mismatching byte.
		/// </returns>
		private bool tit()
		{
			for(int matched=0;matched<ti.Length;matched++)
			{
				if((char)a!=ti[matched])
				{
					return false;
				}
				a=srd.ReadByte();
			}
			return true;
		}

		/// <summary>
		/// Tries to match the <c>ee</c> marker starting at the current byte <c>a</c>.
		/// Every matching byte is consumed by reading the next byte from <c>srd</c> into <c>a</c>.
		/// </summary>
		/// <returns>
		/// true when the whole marker matched (<c>a</c> then holds the byte that follows it);
		/// false at the first mismatch, in which case the bytes matched so far (for example
		/// a lone '&lt;') stay consumed and <c>a</c> holds the mismatching byte.
		/// </returns>
		private bool endit()
		{
			for(int matched=0;matched<ee.Length;matched++)
			{
				if((char)a!=ee[matched])
				{
					return false;
				}
				a=srd.ReadByte();
			}
			return true;
		}


		/// <summary>
		/// Tries to match the <c>xx</c> pattern starting at the current byte <c>a</c>.
		/// Every matching byte is consumed by reading the next byte from <c>srd</c> into <c>a</c>.
		/// </summary>
		/// <returns>
		/// true when the whole pattern matched (<c>a</c> then holds the byte that follows it);
		/// false at the first mismatch, in which case the bytes matched so far stay consumed
		/// and <c>a</c> holds the mismatching byte.
		/// </returns>
		private bool en()
		{
			for(int matched=0;matched<xx.Length;matched++)
			{
				if((char)a!=xx[matched])
				{
					return false;
				}
				a=srd.ReadByte();
			}
			return true;
		}

		/// <summary>
		/// Tries to match the <c>an</c> pattern starting at the current byte <c>a</c>.
		/// Every matching byte is consumed by reading the next byte from <c>srd</c> into <c>a</c>.
		/// </summary>
		/// <returns>
		/// true when the whole pattern matched (<c>a</c> then holds the byte that follows it);
		/// false at the first mismatch, in which case the bytes matched so far stay consumed
		/// and <c>a</c> holds the mismatching byte.
		/// </returns>
		private bool ex()
		{
			for(int matched=0;matched<an.Length;matched++)
			{
				if((char)a!=an[matched])
				{
					return false;
				}
				a=srd.ReadByte();
			}
			return true;
		}

		/// <summary>
		/// Click handler of button1: converts every file in listBox1.
		/// The file at list index i is read byte by byte and its result is written to
		/// "i.txt" under the folder named in textBox1. The output holds the text found
		/// after the title marker up to the next closing div, a newline, and then the
		/// text found after the content marker up to its closing div, with the
		/// <c>xx</c> pattern replaced by a space and the <c>an</c> pattern by a newline.
		/// Shows a message and does nothing when the list is empty.
		/// </summary>
		/// <param name="sender">Event source (unused).</param>
		/// <param name="e">Event data (unused).</param>
		private void button1_Click(object sender, System.EventArgs e)
		{
			if(listBox1.Items.Count==0)
			{
				MessageBox.Show("listbox count is 0");
				return;
			}
			for(int i=0;i<listBox1.Items.Count;i++)
			{
				string hgb=textBox1.Text +"\\"+ i.ToString() + ".txt";
				srd=File.OpenRead(listBox1.Items[i].ToString());
				try
				{
					// File.Create truncates an existing file. File.OpenWrite does not, so a
					// shorter result used to leave the tail of an older, longer file behind.
					using(FileStream swt=File.Create(hgb))
					{
						// Skip forward until the title marker has been consumed (or the input ends).
						do
						{
							a=srd.ReadByte();
						}while(!titl() && a!=-1);

						// Copy the title up to the closing div, then terminate it with a newline.
						while(a!=-1)
						{
							if(endit()){swt.WriteByte((byte)('\n')); break;}
							// A failed match may have run into end of stream; -1 must not be
							// written, because the cast would turn it into a stray 0xFF byte.
							if(a==-1){break;}
							swt.WriteByte((byte)a);
							a=srd.ReadByte();
						}

						// Skip forward until the content marker has been consumed (or the input ends).
						do
						{
							a=srd.ReadByte();
						}while(!tit() && a!=-1);

						// Copy the body. The order of the three checks matters: endit() consumes
						// the '<' of a tag, which is why the pattern tested by ex() starts at "br".
						while(a!=-1)
						{
							if(endit()){break;}
							if(en()){swt.WriteByte((byte)(' ')); continue;}
							if(ex()){swt.WriteByte((byte)('\n')); continue;}
							if(a==-1){break;}
							swt.WriteByte((byte)a);
							a=srd.ReadByte();
						}
					}
				}
				finally
				{
					// Release the input file even when reading or writing throws.
					srd.Close();
				}
			}
		}

		/// <summary>
		/// Click handler of button2: asks the user for a folder and replaces the
		/// contents of listBox1 with the full paths of the .htm and .html files found
		/// directly inside that folder. Nothing changes when the dialog is cancelled.
		/// </summary>
		/// <param name="sender">Event source (unused).</param>
		/// <param name="e">Event data (unused).</param>
		private void button2_Click(object sender, System.EventArgs e)
		{
			using(System.Windows.Forms.FolderBrowserDialog h = new System.Windows.Forms.FolderBrowserDialog())
			{
				if(h.ShowDialog()!=DialogResult.OK)
				{
					return;
				}

				// Enumerate before clearing, so that a failure to read the folder
				// leaves the current list intact.
				string[] files=Directory.GetFiles(h.SelectedPath);
				// GetFiles does not promise an order; sort so the list is stable
				// (button1_Click names output files after the list index).
				Array.Sort(files);

				listBox1.Items.Clear();
				foreach(string file in files)
				{
					if(IsHtmlFile(file))
					{
						listBox1.Items.Add(file);
					}
				}
			}
		}

		/// <summary>
		/// Tells whether a path has an ".htm" or ".html" extension, ignoring case.
		/// </summary>
		/// <param name="path">File name or full path to inspect.</param>
		/// <returns>true for .htm/.html files, false otherwise.</returns>
		private static bool IsHtmlFile(string path)
		{
			string extension=Path.GetExtension(path);
			System.Globalization.CultureInfo invariant=System.Globalization.CultureInfo.InvariantCulture;
			return String.Compare(extension, ".html", true, invariant)==0
				|| String.Compare(extension, ".htm", true, invariant)==0;
		}

		/// <summary>
		/// Click handler of button3: lets the user pick a single file and appends its
		/// path to listBox1. Nothing is added when the dialog is cancelled.
		/// </summary>
		/// <param name="sender">Event source (unused).</param>
		/// <param name="e">Event data (unused).</param>
		private void button3_Click(object sender, System.EventArgs e)
		{
			// The dialog is disposable and is not released by ShowDialog returning,
			// so dispose it explicitly.
			using(OpenFileDialog dlg=new OpenFileDialog())
			{
				dlg.Title="打开HTML";
				dlg.Filter="HTML文件(*.html)|*.html|所有文件(*.*)|*.*";
				if(dlg.ShowDialog()==DialogResult.OK)
				{
					listBox1.Items.Add(dlg.FileName);
				}
			}
		}

		/// <summary>
		/// Click handler of button4: removes the selected entry from listBox1.
		/// Shows a message instead when the list is empty or nothing is selected.
		/// </summary>
		/// <param name="sender">Event source (unused).</param>
		/// <param name="e">Event data (unused).</param>
		private void button4_Click(object sender, System.EventArgs e)
		{
			if(listBox1.Items.Count<=0)
			{
				MessageBox.Show("This ListBox count is 0!");
				return;
			}

			int selected=listBox1.SelectedIndex;
			bool inRange=selected>=0&&selected<listBox1.Items.Count;
			if(!inRange)
			{
				MessageBox.Show("You are click it?");
				return;
			}

			listBox1.Items.RemoveAt(selected);
		}

		/// <summary>
		/// Click handler of button5: removes every entry from listBox1.
		/// </summary>
		/// <param name="sender">Event source (unused).</param>
		/// <param name="e">Event data (unused).</param>
		private void button5_Click(object sender, System.EventArgs e)
		{
			this.listBox1.Items.Clear();
		}

		/// <summary>
		/// Click handler of button6: closes the form.
		/// </summary>
		/// <param name="sender">Event source (unused).</param>
		/// <param name="e">Event data (unused).</param>
		private void button6_Click(object sender, System.EventArgs e)
		{
			Close();
		}

		/// <summary>
		/// Click handler of button7: lets the user pick a folder and puts its path
		/// into textBox1. The text box is left unchanged when the dialog is cancelled.
		/// </summary>
		/// <param name="sender">Event source (unused).</param>
		/// <param name="e">Event data (unused).</param>
		private void button7_Click(object sender, System.EventArgs e)
		{
			// The dialog is disposable and is not released by ShowDialog returning,
			// so dispose it explicitly.
			using(System.Windows.Forms.FolderBrowserDialog h = new System.Windows.Forms.FolderBrowserDialog())
			{
				if(h.ShowDialog()==DialogResult.OK)
				{
					textBox1.Text=h.SelectedPath;
				}
			}
		}
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -