⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mainform.cs

📁 利用VS C#实现的网络爬虫功能
💻 CS
📖 第 1 页 / 共 5 页
字号:
// inventor name: Hatem Mostafa
// Date: 19/3/2006

using System;
using System.Drawing;
using System.Xml;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.Data;
using System.Resources;
using System.Threading;
using System.Runtime.InteropServices;
using Microsoft.Win32;
using System.Net;
using System.Net.Sockets;
using LiteLib;


namespace Crawler
{
	/// <summary>
	/// Main window of the crawler application: holds the crawl settings, the worker-thread bookkeeping and the status-bar counters.
	/// </summary>
	public class CrawlerForm : System.Windows.Forms.Form
	{
		// Queue of unique URIs (per the original author's note; the code that
		// fills and drains it is not visible in this part of the file).
		private Queue queueURLS;
		// Thread that takes the text of the browse editor and parses it
		// (per the original author's note).
		private Thread threadParse;
		// Binary tree used to keep the URIs unique.
		// NOTE(review): SortTree is not declared in this part of the file;
		// presumably it comes from LiteLib - confirm.
		private SortTree urlStorage;
		// Performance counter used to measure CPU usage.
		private System.Diagnostics.PerformanceCounter cpuCounter; 
		// Performance counter used to measure memory usage.
		private System.Diagnostics.PerformanceCounter ramCounter; 
		
		// Number of bytes downloaded. Assigning the property also refreshes
		// the byte-count panel of the status bar.
		private int nByteCount;
		private int ByteCount
		{
			get
			{
				return this.nByteCount;
			}
			set
			{
				this.nByteCount = value;
				// the panel shows the integer KB count plus one
				int kilobytes = this.nByteCount / 1024 + 1;
				string caption = Commas(kilobytes) + " KB";
				this.statusBarPanelByteCount.Text = caption;
			}
		}

		// Number of errors hit during the download process. Assigning the
		// property also refreshes the errors panel of the status bar.
		private int nErrorCount;
		private int ErrorCount
		{
			get
			{
				return this.nErrorCount;
			}
			set
			{
				this.nErrorCount = value;
				string caption = Commas(this.nErrorCount) + " errors";
				this.statusBarPanelErrors.Text = caption;
			}
		}

		// Number of URIs found. Assigning the property also refreshes the
		// URLs panel of the status bar.
		private int nURLCount;
		private int URLCount
		{
			get
			{
				return this.nURLCount;
			}
			set
			{
				this.nURLCount = value;
				string caption = Commas(this.nURLCount) + " URL found";
				this.statusBarPanelURLs.Text = caption;
			}
		}

		// Available memory. Assigning the property also refreshes the memory
		// panel of the status bar.
		private float nFreeMemory;
		private float FreeMemory
		{
			get
			{
				return this.nFreeMemory;
			}
			set
			{
				this.nFreeMemory = value;
				// the float is formatted by the default string concatenation
				string caption = this.nFreeMemory + " Mb Available";
				this.statusBarPanelMem.Text = caption;
			}
		}

		// CPU usage in percent. Assigning the property refreshes both the text
		// and the icon of the CPU panel of the status bar.
		private int nCPUUsage;
		// Icon most recently created by the CPUUsage setter; kept so it can be
		// disposed once the panel has been given a newer one.
		private Icon iconCPUUsage;
		private int CPUUsage
		{
			get	{	return nCPUUsage;	}
			set
			{
				nCPUUsage = value;
				this.statusBarPanelCPU.Text = "CPU usage " + nCPUUsage +"%";

				// one image per 10% step; without any image there is nothing to show
				int nImageCount = imageListPercentage.Images.Count;
				if(nImageCount == 0)
					return;
				// clamp so an out-of-range percentage cannot index past the list
				int nImageIndex = Math.Max(0, Math.Min(value/10, nImageCount-1));

				Icon iconNew;
				// the bitmap is only needed long enough to build the icon from it
				using(Bitmap bitmap = (Bitmap)imageListPercentage.Images[nImageIndex])
				{
					// GetHicon allocates a native icon handle that the caller must free
					IntPtr hIcon = bitmap.GetHicon();
					try
					{
						// Icon.FromHandle does not take ownership of the handle, so
						// clone into an Icon that owns its own copy ...
						using(Icon iconWrapper = Icon.FromHandle(hIcon))
						{
							iconNew = (Icon)iconWrapper.Clone();
						}
					}
					finally
					{
						// ... and release the handle created by GetHicon
						DestroyIconHandle(hIcon);
					}
				}

				this.statusBarPanelCPU.Icon = iconNew;

				// the panel now shows the new icon, so the one created by the
				// previous assignment can be released
				if(iconCPUUsage != null)
					iconCPUUsage.Dispose();
				iconCPUUsage = iconNew;
			}
		}

		// Win32 DestroyIcon, imported under a distinct managed name; frees icon
		// handles obtained from Bitmap.GetHicon.
		[DllImport("user32.dll", EntryPoint="DestroyIcon")]
		private static extern bool DestroyIconHandle(IntPtr hIcon);

		// Download folder; the stored value has its trailing backslashes
		// stripped.
		private string strDownloadfolder;
		private string Downloadfolder
		{
			get
			{
				return this.strDownloadfolder;
			}
			set
			{
				// store the raw value first, then strip trailing '\' characters
				this.strDownloadfolder = value;
				char[] trailingSeparators = { '\\' };
				this.strDownloadfolder = this.strDownloadfolder.TrimEnd(trailingSeparators);
			}
		}
		
		// Number of files downloaded. Assigning the property also refreshes
		// the files panel of the status bar.
		private int nFileCount;
		private int FileCount
		{
			get
			{
				return this.nFileCount;
			}
			set
			{
				this.nFileCount = value;
				string caption = Commas(this.nFileCount) + " file(s) downloaded";
				this.statusBarPanelFiles.Text = caption;
			}
		}
		
		// Worker threads, indexed by slot number: the ThreadCount setter
		// creates the thread for slot N and names it N.ToString().
		private Thread[] threadsRun;

		// Number of running threads. Assigning the property walks the first
		// 'value' slots of threadsRun: a suspended thread is resumed, any other
		// slot gets a freshly started thread running ThreadRunFunction, and new
		// slots get a row in listViewThreads. The stored count is updated only
		// when the whole walk succeeds.
		private int nThreadCount;
		private int ThreadCount
		{
			get	{	return nThreadCount;	}
			set
			{
				// lock releases the monitor on every exit path, including an
				// exception that escapes the catch below
				lock(this.listViewThreads)
				{
					try
					{
						for(int nIndex = 0; nIndex < value; nIndex ++)
						{
							Thread thread = threadsRun[nIndex];
							// ThreadState is a flags enumeration, so test the
							// Suspended bit instead of comparing for equality
							bool bSuspended = thread != null &&
								(thread.ThreadState & ThreadState.Suspended) != 0;

							if(!bSuspended)
							{
								// NOTE(review): a slot whose thread is still running
								// is replaced as well, as in the original code -
								// confirm that this is intended.
								// create new thread
								thread = new Thread(new ThreadStart(ThreadRunFunction));
								threadsRun[nIndex] = thread;
								// set thread name equal to its index
								thread.Name = nIndex.ToString();
								// start thread working function
								thread.Start();
								// check whether the thread has no row in the view yet
								if(nIndex == this.listViewThreads.Items.Count)
								{
									// add a new line in the view for the new thread
									ListViewItem item = this.listViewThreads.Items.Add((nIndex+1).ToString(), 0);
									string[] subItems = { "", "", "", "0", "0%" };
									item.SubItems.AddRange(subItems);
								}
							}
							else
							{
								// get thread item from the list
								ListViewItem item = this.listViewThreads.Items[nIndex];
								item.ImageIndex = 1;
								item.SubItems[2].Text = "Resume";
								// resume the thread
								thread.Resume();
							}
						}
						// change thread value
						nThreadCount = value;
					}
					catch(Exception ex)
					{
						// still best effort (the count stays unchanged on failure),
						// but leave a trace instead of dropping the error silently
						System.Diagnostics.Debug.WriteLine("ThreadCount: " + ex);
					}
				}
			}
		}

		// MIME types string; the initial value comes from GetMIMETypes().
		private string strMIMETypes = GetMIMETypes();
		private string MIMETypes
		{
			get
			{
				return this.strMIMETypes;
			}
			set
			{
				this.strMIMETypes = value;
			}
		}

		// Text encoding; the initial value comes from GetTextEncoding().
		private Encoding encoding = GetTextEncoding();
		private Encoding TextEncoding
		{
			get
			{
				return this.encoding;
			}
			set
			{
				this.encoding = value;
			}
		}

		// Timeout for socket send and receive operations.
		private int nRequestTimeout;
		private int RequestTimeout
		{
			get
			{
				return this.nRequestTimeout;
			}
			set
			{
				this.nRequestTimeout = value;
			}
		}

		// Time each thread sleeps when the queue of references is empty.
		private int nSleepFetchTime;
		private int SleepFetchTime
		{
			get
			{
				return this.nSleepFetchTime;
			}
			set
			{
				this.nSleepFetchTime = value;
			}
		}
		
		// User-defined list of restricted words, letting the user keep out
		// unwanted pages.
		private string[] strExcludeWords;
		private string[] ExcludeWords
		{
			get
			{
				return this.strExcludeWords;
			}
			set
			{
				this.strExcludeWords = value;
			}
		}

		// User-defined list of restricted file extensions, used to avoid
		// parsing non-text data.
		private string[] strExcludeFiles;
		private string[] ExcludeFiles
		{
			get
			{
				return this.strExcludeFiles;
			}
			set
			{
				this.strExcludeFiles = value;
			}
		}

		// User-defined list of restricted hosts, used to avoid being blocked
		// by those hosts.
		private string[] strExcludeHosts;
		private string[] ExcludeHosts
		{
			get
			{
				return this.strExcludeHosts;
			}
			set
			{
				this.strExcludeHosts = value;
			}
		}
		
		// Number of requests kept in the requests view so that their details
		// can be reviewed.
		private int nLastRequestCount;
		private int LastRequestCount
		{
			get
			{
				return this.nLastRequestCount;
			}
			set
			{
				this.nLastRequestCount = value;
			}
		}
		
		// Time each thread sleeps after handling a request. The value matters
		// because it keeps hosts from blocking the crawler for heavy load.
		private int nSleepConnectTime;
		private int SleepConnectTime
		{
			get
			{
				return this.nSleepConnectTime;
			}
			set
			{
				this.nSleepConnectTime = value;
			}
		}

		// Depth of navigation in the crawling process.
		private int nWebDepth;
		private int WebDepth
		{
			get
			{
				return this.nWebDepth;
			}
			set
			{
				this.nWebDepth = value;
			}
		}

		// MIME types are the content types the crawler supports downloading;
		// the crawler ships with a default set of them.
		// NOTE(review): presumably this flag means "accept every MIME type" -
		// confirm against the code that reads it.
		private bool bAllMIMETypes;
		private bool AllMIMETypes
		{
			get
			{
				return this.bAllMIMETypes;
			}
			set
			{
				this.bAllMIMETypes = value;
			}
		}

		// Limits the crawling process to the host of the original URL.
		private bool bKeepSameServer;
		private bool KeepSameServer
		{
			get
			{
				return this.bKeepSameServer;
			}
			set
			{
				this.bKeepSameServer = value;
			}
		}
		
		// Keeps the socket connection open for subsequent requests, to avoid
		// the time needed to reconnect.
		private bool bKeepAlive;
		private bool KeepAlive
		{
			get
			{
				return this.bKeepAlive;
			}
			set
			{
				this.bKeepAlive = value;
			}
		}
		
		// Flag used to stop all running threads when the user asks to stop.
		private bool ThreadsRunning;

		// Windows Forms controls and components that make up the form's UI:
		// menus, tool bars, status bar panels, list views, tab pages, image
		// lists and the memory timer.
		// NOTE(review): presumably these are created by designer-generated
		// code that is not visible in this part of the file - confirm.
		private System.Windows.Forms.MenuItem menuItemFile;
		private System.Windows.Forms.MenuItem menuItemExit;
		private System.Windows.Forms.MenuItem menuItemOptions;
		private System.Windows.Forms.MenuItem menuItemSettings;
		private System.Windows.Forms.MainMenu mainMenu;
		private System.Windows.Forms.ToolBar toolBarMain;
		private System.Windows.Forms.ImageList imageList2;
		private System.ComponentModel.IContainer components;

		private System.Windows.Forms.StatusBar statusBar;
		private System.Windows.Forms.ToolBarButton toolBarButtonPause;
		private System.Windows.Forms.ToolBarButton toolBarButtonStop;
		private System.Windows.Forms.ToolBarButton toolBarButton1;
		private System.Windows.Forms.ToolBarButton toolBarButtonDeleteAll;
		private System.Windows.Forms.ToolBarButton toolBarButton2;
		private System.Windows.Forms.ToolBarButton toolBarButtonSettings;
		private System.Windows.Forms.MenuItem menuItemAbout;
		private System.Windows.Forms.ToolBar toolBarWeb;
		private System.Windows.Forms.TabControl tabControlRightView;
		private System.Windows.Forms.ComboBox comboBoxWeb;
		private System.Windows.Forms.TabPage tabPageThreads;
		private System.Windows.Forms.ListView listViewThreads;
		private System.Windows.Forms.ColumnHeader columnHeaderTHreadID;
		private System.Windows.Forms.ColumnHeader columnHeaderThreadURL;
		private System.Windows.Forms.ColumnHeader columnHeaderThreadBytes;
		private System.Windows.Forms.ColumnHeader columnHeaderThreadPersentage;
		private System.Windows.Forms.ColumnHeader columnHeaderThreadDepth;
		private System.Windows.Forms.StatusBarPanel statusBarPanelMem;
		private System.Windows.Forms.ColumnHeader columnHeaderThreadAction;	
		private System.Windows.Forms.StatusBarPanel statusBarPanelByteCount;
		private System.Windows.Forms.ImageList imageList3;
		private System.Windows.Forms.ToolBarButton toolBarButton4;
		private System.Windows.Forms.Button buttonGo;
		private System.Windows.Forms.ImageList imageList4;
		private System.Windows.Forms.ToolBarButton toolBarButtonContinue;
		private System.Windows.Forms.TabPage tabPageErrors;
		private System.Windows.Forms.ColumnHeader columnHeaderErrorID;
		private System.Windows.Forms.ColumnHeader columnHeaderErrorDescription;
		private System.Windows.Forms.ColumnHeader columnHeaderErrorItem;
		private System.Windows.Forms.Splitter splitter3;
		private System.Windows.Forms.TextBox textBoxErrorDescription;
		private System.Windows.Forms.ListView listViewErrors;
		private System.Windows.Forms.ContextMenu contextMenuBrowse;
		private System.Windows.Forms.MenuItem menuItemBrowseHttp;
		private System.Windows.Forms.StatusBarPanel statusBarPanelErrors;
		private System.Windows.Forms.ToolBarButton toolBarButtonBrowse;
		private System.Windows.Forms.ImageList imageList1;
		private System.Windows.Forms.ColumnHeader columnHeaderDate;
		private System.Windows.Forms.Timer timerMem;
		private System.Windows.Forms.MenuItem menuItem1;
		private System.Windows.Forms.MenuItem menuItemFileMatches;
		private System.Windows.Forms.MenuItem menuItemOutput;
		private System.Windows.Forms.MenuItem menuItemConnections;
		private System.Windows.Forms.MenuItem menuItemHelp;
		private System.Windows.Forms.ContextMenu contextMenuNavigate;
		private System.Windows.Forms.ContextMenu contextMenuSettings;
		private System.Windows.Forms.MenuItem menuItemSettingsFileMatches;
		private System.Windows.Forms.MenuItem menuItemSettingsOutput;
		private System.Windows.Forms.MenuItem menuItemSettingsConnections;
		private System.Windows.Forms.MenuItem menuItemCopy;
		private System.Windows.Forms.MenuItem menuItemPaste;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -