// File: mainform.cs
// inventor name: Hatem Mostafa
// Date: 19/3/2006
using System;
using System.Drawing;
using System.Xml;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.Data;
using System.Resources;
using System.Threading;
using System.Runtime.InteropServices;
using Microsoft.Win32;
using System.Net;
using System.Net.Sockets;
using LiteLib;
namespace Crawler
{
/// <summary>
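/// Main form of the Crawler application: holds the crawl settings, the worker-thread
/// bookkeeping and the status-bar counters (bytes, files, URLs, errors, CPU, memory).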
/// </summary>
public class CrawlerForm : System.Windows.Forms.Form
{
// Queue of the unique URIs collected by the crawler.
private Queue queueURLS;
// Thread that takes the text of the browse editor and parses it.
private Thread threadParse;
// Binary tree used to keep the URIs unique.
// NOTE(review): SortTree is not declared in this part of the file; presumably it comes from LiteLib - confirm.
private SortTree urlStorage;
// Performance counter used to measure CPU usage.
private System.Diagnostics.PerformanceCounter cpuCounter;
// Performance counter used to measure memory usage.
private System.Diagnostics.PerformanceCounter ramCounter;
// Total number of bytes downloaded so far.
private int nByteCount;
// Storing a new total also refreshes the byte-count status panel. The panel shows
// the total in KB, computed as (bytes / 1024) + 1 with integer division, so a
// total of 0 bytes is displayed as 1 KB.
private int ByteCount
{
    get
    {
        return nByteCount;
    }
    set
    {
        nByteCount = value;
        int nKiloBytes = nByteCount / 1024 + 1;
        statusBarPanelByteCount.Text = Commas(nKiloBytes) + " KB";
    }
}
// Number of errors that occurred during the download process.
private int nErrorCount;
// Storing a new count also rewrites the errors status panel as "<count> errors",
// with the count formatted by Commas().
private int ErrorCount
{
    get
    {
        return nErrorCount;
    }
    set
    {
        nErrorCount = value;
        string strPanelText = Commas(nErrorCount) + " errors";
        statusBarPanelErrors.Text = strPanelText;
    }
}
// Number of URIs found so far.
private int nURLCount;
// Storing a new count also rewrites the URLs status panel as "<count> URL found",
// with the count formatted by Commas().
private int URLCount
{
    get
    {
        return nURLCount;
    }
    set
    {
        nURLCount = value;
        string strPanelText = Commas(nURLCount) + " URL found";
        statusBarPanelURLs.Text = strPanelText;
    }
}
// Amount of available memory; the status panel labels the value as "Mb".
private float nFreeMemory;
// Storing a new value also rewrites the memory status panel as "<value> Mb Available".
// The number is rendered with the default float.ToString() formatting.
private float FreeMemory
{
    get
    {
        return nFreeMemory;
    }
    set
    {
        nFreeMemory = value;
        string strPanelText = nFreeMemory.ToString() + " Mb Available";
        statusBarPanelMem.Text = strPanelText;
    }
}
// CPU usage in percent, as shown in the CPU status panel.
private int nCPUUsage;
// The icon most recently created by the CPUUsage setter. It is tracked here so it
// can be disposed when the next one replaces it; icons assigned to the panel by
// any other code are never disposed by this property.
private Icon iconCPUUsage;
// Releases a native icon handle such as the one returned by Bitmap.GetHicon().
// Bound to user32!DestroyIcon under a distinct managed name to avoid clashing with
// any other DestroyIcon declaration in the project.
[DllImport("user32.dll", EntryPoint = "DestroyIcon", SetLastError = true)]
private static extern bool DestroyIconHandle(IntPtr hIcon);
// Storing a new value rewrites the CPU panel text as "CPU usage <value>%" and swaps
// the panel icon for image number (value / 10) of imageListPercentage.
private int CPUUsage
{
    get { return nCPUUsage; }
    set
    {
        nCPUUsage = value;
        this.statusBarPanelCPU.Text = "CPU usage " + nCPUUsage + "%";

        // Nothing to draw from: keep whatever icon the panel already shows.
        int nImageCount = imageListPercentage.Images.Count;
        if (nImageCount == 0)
        {
            return;
        }

        // Clamp the image index so an out-of-range reading (below 0 or above what
        // the image list covers) cannot throw ArgumentOutOfRangeException.
        int nImageIndex = value / 10;
        if (nImageIndex < 0)
        {
            nImageIndex = 0;
        }
        else if (nImageIndex >= nImageCount)
        {
            nImageIndex = nImageCount - 1;
        }

        // GetHicon() allocates a native HICON that Icon.FromHandle() does not own,
        // so the handle must be destroyed explicitly. Clone() produces a managed
        // Icon that owns its own copy, which lets the temporary handle be released
        // right away instead of leaking one GDI handle per update.
        IntPtr hIcon = ((Bitmap)imageListPercentage.Images[nImageIndex]).GetHicon();
        Icon iconNew;
        try
        {
            using (Icon iconBorrowed = Icon.FromHandle(hIcon))
            {
                iconNew = (Icon)iconBorrowed.Clone();
            }
        }
        finally
        {
            DestroyIconHandle(hIcon);
        }

        // Install the new icon first, then dispose the one created by the previous
        // call so the panel never references a disposed icon.
        Icon iconOld = iconCPUUsage;
        this.statusBarPanelCPU.Icon = iconNew;
        iconCPUUsage = iconNew;
        if (iconOld != null)
        {
            iconOld.Dispose();
        }
    }
}
// Download folder, stored without any trailing backslash.
private string strDownloadfolder;
// The setter strips every trailing '\' from the assigned path. Assigning null
// stores null instead of throwing NullReferenceException from TrimEnd.
private string Downloadfolder
{
    get { return strDownloadfolder; }
    set
    {
        if (value == null)
        {
            strDownloadfolder = null;
        }
        else
        {
            strDownloadfolder = value.TrimEnd('\\');
        }
    }
}
// Number of files downloaded so far.
private int nFileCount;
// Storing a new count also rewrites the files status panel as
// "<count> file(s) downloaded", with the count formatted by Commas().
private int FileCount
{
    get
    {
        return nFileCount;
    }
    set
    {
        nFileCount = value;
        string strPanelText = Commas(nFileCount) + " file(s) downloaded";
        statusBarPanelFiles.Text = strPanelText;
    }
}
// Worker threads, indexed by slot; each thread's Name is its slot index.
private Thread[] threadsRun;
// Number of running threads.
private int nThreadCount;
// Setting the count walks slots 0 .. value-1 while holding the lock on
// listViewThreads: a suspended thread is resumed, any other slot gets a freshly
// started thread running ThreadRunFunction. The stored count is updated only when
// every slot was handled without an exception.
//
// NOTE(review): a slot whose thread is alive and NOT suspended is still replaced by
// a new thread (original behaviour, kept as-is). Confirm that callers always stop
// the workers before re-assigning ThreadCount, otherwise threads are duplicated.
// NOTE(review): Thread.Resume() is marked obsolete in .NET 2.0 and later; it is kept
// because the suspend/resume design lives outside this block.
private int ThreadCount
{
    get { return nThreadCount; }
    set
    {
        // lock() releases the monitor on every exit path, including a
        // ThreadAbortException that is re-thrown after the catch block.
        lock (this.listViewThreads)
        {
            try
            {
                for (int nIndex = 0; nIndex < value; nIndex++)
                {
                    Thread thread = threadsRun[nIndex];
                    // ThreadState is a [Flags] enum: a suspended thread can report
                    // e.g. Suspended | WaitSleepJoin, so test the bit rather than
                    // comparing for equality.
                    bool bSuspended = thread != null
                        && (thread.ThreadState & ThreadState.Suspended) != 0;

                    if (bSuspended)
                    {
                        // get thread item from the list and mark it as resumed
                        ListViewItem item = this.listViewThreads.Items[nIndex];
                        item.ImageIndex = 1;
                        item.SubItems[2].Text = "Resume";
                        // resume the thread
                        thread.Resume();
                    }
                    else
                    {
                        // create new thread, named after its slot index
                        thread = new Thread(new ThreadStart(ThreadRunFunction));
                        thread.Name = nIndex.ToString();
                        threadsRun[nIndex] = thread;
                        // start thread working function
                        thread.Start();
                        // add a row to the view if this slot has none yet
                        if (nIndex == this.listViewThreads.Items.Count)
                        {
                            ListViewItem item = this.listViewThreads.Items.Add((nIndex + 1).ToString(), 0);
                            string[] subItems = { "", "", "", "0", "0%" };
                            item.SubItems.AddRange(subItems);
                        }
                    }
                }
                // change thread value
                nThreadCount = value;
            }
            catch (Exception e)
            {
                // Still best-effort (the stored count stays unchanged on failure),
                // but the failure is now traceable instead of silently discarded.
                System.Diagnostics.Trace.WriteLine(
                    "ThreadCount: could not start or resume worker threads: " + e);
            }
        }
    }
}
// MIME types string; its initial value comes from GetMIMETypes().
private string strMIMETypes = GetMIMETypes();
// Plain accessor over strMIMETypes.
private string MIMETypes
{
    get
    {
        return this.strMIMETypes;
    }
    set
    {
        this.strMIMETypes = value;
    }
}
// Text encoding used by the crawler; its initial value comes from GetTextEncoding().
private Encoding encoding = GetTextEncoding();
// Plain accessor over the encoding field.
private Encoding TextEncoding
{
    get
    {
        return this.encoding;
    }
    set
    {
        this.encoding = value;
    }
}
// Timeout applied to socket send and receive operations.
private int nRequestTimeout;
// Plain accessor over nRequestTimeout.
private int RequestTimeout
{
    get
    {
        return this.nRequestTimeout;
    }
    set
    {
        this.nRequestTimeout = value;
    }
}
// How long each thread sleeps when the queue of references is empty.
private int nSleepFetchTime;
// Plain accessor over nSleepFetchTime.
private int SleepFetchTime
{
    get
    {
        return this.nSleepFetchTime;
    }
    set
    {
        this.nSleepFetchTime = value;
    }
}
// User-defined list of restricted words, letting the user keep out unwanted pages.
private string[] strExcludeWords;
// Plain accessor over strExcludeWords; the array reference is stored as given.
private string[] ExcludeWords
{
    get
    {
        return this.strExcludeWords;
    }
    set
    {
        this.strExcludeWords = value;
    }
}
// User-defined list of restricted file extensions, to avoid parsing non-text data.
private string[] strExcludeFiles;
// Plain accessor over strExcludeFiles; the array reference is stored as given.
private string[] ExcludeFiles
{
    get
    {
        return this.strExcludeFiles;
    }
    set
    {
        this.strExcludeFiles = value;
    }
}
// User-defined list of restricted hosts, to avoid being blocked by those hosts.
private string[] strExcludeHosts;
// Plain accessor over strExcludeHosts; the array reference is stored as given.
private string[] ExcludeHosts
{
    get
    {
        return this.strExcludeHosts;
    }
    set
    {
        this.strExcludeHosts = value;
    }
}
// Number of requests kept in the requests view so their details can be reviewed.
private int nLastRequestCount;
// Plain accessor over nLastRequestCount.
private int LastRequestCount
{
    get
    {
        return this.nLastRequestCount;
    }
    set
    {
        this.nLastRequestCount = value;
    }
}
// How long each thread sleeps after handling a request. This value matters:
// it keeps hosts from blocking the crawler because of heavy load.
private int nSleepConnectTime;
// Plain accessor over nSleepConnectTime.
private int SleepConnectTime
{
    get
    {
        return this.nSleepConnectTime;
    }
    set
    {
        this.nSleepConnectTime = value;
    }
}
// Depth of navigation used by the crawling process.
private int nWebDepth;
// Plain accessor over nWebDepth.
private int WebDepth
{
    get
    {
        return this.nWebDepth;
    }
    set
    {
        this.nWebDepth = value;
    }
}
// MIME types are the types the crawler supports downloading; the crawler ships
// with a default set of types.
private bool bAllMIMETypes;
// Plain accessor over bAllMIMETypes.
private bool AllMIMETypes
{
    get
    {
        return this.bAllMIMETypes;
    }
    set
    {
        this.bAllMIMETypes = value;
    }
}
// Limits the crawling process to the host of the original URL.
private bool bKeepSameServer;
// Plain accessor over bKeepSameServer.
private bool KeepSameServer
{
    get
    {
        return this.bKeepSameServer;
    }
    set
    {
        this.bKeepSameServer = value;
    }
}
// Keeps the socket connection open for subsequent requests to avoid reconnect time.
private bool bKeepAlive;
// Plain accessor over bKeepAlive.
private bool KeepAlive
{
    get
    {
        return this.bKeepAlive;
    }
    set
    {
        this.bKeepAlive = value;
    }
}
// Flag used to stop all running threads when the user requests a stop.
// Declared volatile because, per the description above, it is a stop signal shared
// with the worker threads: every read must observe the latest write rather than a
// cached value. The access modifier is now explicit (private was already the default).
private volatile bool ThreadsRunning;
// Windows Forms controls and components that make up the form: menus, tool bars,
// status-bar panels, list views, tab pages, image lists and the memory timer.
// NOTE(review): presumably created and wired up in InitializeComponent(), which is
// not visible in this part of the file - confirm before renaming or removing any.
// Type names are kept fully qualified: both System.Threading and
// System.Windows.Forms are imported, so a bare "Timer" would be ambiguous.
private System.Windows.Forms.MenuItem menuItemFile;
private System.Windows.Forms.MenuItem menuItemExit;
private System.Windows.Forms.MenuItem menuItemOptions;
private System.Windows.Forms.MenuItem menuItemSettings;
private System.Windows.Forms.MainMenu mainMenu;
private System.Windows.Forms.ToolBar toolBarMain;
private System.Windows.Forms.ImageList imageList2;
private System.ComponentModel.IContainer components;
private System.Windows.Forms.StatusBar statusBar;
private System.Windows.Forms.ToolBarButton toolBarButtonPause;
private System.Windows.Forms.ToolBarButton toolBarButtonStop;
private System.Windows.Forms.ToolBarButton toolBarButton1;
private System.Windows.Forms.ToolBarButton toolBarButtonDeleteAll;
private System.Windows.Forms.ToolBarButton toolBarButton2;
private System.Windows.Forms.ToolBarButton toolBarButtonSettings;
private System.Windows.Forms.MenuItem menuItemAbout;
private System.Windows.Forms.ToolBar toolBarWeb;
private System.Windows.Forms.TabControl tabControlRightView;
private System.Windows.Forms.ComboBox comboBoxWeb;
private System.Windows.Forms.TabPage tabPageThreads;
private System.Windows.Forms.ListView listViewThreads;
private System.Windows.Forms.ColumnHeader columnHeaderTHreadID;
private System.Windows.Forms.ColumnHeader columnHeaderThreadURL;
private System.Windows.Forms.ColumnHeader columnHeaderThreadBytes;
private System.Windows.Forms.ColumnHeader columnHeaderThreadPersentage;
private System.Windows.Forms.ColumnHeader columnHeaderThreadDepth;
private System.Windows.Forms.StatusBarPanel statusBarPanelMem;
private System.Windows.Forms.ColumnHeader columnHeaderThreadAction;
private System.Windows.Forms.StatusBarPanel statusBarPanelByteCount;
private System.Windows.Forms.ImageList imageList3;
private System.Windows.Forms.ToolBarButton toolBarButton4;
private System.Windows.Forms.Button buttonGo;
private System.Windows.Forms.ImageList imageList4;
private System.Windows.Forms.ToolBarButton toolBarButtonContinue;
private System.Windows.Forms.TabPage tabPageErrors;
private System.Windows.Forms.ColumnHeader columnHeaderErrorID;
private System.Windows.Forms.ColumnHeader columnHeaderErrorDescription;
private System.Windows.Forms.ColumnHeader columnHeaderErrorItem;
private System.Windows.Forms.Splitter splitter3;
private System.Windows.Forms.TextBox textBoxErrorDescription;
private System.Windows.Forms.ListView listViewErrors;
private System.Windows.Forms.ContextMenu contextMenuBrowse;
private System.Windows.Forms.MenuItem menuItemBrowseHttp;
private System.Windows.Forms.StatusBarPanel statusBarPanelErrors;
private System.Windows.Forms.ToolBarButton toolBarButtonBrowse;
private System.Windows.Forms.ImageList imageList1;
private System.Windows.Forms.ColumnHeader columnHeaderDate;
private System.Windows.Forms.Timer timerMem;
private System.Windows.Forms.MenuItem menuItem1;
private System.Windows.Forms.MenuItem menuItemFileMatches;
private System.Windows.Forms.MenuItem menuItemOutput;
private System.Windows.Forms.MenuItem menuItemConnections;
private System.Windows.Forms.MenuItem menuItemHelp;
private System.Windows.Forms.ContextMenu contextMenuNavigate;
private System.Windows.Forms.ContextMenu contextMenuSettings;
private System.Windows.Forms.MenuItem menuItemSettingsFileMatches;
private System.Windows.Forms.MenuItem menuItemSettingsOutput;
private System.Windows.Forms.MenuItem menuItemSettingsConnections;
private System.Windows.Forms.MenuItem menuItemCopy;
private System.Windows.Forms.MenuItem menuItemPaste;
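// NOTE: the lines below are keyboard-shortcut help copied from the web code viewer
// this file was captured from; they are not part of the original source.
//   Keyboard shortcuts
//   Copy code:            Ctrl + C
//   Search code:          Ctrl + F
//   Full-screen mode:     F11
//   Toggle theme:         Ctrl + Shift + D
//   Show shortcuts:       ?
//   Increase font size:   Ctrl + =
//   Decrease font size:   Ctrl + -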