📄 mainform.cs
字号:
if(thread.ThreadState == ThreadState.Suspended)
thread.Resume();
thread.Abort();
}
}
catch(Exception)
{
}
}
Monitor.Exit(this.listViewThreads);
this.toolBarButtonContinue.Enabled = true;
this.toolBarButtonPause.Enabled = false;
this.buttonGo.Enabled = true;
this.queueURLS.Clear();
this.urlStorage.Clear();
}
/// <summary>
/// Worker-thread entry point: keeps taking uris from the queue and parsing
/// them, then updates this thread's row in the threads list before exiting.
/// </summary>
/// <remarks>
/// The current thread's Name is parsed as an integer. That number is compared
/// with ThreadCount to decide whether the thread should keep running, and is
/// used as the index of the thread's row in listViewThreads.
/// SleepConnectTime and SleepFetchTime are multiplied by 1000 before being
/// handed to Thread.Sleep, i.e. they are treated as seconds.
/// </remarks>
void ThreadRunFunction()
{
    // passed by ref to ParseUri on every iteration, which reuses or replaces it
    MyWebRequest request = null;
    while(ThreadsRunning && int.Parse(Thread.CurrentThread.Name) < this.ThreadCount)
    {
        MyUri uri = DequeueUri();
        if(uri != null)
        {
            // optional delay before each connection
            if(SleepConnectTime > 0)
            {
                Thread.Sleep(SleepConnectTime*1000);
            }
            ParseUri(uri, ref request);
        }
        else
        {
            // nothing queued: wait before polling the queue again
            Thread.Sleep(SleepFetchTime*1000);
        }
    }
    // lock releases the monitor from a finally block. With a bare
    // Monitor.Enter/Monitor.Exit pair around a try/catch, a ThreadAbortException
    // (re-raised at the end of the catch) would skip Monitor.Exit and leave
    // the list locked by a dead thread.
    lock(this.listViewThreads)
    {
        try
        {
            ListViewItem item = this.listViewThreads.Items[int.Parse(Thread.CurrentThread.Name)];
            if(ThreadsRunning == false)
            {
                item.SubItems[2].Text = "Stop";
            }
            // the image index is reset whether or not the crawl was stopped
            item.ImageIndex = 0;
        }
        catch(Exception)
        {
            // best effort: a failure to update the row must not break thread exit
        }
    }
}
/// <summary>
/// Pushes a uri onto the shared work queue.
/// </summary>
/// <param name="uri">The uri to queue.</param>
/// <param name="bCheckRepetition">
/// When true, the uri is first passed to AddURL and is queued only if AddURL
/// returns true.
/// </param>
/// <returns>
/// false when the repetition check rejected the uri; true once the uri has
/// been placed on the queue.
/// </returns>
bool EnqueueUri(MyUri uri, bool bCheckRepetition)
{
    // AddURL decides whether the uri is accepted (excluded host / duplicate check)
    if(bCheckRepetition && !AddURL(ref uri))
    {
        return false;
    }
    // lock releases the monitor from a finally block, so an exception raised
    // while enqueuing (including a thread abort) cannot leave the queue
    // locked and stall every other worker thread.
    lock(queueURLS)
    {
        queueURLS.Enqueue(uri);
    }
    return true;
}
/// <summary>
/// Pops the next uri from the shared work queue.
/// </summary>
/// <returns>The next queued uri, or null when the queue is empty.</returns>
MyUri DequeueUri()
{
    // lock releases the monitor from a finally block, so the queue cannot be
    // left locked if this thread is aborted while holding it.
    lock(queueURLS)
    {
        // Test for an empty queue instead of letting Dequeue throw and
        // swallowing the exception; idle worker threads poll here repeatedly.
        // NOTE(review): assumes queueURLS exposes Count, as
        // System.Collections.Queue does - confirm against its declaration.
        if(queueURLS.Count == 0)
        {
            return null;
        }
        return (MyUri)queueURLS.Dequeue();
    }
}
/// <summary>
/// Starts processing the location typed into comboBoxWeb.
/// </summary>
/// <remarks>
/// Sets ThreadsRunning to true, then reads and trims the combo box text.
/// An existing directory is handed to ParseFolder. Anything else is queued
/// as a uri; text that does not name an existing file is first passed
/// through Normalize and written back to the combo box.
/// Any exception is reported through LogError and the method returns without
/// touching the toolbar buttons.
/// </remarks>
void RunParser()
{
    ThreadsRunning = true;
    try
    {
        string target = this.comboBoxWeb.Text.Trim();
        if(!Directory.Exists(target))
        {
            // not a folder: either a local file or a web address
            if(!File.Exists(target))
            {
                Normalize(ref target);
                this.comboBoxWeb.Text = target;
            }
            // seed the queue without the repetition check
            this.EnqueueUri(new MyUri(target), false);
        }
        else
        {
            ParseFolder(target, 0);
        }
    }
    catch(Exception failure)
    {
        LogError(this.comboBoxWeb.Text, failure.Message);
        return;
    }
    this.toolBarButtonContinue.Enabled = false;
    this.buttonGo.Enabled = true;
}
/// <summary>
/// Turns text typed by the user into a url with a scheme and a path.
/// </summary>
/// <param name="strURL">
/// The text to normalize; updated in place. "http://" is prepended unless the
/// text already starts with "http://" or "https://" (in any letter case), and
/// "/" is appended when nothing after the scheme contains a slash.
/// </param>
/// <remarks>
/// The result is not validated; it is only shaped so that it can be handed to
/// a uri constructor.
/// </remarks>
private void Normalize(ref string strURL)
{
    const string strHttp = "http://";
    const string strHttps = "https://";
    // index of the first character after the scheme separator
    int nHostStart;
    if(strURL.StartsWith(strHttp, StringComparison.OrdinalIgnoreCase))
    {
        nHostStart = strHttp.Length;
    }
    else if(strURL.StartsWith(strHttps, StringComparison.OrdinalIgnoreCase))
    {
        // an https address must not be turned into "http://https://..."
        nHostStart = strHttps.Length;
    }
    else
    {
        strURL = strHttp+strURL;
        nHostStart = strHttp.Length;
    }
    // nHostStart never exceeds the string length here, so IndexOf cannot
    // throw even for empty input
    if(strURL.IndexOf('/', nHostStart) == -1)
    {
        strURL += '/';
    }
}
/// <summary>
/// Decides whether a uri should be accepted: rejects excluded hosts, then
/// records the uri's absolute form in urlStorage.
/// </summary>
/// <param name="uri">The uri to check and record.</param>
/// <returns>
/// false when the lower-cased host contains one of the non-blank ExcludeHosts
/// patterns, when recording the url fails, or when the entry returned by
/// urlStorage.Add has a Count other than 1; true otherwise.
/// </returns>
bool AddURL(ref MyUri uri)
{
    foreach(string str in ExcludeHosts)
    {
        string strPattern = str.Trim();
        if(strPattern.Length > 0 && uri.Host.ToLower().IndexOf(strPattern) != -1)
        {
            LogError(uri.AbsoluteUri, "\r\nHost excluded as it includes reserved pattern ("+str+")");
            return false;
        }
    }
    // lock releases the monitor from a finally block. With a bare
    // Monitor.Enter/Monitor.Exit pair, a ThreadAbortException (re-raised at
    // the end of the catch) would skip Monitor.Exit and leave urlStorage
    // locked by a dead thread.
    lock(urlStorage)
    {
        try
        {
            string strURL = uri.AbsoluteUri;
            // presumably Count is the number of times this url has been added,
            // so 1 means first sighting - confirm against the urlStorage type
            return urlStorage.Add(ref strURL).Count == 1;
        }
        catch(Exception)
        {
            // a failure to record the url is treated as "not new"
            return false;
        }
    }
}
/// <summary>
/// Writes text into one cell of a list view row while holding the
/// listViewThreads lock.
/// </summary>
/// <param name="itemLog">The row to update.</param>
/// <param name="nCell">Index of the sub-item (column) to write.</param>
/// <param name="str">The text to show in the cell.</param>
void LogCell(ref ListViewItem itemLog, int nCell, string str)
{
    // lock releases the monitor from a finally block, so the list cannot be
    // left locked if this thread is aborted during the update.
    lock(this.listViewThreads)
    {
        try
        {
            itemLog.SubItems[nCell].Text = str;
        }
        catch(Exception)
        {
            // best effort: a failed cell update is ignored
        }
    }
}
void ParseUri(MyUri uri, ref MyWebRequest request)
{
string strStatus = "";
// check if connection is kept alive from previous connections or not
if(request != null && request.response.KeepAlive)
strStatus += "Connection live to: "+uri.Host+"\r\n\r\n";
else
strStatus += "Connecting: "+uri.Host+"\r\n\r\n";
ListViewItem itemLog = null;
Monitor.Enter(this.listViewThreads);
try
{ // update thread information in the threads view list
itemLog = this.listViewThreads.Items[int.Parse(Thread.CurrentThread.Name)];
int nDepth = uri.Depth;
itemLog.SubItems[1].Text = nDepth.ToString();
itemLog.ImageIndex = 1;
itemLog.BackColor = Color.WhiteSmoke;
// initialize status to Connect
itemLog.SubItems[2].Text = "Connect";
itemLog.ForeColor = Color.Red;
itemLog.SubItems[3].Text = uri.AbsoluteUri;
itemLog.SubItems[4].Text = "";
itemLog.SubItems[5].Text = "";
}
catch(Exception)
{
}
Monitor.Exit(this.listViewThreads);
try
{
// create web request
request = MyWebRequest.Create(uri, request, KeepAlive);
// set request timeout
request.Timeout = RequestTimeout*1000;
// retrieve response from web request
MyWebResponse response = request.GetResponse();
// update status text with the request and response headers
strStatus += request.Header+response.Header;
// check for redirection
if(response.ResponseUri.Equals(uri) == false)
{
// add the new uri to the queue
this.EnqueueUri(new MyUri(response.ResponseUri.AbsoluteUri), true);
// update status
strStatus += "Redirected to: "+response.ResponseUri+"\r\n";
// log current uri status
LogUri(uri.AbsoluteUri, strStatus);
// reset current request to avoid response socket opening case
request = null;
return;
}
// check for allowed MIME types
if(AllMIMETypes == false && response.ContentType != null && MIMETypes.Length > 0)
{
string strContentType = response.ContentType.ToLower();
int nExtIndex = strContentType.IndexOf(';');
if(nExtIndex != -1)
strContentType = strContentType.Substring(0, nExtIndex);
if(strContentType.IndexOf('*') == -1 && (nExtIndex = MIMETypes.IndexOf(strContentType)) == -1)
{
LogError(uri.AbsoluteUri, strStatus+"\r\nUnlisted Content-Type ("+strContentType+"), check settings.");
request = null;
return;
}
// find numbers
Match match = new Regex(@"\d+").Match(MIMETypes, nExtIndex);
int nMin = int.Parse(match.Value)*1024;
match = match.NextMatch();
int nMax = int.Parse(match.Value)*1024;
if(nMin < nMax && (response.ContentLength < nMin || response.ContentLength > nMax))
{
LogError(uri.AbsoluteUri, strStatus+"\r\nContentLength limit error ("+response.ContentLength+")");
request = null;
return;
}
}
// check for response extention
string[] ExtArray = { ".gif", ".jpg", ".css", ".zip", ".exe" };
bool bParse = true;
foreach(string ext in ExtArray)
if(uri.AbsoluteUri.ToLower().EndsWith(ext) == true)
{
bParse = false;
break;
}
foreach(string ext in ExcludeFiles)
if(ext.Trim().Length > 0 && uri.AbsoluteUri.ToLower().EndsWith(ext) == true)
{
bParse = false;
break;
}
// construct path in the hard disk
string strLocalPath = uri.LocalPath;
// check if the path ends with / to can crate the file on the HD
if(strLocalPath.EndsWith("/") == true)
// check if there is no query like (.asp?i=32&j=212)
if(uri.Query == "")
// add a default name for / ended pathes
strLocalPath += "default.html";
// check if the uri includes a query string
if(uri.Query != "")
// construct the name from the query hash value to be the same if we download it again
strLocalPath += uri.Query.GetHashCode()+".html";
// construct the full path folder
string BasePath = this.Downloadfolder+"\\"+uri.Host+Path.GetDirectoryName(uri.AbsolutePath);
// check if the folder not found
if(Directory.Exists(BasePath) == false)
// create the folder
Directory.CreateDirectory(BasePath);
// construct the full path name of the file
string PathName = this.Downloadfolder+"\\"+uri.Host+strLocalPath.Replace("%20", " ");
// open the output file
FileStream streamOut = File.Open(PathName, FileMode.Create, FileAccess.Write, FileShare.ReadWrite);
BinaryWriter writer = new BinaryWriter(streamOut);
itemLog.SubItems[2].Text = "Download";
itemLog.ForeColor = Color.Black;
// receive response buffer
string strResponse = "";
byte[] RecvBuffer = new byte[10240];
int nBytes, nTotalBytes = 0;
// loop to receive response buffer
while((nBytes = response.socket.Receive(RecvBuffer, 0, 10240, SocketFlags.None)) > 0)
{
// increment total received bytes
nTotalBytes += nBytes;
// write received buffer to file
writer.Write(RecvBuffer, 0, nBytes);
// check if the uri type not binary to can be parsed for refs
if(bParse == true)
// add received buffer to response string
strResponse += Encoding.ASCII.GetString(RecvBuffer, 0, nBytes);
// update view text
itemLog.SubItems[4].Text = Commas(nTotalBytes);
if(response.ContentLength > 0)
itemLog.SubItems[5].Text = '%'+(100-(response.ContentLength-nTotalBytes)*100/response.ContentLength).ToString();
// check if connection Keep-Alive to can break the loop if response completed
if(response.KeepAlive && nTotalBytes >= response.ContentLength && response.ContentLength > 0)
break;
}
// close output stream
writer.Close();
streamOut.Close();
if(response.KeepAlive)
strStatus += "Connection kept alive to be used in subpages.\r\n";
else
{
// close response
response.Close();
strStatus += "Connection closed.\r\n";
}
// update status
strStatus += Commas(nTotalBytes)+" bytes, downloaded to \""+PathName+"\"\r\n";
// increment total file count
FileCount++;
// increment total bytes count
ByteCount += nTotalBytes;
if(ThreadsRunning == true && bParse == true && uri.Depth < WebDepth)
{
strStatus += "\r\nParsing page ...\r\n";
// check for restricted words
foreach(string strExcludeWord in ExcludeWords)
if(strExcludeWord.Trim().Length > 0 && strResponse.IndexOf(strExcludeWord) != -1)
{
LogError(uri.AbsoluteUri, strStatus+"\r\nPage includes reserved word ("+strExcludeWord+")");
EraseItem(itemLog);
File.Delete(PathName);
return;
}
// parse the page to search for refs
string strRef = @"(href|HREF|src|SRC)[ ]*=[ ]*[""'][^""'#>]+[""']";
MatchCollection matches = new Regex(strRef).Matches(strResponse);
strStatus += "Found: "+matches.Count+" ref(s)\r\n";
URLCount += matches.Count;
foreach(Match match in matches)
{
strRef = match.Value.Substring(match.Value.IndexOf('=')+1).Trim('"', '\'', '#', ' ', '>');
try
{
if(strRef.IndexOf("..") != -1 || strRef.StartsWith("/"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -