📄 crawler.cs
字号:
using System;
using System.IO;
using System.Threading;
using SECompare.Config;
using SECompare.SearchAPI;
using SECompare.Structure;
namespace SECompare.Kernel
{
/// <summary>
/// Crawler, which is used to crawl search results from search engines.
/// </summary>
/// <remarks>
/// The work is done by the private <c>Crawl()</c> method, which the constructor
/// wraps in a <c>ThreadStart</c> delegate. Progress is published through the
/// <c>mFilePercentage</c>, <c>mEntryPercentage</c>, <c>mProcessingFile</c>,
/// <c>mProcessingEntry</c> and <c>mIsFinished</c> members, none of which are
/// declared in this class (presumably inherited from ThreadStatus - confirm).
/// </remarks>
public class Crawler:ThreadStatus
{
    // Pause between two successive result-range requests to one engine, in milliseconds.
    private const int GoogleRequestDelayMs = 100;
    private const int MsnRequestDelayMs = 100;
    private const int YahooRequestDelayMs = 6000;

    // Pause after a WebException before the query is retried, in milliseconds.
    private const int GoogleBackOffMs = 2000;
    private const int MsnBackOffMs = 2000;
    private const int YahooBackOffMs = 600000; // 10 minutes, as stated in the log notice

    private readonly int mMaxLength; //Max amount of results for a query, for example 1000.

    /// <summary>
    /// Creates a crawler and registers <c>Crawl()</c> as the thread entry point.
    /// </summary>
    /// <param name="maxLength">Max amount of results for a query, for example 1000.</param>
    public Crawler(int maxLength)
    {
        this.mMaxLength = maxLength;
        this.threadStart = new ThreadStart(Crawl);
    }

    /// <summary>
    /// Thread entry point: crawls every file found in the sampled-query root directory.
    /// </summary>
    private void Crawl()
    {
        try
        {
            //Get all sampled query list files
            String[] files = Directory.GetFiles((new Config.Config()).SamplingedQueryRoot);
            for(int fileIndex = 0; fileIndex < files.Length; fileIndex++)
            {
                //Set Thread Status Report
                this.mFilePercentage = (float)fileIndex/files.Length;
                //Crawl
                this.Crawl(files[fileIndex]);
            }
        }
        catch(ThreadInterruptedException)
        {
            // Deliberately not rethrown: an interrupt (which can surface from any
            // Thread.Sleep below) simply ends the crawl. Presumably this is how the
            // owner stops the worker - confirm against ThreadStatus.
        }
        finally
        {
            // Always flag completion, whether the crawl ended normally, was
            // interrupted, or is unwinding because of another exception.
            this.mIsFinished = true;
        }
    }

    /// <summary>
    /// Crawl a sample query file.
    /// </summary>
    /// <remarks>
    /// A query is repeated until every selected engine reports success. There is
    /// no retry limit, so a persistently failing engine keeps the worker on the
    /// same query (with the engine's back-off pause between attempts).
    /// </remarks>
    /// <param name="filename">File name</param>
    private void Crawl(String filename)
    {
        FileInfo file = new FileInfo(filename);
        //read query samples; the bare file name is used as the category
        String category = file.Name;
        CrawlSample sample = new CrawlSample();
        sample.Load(category,filename);
        //get query strings
        String[] queries = sample.GetQueries();
        for(int queryIndex = 0; queryIndex < queries.Length; queryIndex++)
        {
            String words = queries[queryIndex];
            //************Progress Status Report***********
            this.mProcessingFile = category;
            this.mProcessingEntry = words;
            this.mEntryPercentage = (float)queryIndex/queries.Length;
            //*********************************************
            // Retry the same query until all selected engines succeed.
            bool completed = false;
            while(!completed)
            {
                completed = this.CrawlSelectedEngines(words);
            }
        }
    }

    /// <summary>
    /// Runs one query against every selected engine, in the order Google, MSN, Yahoo.
    /// </summary>
    /// <param name="words">Query string</param>
    /// <returns>
    /// <c>false</c> as soon as one engine reports failure (the remaining engines are
    /// not tried in this pass); <c>true</c> when no selected engine failed.
    /// </returns>
    private bool CrawlSelectedEngines(String words)
    {
        //1. search Google
        if( this.mEngineSetting.IsSelected(EngineSet.Google) )
        {
            if(RunGoogle(words)==false)
            {
                return false;
            }
        }
        //2. search MSN
        if( this.mEngineSetting.IsSelected(EngineSet.MSN) )
        {
            if(RunMSN(words)==false)
            {
                return false;
            }
        }
        //3. search Yahoo
        if( this.mEngineSetting.IsSelected(EngineSet.Yahoo) )
        {
            if(RunYahoo(words)==false)
            {
                return false;
            }
        }
        return true;
    }

    /// <summary>
    /// Logs a failed web request and pauses the current thread before the caller retries.
    /// </summary>
    /// <remarks>
    /// The notice is written before sleeping so the log shows why the thread is
    /// idle while the pause is still in progress.
    /// </remarks>
    /// <param name="ex">The web exception that aborted the crawl of the current query</param>
    /// <param name="sleepMilliseconds">Length of the pause in milliseconds</param>
    /// <param name="notice">Log line announcing the pause</param>
    private void BackOff(System.Net.WebException ex, int sleepMilliseconds, String notice)
    {
        Config.Log.Append(ex.Message);
        Config.Log.Append(notice);
        System.Threading.Thread.Sleep(sleepMilliseconds);
    }

    /// <summary>
    /// Crawls the Yahoo results of one query, range by range.
    /// </summary>
    /// <param name="words">Query string</param>
    /// <returns>
    /// <c>true</c> when the range loop ended without a web exception;
    /// <c>false</c> after a <c>WebException</c> was logged and the back-off pause elapsed.
    /// </returns>
    private bool RunYahoo(String words)
    {
        try
        {
            int previousStart = -1;
            int start,length;
            QueryResults yahoo = new QueryResults(words,EngineSet.Yahoo,this.mMaxLength);
            while(yahoo.GetNextQueryRange(out start,out length)==true)
            {
                // Same range offered twice in a row: stop instead of looping on it.
                if(previousStart==start)
                {
                    break;
                }
                if(Yahoo.Crawl(words,start,length)==false)
                {
                    break;
                }
                previousStart = start;
                yahoo.RefreshFlags();
                //Sleep a while
                Thread.Sleep(YahooRequestDelayMs);
            }
            return true;
        }
        catch(System.Net.WebException ex)
        {
            this.BackOff(ex,YahooBackOffMs,"Yahoo: Thread Sleep 10 minutes.");
            return false;
        }
    }

    /// <summary>
    /// Crawls the MSN results of one query, range by range.
    /// </summary>
    /// <param name="words">Query string</param>
    /// <returns>
    /// <c>true</c> when the range loop ended without a web exception;
    /// <c>false</c> after a <c>WebException</c> was logged and the back-off pause elapsed.
    /// </returns>
    private bool RunMSN(String words)
    {
        try
        {
            int previousStart = -1;
            int start,length;
            QueryResults msn = new QueryResults(words,EngineSet.MSN,this.mMaxLength);
            while(msn.GetNextQueryRange(out start,out length)==true)
            {
                // Same range offered twice in a row: stop instead of looping on it.
                if(previousStart==start)
                {
                    break;
                }
                if(MSN.Crawl(words,start,length)==false)
                {
                    break;
                }
                previousStart = start;
                msn.RefreshFlags();
                //Sleep a while
                Thread.Sleep(MsnRequestDelayMs);
            }
            return true;
        }
        catch(System.Net.WebException ex)
        {
            this.BackOff(ex,MsnBackOffMs,"MSN: Thread Sleep 2 second.");
            return false;
        }
    }

    /// <summary>
    /// Crawls the Google results of one query, range by range.
    /// </summary>
    /// <param name="words">Query string</param>
    /// <returns>
    /// <c>true</c> when the range loop ended without a web exception;
    /// <c>false</c> after a <c>WebException</c> was logged and the back-off pause elapsed.
    /// </returns>
    private bool RunGoogle(String words)
    {
        try
        {
            int previousStart = -1;
            int start,length;
            QueryResults google = new QueryResults(words,EngineSet.Google,this.mMaxLength);
            while(google.GetNextQueryRange(out start,out length)==true)
            {
                // Same range offered twice in a row: stop instead of looping on it.
                if(previousStart==start)
                {
                    break;
                }
                if(Google.Crawl(words,start,length)==false)
                {
                    break;
                }
                previousStart = start;
                google.RefreshFlags();
                //Sleep a while
                Thread.Sleep(GoogleRequestDelayMs);
            }
            return true;
        }
        catch(System.Net.WebException ex)
        {
            this.BackOff(ex,GoogleBackOffMs,"Google: Thread Sleep 2 second.");
            return false;
        }
    }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -