📄 htmlcrawler.cs
字号:
using System;
using System.IO;
using System.Threading;
using SECompare.Config;
using SECompare.SearchAPI;
using SECompare.Structure;
namespace SECompare.Kernel
{
/// <summary>
/// Crawler that walks every sampled query list file and, for each query,
/// rank and selected engine, downloads and stores the HTML of the page the
/// matching <c>QueryRecord</c> points to. Progress and completion are
/// published through status fields that are not declared in this class
/// (presumably inherited from <c>ThreadStatus</c>).
/// </summary>
public class HTMLCrawler : ThreadStatus
{
    /// <summary>
    /// Creates the crawler and registers the parameterless <c>Crawl</c>
    /// method as the <c>ThreadStart</c> delegate of this instance.
    /// </summary>
    public HTMLCrawler()
    {
        this.threadStart = new ThreadStart(Crawl);
    }

    /// <summary>
    /// Thread entry point: crawls every file found in the configured
    /// sampled-query root directory, one after another.
    /// </summary>
    private void Crawl()
    {
        try
        {
            // The directory listing is taken once, before any file is processed.
            Config.Config settings = new Config.Config();
            String[] sampleFiles = Directory.GetFiles(settings.SamplingedQueryRoot);
            int fileCount = sampleFiles.Length;

            for (int fileIndex = 0; fileIndex < fileCount; fileIndex++)
            {
                // Progress report: fraction of files that precede the current one.
                this.mFilePercentage = (float)fileIndex / fileCount;

                this.Crawl(sampleFiles[fileIndex]);
            }
        }
        catch (ThreadInterruptedException)
        {
            // Intentionally empty: an interrupt just ends the crawl without logging.
        }
        finally
        {
            // Always flag completion, whether the loop finished, was interrupted or failed.
            this.mIsFinished = true;
        }
    }

    /// <summary>
    /// Crawls a single sample query file: for every query it contains,
    /// every rank from 1 to 1000 and every selected engine, attempts to
    /// download the corresponding page.
    /// </summary>
    /// <param name="filename">Path of the sample query file; its file name is used as the category.</param>
    private void Crawl(String filename)
    {
        // Highest rank probed for each query.
        const int lastRank = 1000;

        // The bare file name doubles as the category of the sample.
        String category = new FileInfo(filename).Name;

        CrawlSample sample = new CrawlSample();
        sample.Load(category, filename);

        String[] queries = sample.GetQueries();
        String[] engines = this.mEngineSetting.SelectedEngines;
        int queryCount = queries.Length;

        for (int queryIndex = 0; queryIndex < queryCount; queryIndex++)
        {
            String query = queries[queryIndex];

            // Progress report: current file and fraction of its queries reached so far.
            this.mProcessingFile = category;
            this.mEntryPercentage = (float)(queryIndex + 1) / queryCount;

            int rank = 1;
            while (rank <= lastRank)
            {
                // Progress report: the query/rank pair being handled right now.
                this.mProcessingEntry = query + "(Rank:" + rank + ")";

                // Try the page of every selected engine; the result is not needed here.
                for (int engineIndex = 0; engineIndex < engines.Length; engineIndex++)
                {
                    this.CrawlPage(query, engines[engineIndex], rank);
                }

                rank++;
            }
        }
    }

    /// <summary>
    /// Downloads and saves the HTML for one (query, engine, rank) record.
    /// </summary>
    /// <param name="Query">Query string of the record.</param>
    /// <param name="Engine">Engine the record belongs to.</param>
    /// <param name="Rank">Rank of the record.</param>
    /// <returns>
    /// <c>true</c> when the page was downloaded and saved; <c>false</c> when
    /// no record was loaded, the HTML already exists, the download reported
    /// failure, or a <c>WebException</c> was raised.
    /// </returns>
    private bool CrawlPage(String Query, String Engine, int Rank)
    {
        try
        {
            QueryRecord record = QueryRecord.Load(Query, Engine, Rank);

            // Nothing to do without a record, or when its HTML was crawled before.
            if (record == null || QueryRecord.ExistsHTML(Query, Engine, Rank))
            {
                return false;
            }

            String pageSource = null;
            if (!SinglePage.SinglePage.Download(record.URL, ref pageSource))
            {
                return false;
            }

            record.SaveHTML(pageSource);
            Config.Log.Append("HTML Saved:" + record.URL);
            return true;
        }
        catch (System.Net.WebException ex)
        {
            // Only web failures are handled here: logged and reported as "not crawled".
            Config.Log.Append("HTML DownLoad Error:" + ex.Message);
            return false;
        }
    }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -