⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 crawler.cs

📁 用 C# 编写的一款搜索引擎的源代码!摘自《Visual C# 2005 程序设计》
💻 CS
字号:
using System;
using System.IO;
using System.Threading;
using SECompare.Config;
using SECompare.SearchAPI;
using SECompare.Structure;

namespace SECompare.Kernel
{
	/// <summary>
	/// Crawler, which is used to crawl search results from search engines.
	/// For every sampled-query file under the configured root it runs each
	/// query against the selected engines (Google, MSN, Yahoo) and keeps the
	/// progress fields (file / entry / percentages) up to date while doing so.
	/// </summary>
	/// <remarks>
	/// NOTE(review): the progress fields, <c>mEngineSetting</c> and
	/// <c>threadStart</c> are not declared in this class; presumably they are
	/// inherited from <c>ThreadStatus</c> - confirm there.
	/// </remarks>
	public class Crawler:ThreadStatus
	{
		//Pause between two consecutive range requests to the same engine (ms).
		private const int GooglePauseMs = 100;
		private const int MsnPauseMs    = 100;
		private const int YahooPauseMs  = 6000;

		//Back-off after an engine request failed with a WebException (ms).
		private const int GoogleBackOffMs = 2000;    //2 seconds
		private const int MsnBackOffMs    = 2000;    //2 seconds
		private const int YahooBackOffMs  = 600000;  //10 minutes

		/// <summary>
		/// Signature shared by RunGoogle / RunMSN / RunYahoo, so that the
		/// retry loop can be written once.
		/// </summary>
		private delegate bool EngineRunner(String words);

		private readonly int mMaxLength;   //Max amount of results for a query, for example 1000.

		/// <summary>
		/// Creates a crawler and registers <c>Crawl</c> as its thread entry point.
		/// </summary>
		/// <param name="maxLength">Max amount of results to request per query.</param>
		public Crawler(int maxLength)
		{
			this.mMaxLength = maxLength;
			this.threadStart = new ThreadStart(Crawl);
		}

		/// <summary>
		/// Thread entry point: crawls every file found in the sampled-query root.
		/// A thread interrupt ends the crawl quietly; the finished flag is set
		/// on every exit path.
		/// </summary>
		private void Crawl()
		{
			try
			{
				//Get all sampled query list files
				String[] files = Directory.GetFiles((new Config.Config()).SamplingedQueryRoot);
				for(int i=0;i<files.Length;i++)
				{
					//Set Thread Status Report
					this.mFilePercentage = (float)i/files.Length;
					//Crawl
					this.Crawl(files[i]);
				}
			}
			catch(ThreadInterruptedException)
			{
				//Interrupted while sleeping/blocking: stop crawling without
				//reporting an error. Other exception types are not handled here.
			}
			finally
			{
				this.mIsFinished = true;
			}
		}

		/// <summary>
		/// Crawl a sample query file.
		/// Every query in the file is sent to each selected engine. An engine
		/// that fails with a web error is retried on its own until it succeeds,
		/// so engines that already completed this query are not run again.
		/// </summary>
		/// <param name="filename">File name</param>
		private void Crawl(String filename)
		{
			FileInfo file = new FileInfo(filename);

			//read query samples; the bare file name is used as the category
			String category = file.Name;
			CrawlSample sample = new CrawlSample();
			sample.Load(category,filename);

			//get query strings
			String[] queries = sample.GetQueries();

			for(int i=0;i<queries.Length;i++)
			{
				String words = queries[i];

				//************Progress Status Report***********
				this.mProcessingFile  = category;
				this.mProcessingEntry = words;
				this.mEntryPercentage = (float)i/queries.Length;
				//*********************************************

				//For each query
				//1. search Google
				if( this.mEngineSetting.IsSelected(EngineSet.Google) )
					RetryUntilSuccess(new EngineRunner(this.RunGoogle),words);
				//2. search MSN
				if( this.mEngineSetting.IsSelected(EngineSet.MSN) )
					RetryUntilSuccess(new EngineRunner(this.RunMSN),words);
				//3. search Yahoo
				if( this.mEngineSetting.IsSelected(EngineSet.Yahoo) )
					RetryUntilSuccess(new EngineRunner(this.RunYahoo),words);
			}
		}

		/// <summary>
		/// Calls one engine runner repeatedly until it reports success.
		/// The retry is unbounded; the runner itself logs the failure and
		/// sleeps before returning false, so this loop does not spin.
		/// </summary>
		/// <param name="run">Engine runner to invoke.</param>
		/// <param name="words">Query string handed to the runner.</param>
		private static void RetryUntilSuccess(EngineRunner run,String words)
		{
			while(run(words)==false)
			{
				//nothing to do here: the runner already logged and backed off
			}
		}

		/// <summary>
		/// Logs a failed engine request, announces the back-off and then sleeps.
		/// The announcement is written before sleeping so that the log shows
		/// why the crawler is idle while the pause is still running.
		/// </summary>
		/// <param name="ex">The web error that interrupted the engine run.</param>
		/// <param name="notice">Log line announcing the pause.</param>
		/// <param name="milliseconds">Length of the pause in milliseconds.</param>
		private static void LogAndBackOff(System.Net.WebException ex,String notice,int milliseconds)
		{
			Config.Log.Append(ex.Message);
			Config.Log.Append(notice);
			Thread.Sleep(milliseconds);
		}

		/// <summary>
		/// Crawls the Yahoo results of one query, range by range.
		/// </summary>
		/// <param name="words">Query string.</param>
		/// <returns>
		/// true when the run ended without a web error (including the case where
		/// Yahoo.Crawl reported false and the run stopped early); false after a
		/// WebException has been logged and the back-off pause has elapsed.
		/// </returns>
		private bool RunYahoo(String words)
		{
			try
			{
				int preStart,start,length;
				preStart = -1;
				QueryResults yahoo = new QueryResults(words,EngineSet.Yahoo,this.mMaxLength);
				while(yahoo.GetNextQueryRange(out start,out length)==true)
				{
					//Same range offered twice in a row: no progress, stop.
					if(preStart==start)
						break;
					if(Yahoo.Crawl(words,start,length)==false)
						break;
					preStart = start;
					//NOTE(review): presumably re-reads which results are already stored - confirm in QueryResults.
					yahoo.RefreshFlags();
					//Sleep a while
					Thread.Sleep(YahooPauseMs);
				}
				return true;
			}
			catch(System.Net.WebException ex)
			{
				LogAndBackOff(ex,"Yahoo: Thread Sleep 10 minutes.",YahooBackOffMs);
				return false;
			}
		}

		/// <summary>
		/// Crawls the MSN results of one query, range by range.
		/// </summary>
		/// <param name="words">Query string.</param>
		/// <returns>
		/// true when the run ended without a web error (including the case where
		/// MSN.Crawl reported false and the run stopped early); false after a
		/// WebException has been logged and the back-off pause has elapsed.
		/// </returns>
		private bool RunMSN(String words)
		{
			try
			{
				int preStart,start,length;
				preStart = -1;
				QueryResults msn = new QueryResults(words,EngineSet.MSN,this.mMaxLength);
				while(msn.GetNextQueryRange(out start,out length)==true)
				{
					//Same range offered twice in a row: no progress, stop.
					if(preStart==start)
						break;
					if(MSN.Crawl(words,start,length)==false)
						break;
					preStart = start;
					//NOTE(review): presumably re-reads which results are already stored - confirm in QueryResults.
					msn.RefreshFlags();
					//Sleep a while
					Thread.Sleep(MsnPauseMs);
				}
				return true;
			}
			catch(System.Net.WebException ex)
			{
				LogAndBackOff(ex,"MSN: Thread Sleep 2 second.",MsnBackOffMs);
				return false;
			}
		}

		/// <summary>
		/// Crawls the Google results of one query, range by range.
		/// </summary>
		/// <param name="words">Query string.</param>
		/// <returns>
		/// true when the run ended without a web error (including the case where
		/// Google.Crawl reported false and the run stopped early); false after a
		/// WebException has been logged and the back-off pause has elapsed.
		/// </returns>
		private bool RunGoogle(String words)
		{
			try
			{
				int preStart,start,length;
				preStart = -1;
				QueryResults google = new QueryResults(words,EngineSet.Google,this.mMaxLength);
				while(google.GetNextQueryRange(out start,out length)==true)
				{
					//Same range offered twice in a row: no progress, stop.
					if(preStart==start)
						break;
					if(Google.Crawl(words,start,length)==false)
						break;
					preStart = start;
					//NOTE(review): presumably re-reads which results are already stored - confirm in QueryResults.
					google.RefreshFlags();
					//Sleep a while
					Thread.Sleep(GooglePauseMs);
				}
				return true;
			}
			catch(System.Net.WebException ex)
			{
				LogAndBackOff(ex,"Google: Thread Sleep 2 second.",GoogleBackOffMs);
				return false;
			}
		}

	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -