⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 indexreptile.cs

📁 小型搜索软件的源代码
💻 CS
字号:
using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Security.Cryptography;
using System.Collections;
using System.Reflection;
using System.Threading;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Analysis.Standard;
using ShootSearch.Plugin;
using ShootSearch.IFilter;
using ShootSearch.Logging;
using ShootSearch.Helper;
//using ShootSearch.Spiders;

namespace ShootSearch.Indexing
{
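	/// <summary>
	/// Static indexing service: queues downloaded documents and adds them to the
	/// Lucene.Net index on a dedicated indexer thread, using parser plugins
	/// (selected by file extension) to extract the text that gets indexed.
	/// </summary>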
	public class IndexReptile
	{
		
		// Parser plugins keyed by lower-cased file extension (including the leading dot).
		private static Hashtable plugins = new Hashtable();
		// True while the loop in Run() should keep going. Volatile because it is written
		// by Start()/Run() and read by whichever thread calls Add().
		private static volatile bool m_IsRunning = false ;
		// Pending documents; each entry is an ArrayList of { Uri, cache file path, title }.
		// Wrapped with Queue.Synchronized because Add() enqueues on the caller's thread
		// while Run() dequeues on the indexer thread, and a plain Queue is not thread-safe.
		private static Queue m_DocQueue = Queue.Synchronized(new Queue());
		// The thread created by Start() that executes Run().
		private static Thread threadIndexer ;
		// Lock object reserved for coordinating access to the indexer state.
		private static object ThreadLock = new object();
		// Set by StopIndex() to ask Run() to stop; volatile because it crosses threads.
		private static volatile bool NeedStop ;
		// True when Init() found an existing index to update, so IndexDocs() must delete
		// any previous entry for a URL before adding the new one.
		private static bool UpdateIndex = false ;
		// When true, Run() finishes on its own once the queue has stayed idle long enough.
		private static volatile bool m_WaitToFinish = false ;
		// Number of consecutive idle polling iterations counted by Run().
		private static int FreeTime;

		#region 类属性
		/// <summary>
		/// Gets or sets the flag that keeps the indexer loop in <see cref="Run"/> alive.
		/// <see cref="Start"/> sets it to true; <see cref="Run"/> clears it when it stops.
		/// </summary>
		public static bool IsRunning
		{
			get { return m_IsRunning; }
			set { m_IsRunning = value; }
		}

		/// <summary>
		/// Gets or sets whether the indexer may finish on its own: when true,
		/// <see cref="Run"/> stops and optimizes the index once the queue has stayed
		/// empty for its idle limit.
		/// </summary>
		public static bool WaitToFinish
		{
			get { return m_WaitToFinish; }
			set { m_WaitToFinish = value; }
		}

		/// <summary>
		/// Gets the table of registered parser plugins, keyed by lower-cased file extension.
		/// </summary>
		public static Hashtable Plugins
		{
			get { return plugins; }
		}
		#endregion

		#region 添加插件
		/// <summary>
		/// Registers a parser plugin for every file extension it declares.
		/// </summary>
		/// <remarks>
		/// Extensions are stored lower-cased. An extension that is null or does not start
		/// with a dot is skipped with a debug log entry. If an extension already has a
		/// plugin, the plugin registered first is kept.
		/// </remarks>
		/// <param name="plugin">The plugin to register; must not be null.</param>
		/// <exception cref="ArgumentNullException"><paramref name="plugin"/> is null.</exception>
		public static void AddParserPlugin(IParserPlugin plugin)
		{
			if (plugin == null)
				throw new ArgumentNullException("plugin");

			foreach (string extension in plugin.Extensions)
			{
				// A null entry is rejected like any other malformed extension instead of
				// crashing on StartsWith and aborting the remaining extensions.
				if (extension == null || !extension.StartsWith("."))
				{
					Log.Debug("Parser not loaded. Extension '" + extension + "' must start with a dot.");
					continue;
				}

				string extLower = extension.ToLower();
				// First registration wins; later plugins do not override it.
				if (!plugins.ContainsKey(extLower))
					plugins[extLower] = plugin;
			}
		}
		#endregion

		#region 自动加载插件
		/// <summary>
		/// Loads every "*.dll" found in the "plugins" folder located next to the
		/// executing assembly.
		/// </summary>
		/// <remarks>
		/// Does nothing when the folder does not exist. Files are sorted with
		/// AlphabeticComparer before loading. A failure while loading one file is
		/// logged and the remaining files are still processed.
		/// </remarks>
		public static void loadPlugins()
		{
			DirectoryInfo assemblyFolder = Directory.GetParent(Assembly.GetExecutingAssembly().Location);
			DirectoryInfo pluginFolder = new DirectoryInfo(assemblyFolder + "/plugins");

			if (pluginFolder.Exists)
			{
				FileInfo[] candidates = pluginFolder.GetFiles("*.dll");

				// Sort so that plugins are loaded in a fixed order.
				Array.Sort(candidates, new AlphabeticComparer());

				for (int i = 0; i < candidates.Length; i++)
				{
					try
					{
						loadPlugin(candidates[i].FullName);
					}
					catch (Exception loadError)
					{
						Log.Debug(loadError);
					}
				}
			}
		}

		/// <summary>
		/// Loads the assembly at <paramref name="path"/> and registers every concrete
		/// class in it that implements an interface named "IParserPlugin".
		/// </summary>
		/// <remarks>
		/// Exceptions raised while loading the assembly or constructing a plugin
		/// instance propagate to the caller.
		/// </remarks>
		/// <param name="path">Path of the plugin assembly file.</param>
		public static void loadPlugin(string path)
		{
			Assembly pluginAssembly = Assembly.LoadFrom(path);
			foreach (Type candidate in pluginAssembly.GetTypes())
			{
				// Abstract classes cannot be instantiated; attempting it would throw and
				// prevent the remaining types of this assembly from being registered.
				if (!candidate.IsClass || candidate.IsAbstract)
					continue;

				if (candidate.GetInterface("IParserPlugin") == null)
					continue;

				IParserPlugin plugin = pluginAssembly.CreateInstance(candidate.FullName) as IParserPlugin;

				// The interface was matched by name only, so the cast can still fail.
				// Skip just this type and keep scanning the rest of the assembly.
				if (plugin == null)
					continue;

				IndexReptile.AddParserPlugin(plugin);
			}
		}

		#endregion

		#region 添加索引信息
		/// <summary>
		/// Adds one downloaded document to the index.
		/// </summary>
		/// <remarks>
		/// When an existing index is being updated, any previous entry for the same URL
		/// is deleted first (best effort). After indexing, the remaining queue length is
		/// reported to the console logger, and the local file is deleted when caching is
		/// disabled. Failures while deleting the old entry, adding the document or
		/// deleting the file are logged and do not propagate; failures while building
		/// the document or opening the index writer do propagate.
		/// </remarks>
		/// <param name="p_Url">Remote URL of the document.</param>
		/// <param name="p_strFile">
		/// Path of the locally downloaded copy. NOTE(review): the stored "cache" field is
		/// this path with the first Config.CacheDirectory.Length characters removed, so
		/// the path is assumed to start with the cache directory - confirm against callers.
		/// </param>
		/// <param name="p_Title">Document title, stored in the "name" field.</param>
		public static void IndexDocs(Uri p_Url,string p_strFile,string p_Title)
		{
			if (UpdateIndex)
			{
				try
				{
					DeleteDocuments(p_Url.ToString());
				}
				catch (Exception e)
				{
					// Best effort: a failed delete must not stop the new version from being indexed.
					Log.Debug(e);
				}
			}

			// Build the document BEFORE opening the writer: if the file is missing or the
			// path is malformed, the exception must not leave an IndexWriter unclosed.
			Document doc = BuildDocument(p_Url, p_strFile, p_Title);

			IndexWriter writer = new IndexWriter(Config.IndexDirectory, new StandardAnalyzer(), false);
			try
			{
				writer.AddDocument(doc);
			}
			catch (Exception e)
			{
				// Keep the indexer going, but record why this document was not added.
				Log.Debug(e);
			}
			finally
			{
				writer.Close();
			}

			// Report indexing progress (number of documents still queued).
			Log.m_Console.SetURLCount(m_DocQueue.Count);

			// Without caching enabled the downloaded file is no longer needed.
			if (!Config.EnableCache)
			{
				try
				{
					File.Delete(p_strFile);
				}
				catch (Exception e)
				{
					Log.Debug(e);
				}
			}
		}

		/// <summary>
		/// Builds the index document (name, url, cache, created, extension, length, body)
		/// for a downloaded file.
		/// </summary>
		private static Document BuildDocument(Uri p_Url, string p_strFile, string p_Title)
		{
			FileInfo fi = new FileInfo(p_strFile);
			Document doc = new Document();

			doc.Add(new Field("name", p_Title, true, true, true));
			doc.Add(new Field("url", p_Url.ToString(), true, true, false));
			doc.Add(new Field("cache", p_strFile.Substring(Config.CacheDirectory.Length), true, false, false));
			doc.Add(Field.Keyword("created", DateTime.Now.ToString()));
			doc.Add(Field.Keyword("extension", fi.Extension));
			doc.Add(new Field("length", fi.Length.ToString(), true, false, false));

			string strBody = getBody(fi);
			// A parser may hand back null; index an empty body rather than passing null on.
			if (strBody == null)
				strBody = String.Empty;
			doc.Add(new Field("body", strBody, true, true, true));

			return doc;
		}

		/// <summary>
		/// Indexes a downloaded document, using Tools.GetUrlName(p_Url) as its title.
		/// </summary>
		/// <param name="p_Url">Remote URL of the document.</param>
		/// <param name="p_strFile">Path of the locally downloaded copy.</param>
		public static void IndexDocs(Uri p_Url,string p_strFile)
		{
			string derivedTitle = Tools.GetUrlName(p_Url);
			IndexDocs(p_Url, p_strFile, derivedTitle);
		}
		#endregion

		#region 删除已经存在的文档索引
		/// <summary>
		/// Deletes the index entries whose "url" term equals <paramref name="p_Url"/>.
		/// The reader is always closed, even when the delete fails.
		/// </summary>
		/// <param name="p_Url">URL whose existing index entries should be removed.</param>
		private static void DeleteDocuments(string p_Url)
		{
			IndexReader reader = IndexReader.Open(Config.IndexDirectory);
			try
			{
				Term urlTerm = new Term("url", p_Url);
				reader.Delete(urlTerm);
			}
			finally
			{
				reader.Close();
			}
		}

		#endregion

		#region  Init()初始化
		/// <summary>
		/// Prepares the indexer: decides between updating the existing index and
		/// creating a fresh one, resets the run-state flags and optionally loads plugins.
		/// </summary>
		/// <remarks>
		/// The existing index is updated only when Config.AutoUpdateIndex is true and a
		/// "segments" file exists in Config.IndexDirectory. Otherwise a new index is
		/// created in that directory.
		/// </remarks>
		/// <param name="p_Type">0 to load the parser plugins; any other value skips plugin loading.</param>
		public static void Init(int p_Type)
		{
			// Path.Combine instead of a hard-coded "\\" so the existence check does not
			// depend on the platform's directory separator.
			string segmentsFile = Path.Combine(Config.IndexDirectory, "segments");
			bool hasExistingIndex = File.Exists(segmentsFile);

			if (Config.AutoUpdateIndex && hasExistingIndex)
			{
				UpdateIndex = true ;
			}
			else
			{
				// Opening the writer with create = true and closing it leaves an empty index.
				IndexWriter writer = new IndexWriter(Config.IndexDirectory, new StandardAnalyzer(), true);
				writer.Close();
				UpdateIndex = false ;
			}

			NeedStop = false ;
			WaitToFinish = false ;
			FreeTime = 0 ;

			if (p_Type == 0)
			{
				IndexReptile.loadPlugins();
			}
		}
		#endregion

		#region optimizeIndex() 结束
		/// <summary>
		/// Optimizes the index and then notifies the console logger that indexing has finished.
		/// </summary>
		/// <remarks>
		/// A failure during optimization is logged and does not propagate. The writer is
		/// always closed, and the finish notification is sent either way. A failure to
		/// open the writer does propagate.
		/// </remarks>
		public static void optimizeIndex()
		{
			IndexWriter writer = new IndexWriter(Config.IndexDirectory, new StandardAnalyzer(), false);
			try
			{
				writer.Optimize();
			}
			catch (Exception e)
			{
				// Optimization is best effort, but record why it failed instead of hiding it.
				Log.Debug(e);
			}
			finally
			{
				writer.Close();
			}

			Log.m_Console.FinishIndex();
		}
		#endregion

		#region StopIndex() 停止索引
		/// <summary>
		/// Requests the indexer to stop. Only raises the stop flag; the loop in
		/// <see cref="Run"/> notices it, stops dequeuing and optimizes the index.
		/// </summary>
		public static void StopIndex()
		{
			NeedStop = true;
		}
		#endregion

		#region 获取文件文本
		/// <summary>
		/// Extracts the text of a file for indexing.
		/// </summary>
		/// <remarks>
		/// A plugin registered for the file's lower-cased extension is used when there is
		/// one. If no plugin is registered, or the plugin throws (the exception is
		/// logged), the default Parser is used. If that fails too, an empty string is
		/// returned so that the file's other attributes can still be indexed.
		/// </remarks>
		/// <param name="fi">The file to extract text from.</param>
		/// <returns>The extracted text, or an empty string when default parsing fails.</returns>
		private static string getBody(FileInfo fi)
		{
			try
			{
				object registered = IndexReptile.Plugins[fi.Extension.ToLower()];
				IParserPlugin parser = (IParserPlugin) registered;
				if (parser != null)
					return parser.Extract(fi.FullName);
			}
			catch (Exception pluginError)
			{
				Log.Debug(pluginError);
			}

			// No plugin, or the plugin failed: fall back to the default parser.
			string parsedText;
			try
			{
				parsedText = Parser.Parse(fi.FullName);
			}
			catch (Exception)
			{
				parsedText = String.Empty;
			}
			return parsedText;
		}

		#endregion

		#region Convert html to txt
		/// <summary>
		/// Removes HTML tags from a string and turns non-breaking spaces into ordinary spaces.
		/// </summary>
		/// <param name="strHtml">Source text that may contain HTML markup.</param>
		/// <returns>
		/// The text with every "&lt;...&gt;" tag removed; an empty string when the input
		/// is null or empty.
		/// </returns>
		public static string StripHTML(string strHtml)
		{
			if (strHtml == null || strHtml.Length == 0)
				return String.Empty;

			string withoutTags = Regex.Replace(strHtml, "<[^>]*>", "");

			// NOTE(review): the previous replacement swapped a space for a space, which did
			// nothing. It presumably targeted the non-breaking-space entity, so both the
			// entity and the literal U+00A0 character are normalized here - confirm.
			return withoutTags.Replace("&nbsp;", " ").Replace("\u00A0", " ");
		}

		#endregion

		#region 增加文件到索引队列
		/// <summary>
		/// Queues a downloaded document for indexing and starts the indexer thread if it
		/// is not already running.
		/// </summary>
		/// <param name="p_Url">Remote URL of the document.</param>
		/// <param name="p_strFile">Path of the locally downloaded copy.</param>
		/// <param name="p_Title">Document title.</param>
		public static void Add(Uri p_Url,string p_strFile,string p_Title)
		{
			// Queue entries are { url, file path, title }, in the order Run() reads them back.
			ArrayList queueElement = new ArrayList(3);
			queueElement.Add(p_Url);
			queueElement.Add(p_strFile);
			queueElement.Add(p_Title);

			// Enqueue and check-then-start under one lock so that two concurrent callers
			// cannot both observe IsRunning == false and start two indexer threads.
			lock (ThreadLock)
			{
				m_DocQueue.Enqueue(queueElement);

				if (!IsRunning)
					Start();
			}
		}
		#endregion

		#region Run() 索引线程运行起点
		/// <summary>
		/// Entry point of the indexer thread: indexes queued documents until asked to stop.
		/// </summary>
		/// <remarks>
		/// While the queue has entries and no stop was requested, each entry is dequeued
		/// and indexed. Otherwise the loop sleeps briefly and counts the idle iteration.
		/// It ends when a stop was requested, or when WaitToFinish is set and the idle
		/// count reaches its limit. On ending it clears IsRunning and optimizes the
		/// index exactly once. A failure while indexing one document is logged and the
		/// loop continues with the next one.
		/// </remarks>
		public static void Run()
		{
			// 3000 idle iterations of 10 ms each: at least about 30 seconds without work.
			const int idleLimit = 3000;
			const int idleSleepMilliseconds = 10;

			while (IsRunning)
			{
				if (m_DocQueue.Count != 0 && !NeedStop)
				{
					ArrayList queueElement = (ArrayList) m_DocQueue.Dequeue();
					try
					{
						Uri url = (Uri) queueElement[0];
						string filePath = (string) queueElement[1];
						string title = (string) queueElement[2];
						IndexDocs(url, filePath, title);
					}
					catch (Exception e)
					{
						// One bad document must not kill the indexer thread; that would leave
						// IsRunning stuck at true so that Add() never restarts indexing.
						Log.Debug(e);
					}
					FreeTime = 0;
					continue;
				}

				FreeTime++;
				Thread.Sleep(idleSleepMilliseconds);

				// Both stop conditions share one branch so the index is optimized only once
				// even when they are true at the same time.
				if (NeedStop || (WaitToFinish && FreeTime >= idleLimit))
				{
					IsRunning = false;
					optimizeIndex();
					// Leave explicitly: if Start() was called during optimization, IsRunning
					// is true again and a new thread owns the loop.
					return;
				}
			}
		}
		#endregion

		#region  Start() 索引线程开始工作
		/// <summary>
		/// Marks the indexer as running and launches a new thread named "ShootIndexer"
		/// that executes <see cref="Run"/>.
		/// </summary>
		public static void Start()
		{
			IsRunning = true;

			Thread worker = new Thread(new ThreadStart(Run));
			worker.Name = "ShootIndexer";
			threadIndexer = worker;
			worker.Start();
		}

		#endregion
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -