// indexreptile.cs
using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Security.Cryptography;
using System.Collections;
using System.Reflection;
using System.Threading;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Analysis.Standard;
using ShootSearch.Plugin;
using ShootSearch.IFilter;
using ShootSearch.Logging;
using ShootSearch.Helper;
//using ShootSearch.Spiders;
namespace ShootSearch.Indexing
{
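/// <summary>
/// Background indexer: queues downloaded documents and writes them to the Lucene
/// index on a dedicated worker thread, using parser plugins (selected by file
/// extension) to extract the text of each document.
/// </summary>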
public class IndexReptile
{
// Parser plugins keyed by lower-cased file extension (including the leading dot).
private static Hashtable plugins = new Hashtable();
// True while the indexer worker loop (Run) should keep going.
// volatile: written by Start/Run and read by Add from other threads.
private static volatile bool m_IsRunning = false;
// Pending work items. Each element is an ArrayList of [Uri url, string file, string title].
// The queue is filled by callers of Add and drained by the worker thread, and a plain
// Queue is not safe for that, so every operation goes through the synchronized wrapper.
private static Queue m_DocQueue = Queue.Synchronized(new Queue());
// The worker thread created by Start.
private static Thread threadIndexer;
// General-purpose lock object for this class.
private static object ThreadLock = new object();
// Set by StopIndex to ask the worker loop to stop; cleared by Init.
private static volatile bool NeedStop;
// True when an existing index is being updated: IndexDocs then deletes any entries
// already stored for a URL before adding the new document.
private static bool UpdateIndex = false;
// Set by the owner once no more documents will be queued; lets the worker loop
// exit after it has been idle long enough.
private static volatile bool m_WaitToFinish = false;
// Number of consecutive idle polls performed by the worker loop.
private static int FreeTime;
#region 类属性
/// <summary>
/// Gets or sets the flag that keeps the indexing worker loop alive.
/// The loop in <c>Run</c> continues for as long as this is <c>true</c>.
/// </summary>
public static bool IsRunning
{
    get { return m_IsRunning; }
    set { m_IsRunning = value; }
}
/// <summary>
/// Gets or sets the flag telling the indexer that crawling has finished.
/// When it is <c>true</c>, the worker loop in <c>Run</c> stops and optimizes the
/// index once it has stayed idle for its idle limit.
/// </summary>
public static bool WaitToFinish
{
    get { return m_WaitToFinish; }
    set { m_WaitToFinish = value; }
}
/// <summary>
/// Gets the table of registered parser plugins, keyed by lower-cased file
/// extension (for example ".pdf").
/// </summary>
public static Hashtable Plugins
{
    get { return plugins; }
}
#endregion
#region 添加插件
/// <summary>
/// Registers a parser plugin for every file extension it declares.
/// </summary>
/// <remarks>
/// Extensions are stored lower-cased. An extension that does not begin with a dot
/// is skipped with a debug log entry. An extension that already has a plugin keeps
/// its existing registration, so the first plugin registered for it wins.
/// </remarks>
/// <param name="plugin">The plugin whose <c>Extensions</c> are registered.</param>
public static void AddParserPlugin(IParserPlugin plugin)
{
    foreach (string ext in plugin.Extensions)
    {
        if (ext.StartsWith("."))
        {
            string key = ext.ToLower();
            if (!plugins.ContainsKey(key))
            {
                plugins[key] = plugin;
            }
        }
        else
        {
            Log.Debug("Parser not loaded. Extension '" + ext + "' must start with a dot.");
        }
    }
}
#endregion
#region 自动加载插件
/// <summary>
/// Loads every plugin assembly (*.dll) found in the "plugins" folder that sits
/// next to the executing assembly.
/// </summary>
/// <remarks>
/// Does nothing when the folder does not exist. Assemblies are processed in the
/// order produced by <c>AlphabeticComparer</c>. A failure while loading one
/// assembly is logged and does not prevent the remaining ones from being loaded.
/// </remarks>
public static void loadPlugins()
{
    DirectoryInfo pluginFolder = new DirectoryInfo(
        Directory.GetParent(Assembly.GetExecutingAssembly().Location) + "/plugins");

    if (pluginFolder.Exists)
    {
        FileInfo[] assemblies = pluginFolder.GetFiles("*.dll");

        // Sort so that the load order is deterministic.
        Array.Sort(assemblies, new AlphabeticComparer());

        for (int i = 0; i < assemblies.Length; i++)
        {
            try
            {
                loadPlugin(assemblies[i].FullName);
            }
            catch (Exception ex)
            {
                Log.Debug(ex);
            }
        }
    }
}
/// <summary>
/// Loads one assembly and registers every parser plugin class it contains.
/// </summary>
/// <remarks>
/// A type qualifies when it is a concrete (non-abstract) class exposing an
/// interface named "IParserPlugin". A type that cannot be used as a plugin is
/// skipped; it no longer stops the scan of the remaining types in the assembly.
/// Exceptions raised while loading the assembly, enumerating its types or
/// constructing a plugin are not caught here and propagate to the caller.
/// </remarks>
/// <param name="path">Path of the plugin assembly to load.</param>
public static void loadPlugin(string path)
{
    Assembly pluginAssembly = Assembly.LoadFrom(path);

    foreach (Type candidate in pluginAssembly.GetTypes())
    {
        // Abstract classes cannot be instantiated; trying would throw and abort
        // the whole assembly, losing any concrete plugins declared after them.
        if (!candidate.IsClass || candidate.IsAbstract)
        {
            continue;
        }

        if (candidate.GetInterface("IParserPlugin") == null)
        {
            continue;
        }

        IParserPlugin plugin = pluginAssembly.CreateInstance(candidate.FullName) as IParserPlugin;
        if (plugin == null)
        {
            // Name matched but the instance is not our IParserPlugin (or could not
            // be created): skip this type and keep scanning the others.
            continue;
        }

        IndexReptile.AddParserPlugin(plugin);
    }
}
#endregion
#region 添加索引信息
/// <summary>
/// Adds one downloaded document to the Lucene index.
/// </summary>
/// <remarks>
/// When an existing index is being updated, entries already stored for the same
/// URL are deleted first. The document is stored with the fields "name", "url",
/// "cache", "created", "extension", "length" and "body". Afterwards the number of
/// documents still queued is reported to the console and, when caching is
/// disabled, the local file is deleted.
/// Failures while deleting old entries, adding the document or deleting the local
/// file are logged and do not propagate. Failures while building the document or
/// opening the index writer propagate to the caller.
/// </remarks>
/// <param name="p_Url">Remote URL the document was fetched from.</param>
/// <param name="p_strFile">
/// Path of the local copy of the document. NOTE(review): presumably always located
/// under Config.CacheDirectory, since a prefix of that length is cut off to form
/// the "cache" field - confirm against the callers.
/// </param>
/// <param name="p_Title">Title stored in the "name" field.</param>
public static void IndexDocs(Uri p_Url, string p_strFile, string p_Title)
{
    if (UpdateIndex)
    {
        try
        {
            DeleteDocuments(p_Url.ToString());
        }
        catch (Exception e)
        {
            // Best effort: a failed delete must not prevent the new version from
            // being indexed, but it should not vanish silently either.
            Log.Debug(e);
        }
    }

    // Build the complete document BEFORE opening the index writer. Anything that
    // throws here (missing file, short path, parser problems) can then no longer
    // leave an opened writer behind without being closed.
    FileInfo fi = new FileInfo(p_strFile);
    Document doc = new Document();
    doc.Add(new Field("name", p_Title, true, true, true));
    doc.Add(new Field("url", p_Url.ToString(), true, true, false));
    doc.Add(new Field("cache", p_strFile.Substring(Config.CacheDirectory.Length), true, false, false));
    doc.Add(Field.Keyword("created", DateTime.Now.ToString()));
    doc.Add(Field.Keyword("extension", fi.Extension));
    doc.Add(new Field("length", fi.Length.ToString(), true, false, false));
    doc.Add(new Field("body", getBody(fi), true, true, true));

    IndexWriter writer = new IndexWriter(Config.IndexDirectory, new StandardAnalyzer(), false);
    try
    {
        writer.AddDocument(doc);
    }
    catch (Exception e)
    {
        Log.Debug(e);
    }
    finally
    {
        writer.Close();
    }

    // Report indexing progress (number of documents still waiting).
    Log.m_Console.SetURLCount(m_DocQueue.Count);

    // Without a local mirror the downloaded file is no longer needed.
    if (!Config.EnableCache)
    {
        try
        {
            File.Delete(p_strFile);
        }
        catch (Exception e)
        {
            Log.Debug(e);
        }
    }
}
/// <summary>
/// Adds one downloaded document to the index, using the name that
/// <c>Tools.GetUrlName</c> derives from the URL as its title.
/// </summary>
/// <param name="p_Url">Remote URL the document was fetched from.</param>
/// <param name="p_strFile">Path of the local copy of the document.</param>
public static void IndexDocs(Uri p_Url, string p_strFile)
{
    string derivedTitle = Tools.GetUrlName(p_Url);
    IndexDocs(p_Url, p_strFile, derivedTitle);
}
#endregion
#region 删除已经存在的文档索引
/// <summary>
/// Removes from the index every document whose "url" field equals the given URL.
/// </summary>
/// <param name="p_Url">URL of the documents to remove.</param>
private static void DeleteDocuments(string p_Url)
{
    IndexReader reader = IndexReader.Open(Config.IndexDirectory);
    try
    {
        reader.Delete(new Term("url", p_Url));
    }
    finally
    {
        // Always release the reader, even when the delete fails.
        reader.Close();
    }
}
#endregion
#region Init()初始化
/// <summary>
/// Prepares the indexer for a new run.
/// </summary>
/// <remarks>
/// A new, empty index is created (replacing any existing one) when automatic
/// index updating is disabled or when the index directory has no "segments" file.
/// Otherwise the existing index is kept and documents are updated in place.
/// The stop request, the wait-to-finish flag and the idle counter are reset.
/// </remarks>
/// <param name="p_Type">
/// Initialization mode: when 0, the parser plugins are loaded from the plugins
/// folder as well; any other value skips plugin loading.
/// </param>
public static void Init(int p_Type)
{
    // Evaluate the existence check once instead of rebuilding the path per use.
    string segmentsFile = Path.Combine(Config.IndexDirectory, "segments");
    bool indexExists = File.Exists(segmentsFile);

    if (!Config.AutoUpdateIndex || !indexExists)
    {
        Log.Debug("Creating a new index (existing segments file found: " + indexExists + ").");

        // Opening the writer with create = true builds a fresh, empty index.
        IndexWriter writer = new IndexWriter(Config.IndexDirectory, new StandardAnalyzer(), true);
        writer.Close();
        UpdateIndex = false;
    }
    else
    {
        UpdateIndex = true;
    }

    NeedStop = false;
    WaitToFinish = false;
    FreeTime = 0;

    if (p_Type == 0)
    {
        IndexReptile.loadPlugins();
    }
}
#endregion
#region optimizeIndex() 结束
/// <summary>
/// Optimizes the index and then notifies the console that indexing has finished.
/// </summary>
/// <remarks>
/// A failure during optimization is logged and does not propagate; the writer is
/// closed in every case. A failure to open the index writer propagates to the
/// caller, and the console is then not notified.
/// </remarks>
public static void optimizeIndex()
{
    IndexWriter writer = new IndexWriter(Config.IndexDirectory, new StandardAnalyzer(), false);
    try
    {
        writer.Optimize();
    }
    catch (Exception e)
    {
        // Optimization is best effort, but leave a trace when it fails.
        Log.Debug(e);
    }
    finally
    {
        writer.Close();
    }

    Log.m_Console.FinishIndex();
}
#endregion
#region StopIndex() 停止索引
/// <summary>
/// Requests the indexing worker to stop. The worker loop in <c>Run</c> notices the
/// request, stops taking documents from the queue, optimizes the index and exits.
/// </summary>
public static void StopIndex()
{
    IndexReptile.NeedStop = true;
}
#endregion
#region 获取文件文本
/// <summary>
/// Extracts the text content of a file.
/// </summary>
/// <remarks>
/// The plugin registered for the file's (lower-cased) extension is tried first.
/// When there is no such plugin, or the plugin throws, the default parser is used.
/// When that fails as well, an empty string is returned so that the file name and
/// the other attributes can still be indexed. Every failure is logged.
/// </remarks>
/// <param name="fi">The file to extract text from.</param>
/// <returns>The extracted text; never null.</returns>
private static string getBody(FileInfo fi)
{
    try
    {
        IParserPlugin plugin = (IParserPlugin) IndexReptile.Plugins[fi.Extension.ToLower()];
        if (plugin != null)
        {
            // The plugin handled the file: use its result.
            string pluginText = plugin.Extract(fi.FullName);
            return pluginText == null ? String.Empty : pluginText;
        }
    }
    catch (Exception e)
    {
        Log.Debug(e);
    }

    // No plugin, or the plugin failed: fall back to the default parser.
    try
    {
        string parsedText = Parser.Parse(fi.FullName);
        return parsedText == null ? String.Empty : parsedText;
    }
    catch (Exception e)
    {
        // A parsing problem must not prevent the file from being indexed.
        Log.Debug(e);
        return String.Empty;
    }
}
#endregion
#region Convert html to txt
/// <summary>
/// Removes HTML markup from a string.
/// </summary>
/// <remarks>
/// Everything between a '&lt;' and the next '&gt;' is removed. Non-breaking spaces,
/// written either as the "&amp;nbsp;" entity or as the U+00A0 character, are
/// replaced by ordinary spaces. Other entities are left untouched.
/// </remarks>
/// <param name="strHtml">Source text that may contain HTML tags.</param>
/// <returns>The text without tags; an empty string when the input is null.</returns>
public static string StripHTML(string strHtml)
{
    if (strHtml == null)
    {
        return String.Empty;
    }

    string withoutTags = Regex.Replace(strHtml, "<[^>]*>", "");

    // The entity must be spelled out: replacing a space with a space does nothing.
    return withoutTags.Replace("&nbsp;", " ").Replace("\u00A0", " ");
}
#endregion
#region 增加文件到索引队列
/// <summary>
/// Queues a downloaded document for indexing and starts the indexing worker when
/// it is not running.
/// </summary>
/// <remarks>
/// The work item is stored as an ArrayList holding, in this order, the URL, the
/// local file path and the title; <c>Run</c> reads the elements back by position.
/// </remarks>
/// <param name="p_Url">Remote URL the document was fetched from.</param>
/// <param name="p_strFile">Path of the local copy of the document.</param>
/// <param name="p_Title">Title of the document.</param>
public static void Add(Uri p_Url, string p_strFile, string p_Title)
{
    ArrayList QueueElement = new ArrayList(3);
    QueueElement.Add(p_Url);
    QueueElement.Add(p_strFile);
    QueueElement.Add(p_Title);

    // Enqueue and "start if not running" must happen as one step. Without the
    // lock, two callers could both see IsRunning == false and each start a worker.
    lock (ThreadLock)
    {
        m_DocQueue.Enqueue(QueueElement);
        if (!IsRunning)
        {
            Start();
        }
    }
}
#endregion
#region Run() 索引线程运行起点
/// <summary>
/// Entry point of the indexing worker thread.
/// </summary>
/// <remarks>
/// While <c>IsRunning</c> is true, queued documents are indexed one at a time.
/// When the queue is empty, or a stop has been requested, the loop sleeps briefly
/// and counts the idle poll. The loop ends, and the index is optimized once, when
/// a stop was requested or when <c>WaitToFinish</c> is set and the idle limit has
/// been reached. Documents still queued when a stop is requested stay in the queue.
/// A failure while indexing one document is logged and the loop continues with
/// the next document.
/// </remarks>
public static void Run()
{
    // Pause between two polls of an empty queue, in milliseconds.
    const int PollIntervalMs = 10;
    // Consecutive idle polls after which a finished crawl is considered drained
    // (3000 polls with a 10 ms pause each).
    const int IdlePollsBeforeFinish = 3000;

    while (IsRunning)
    {
        if (m_DocQueue.Count != 0 && !NeedStop)
        {
            try
            {
                ArrayList QueueElement = (ArrayList) m_DocQueue.Dequeue();
                Uri url = (Uri) QueueElement[0];
                string FilePath = (string) QueueElement[1];
                string Title = (string) QueueElement[2];
                IndexDocs(url, FilePath, Title);
            }
            catch (Exception e)
            {
                // One bad document must not kill the worker: the thread would die
                // with IsRunning still true and nothing would ever restart it.
                Log.Debug(e);
            }
            FreeTime = 0;
        }
        else
        {
            FreeTime++;
            Thread.Sleep(PollIntervalMs);

            // Single combined check so the index is optimized only once even when
            // both stop conditions hold in the same iteration.
            if (NeedStop || (WaitToFinish && FreeTime >= IdlePollsBeforeFinish))
            {
                IsRunning = false;
                optimizeIndex();
            }
        }
    }
}
#endregion
#region Start() 索引线程开始工作
/// <summary>
/// Marks the indexer as running and launches the worker thread, named
/// "ShootIndexer", that executes <c>Run</c>.
/// </summary>
public static void Start()
{
    // The flag is raised first because Run exits at once when it is false.
    IsRunning = true;

    Thread worker = new Thread(new ThreadStart(IndexReptile.Run));
    worker.Name = "ShootIndexer";
    threadIndexer = worker;
    worker.Start();
}
#endregion
}
}
// End of indexreptile.cs. (The keyboard-shortcut help text of the web code viewer
// that followed here was not part of the source file.)