// Source file: lucenepaodingstore.java
package com.laozizhu.search.store;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import net.paoding.analysis.analyzer.PaodingAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.LockObtainFailedException;
import com.laozizhu.search.Item;
import com.laozizhu.search.ItemBase;
import com.laozizhu.search.SearchResult;
import com.laozizhu.search.SearchResultBase;
import com.laozizhu.search.Store;
/**
 * Full-text search store for Lucene 2.4 that tokenizes text with the Paoding analyzer.
 *
 * <p>Every item is stored as one Lucene document with the fields {@code url}, {@code author},
 * {@code title}, {@code datetimeCreate} and {@code body}; all of them are stored and analyzed.
 * Operations that modify the index are serialized through a static lock shared by all instances
 * of this class.
 *
 * @author laozizhu.com (老紫竹研究室)
 */
public class LucenePaoDingStore implements Store {
  /** Static monitor guarding every index-modifying operation (save, optimize, delete). */
  private static final Object lock = new Object();

  private static final String FIELD_URL = "url";
  private static final String FIELD_AUTHOR = "author";
  private static final String FIELD_TITLE = "title";
  private static final String FIELD_DATETIMECREATE = "datetimeCreate";
  private static final String FIELD_BODY = "body";

  /** Value passed to {@code IndexWriter.setMaxFieldLength} before a document is added. */
  private static final int MAX_FIELD_LENGTH = 10000000;

  /**
   * Number of top hits inspected by {@link #searchByUrl(String)} while looking for the document
   * whose stored URL is exactly the requested one.
   */
  private static final int URL_LOOKUP_HITS = 50;

  /** Index directory. */
  private String indexPath = "d:/indexpaoding/lzzSearch";

  /** Maximum number of hits collected for one keyword search. */
  private int maxResult = 1000;

  /** Documents added since the last automatic optimize triggered by {@link #save(Item)}. */
  private int totalAdd = 0;

  /** Once more than this many documents have been added, the index is optimized. */
  private int totalToOptimize = 20;

  public String getIndexPath() {
    return indexPath;
  }

  public void setIndexPath(String indexPath) {
    this.indexPath = indexPath;
  }

  public int getMaxResult() {
    return maxResult;
  }

  public void setMaxResult(int maxResult) {
    this.maxResult = maxResult;
  }

  public int getTotalToOptimize() {
    return totalToOptimize;
  }

  public void setTotalToOptimize(int totalToOptimize) {
    this.totalToOptimize = totalToOptimize;
  }

  /**
   * Creates the analyzer used for both indexing and querying.
   *
   * @return a new {@link PaodingAnalyzer} on every call
   */
  public synchronized Analyzer getAnalyzer() {
    return new PaodingAnalyzer();
  }

  /** Opens a writer on {@link #indexPath} using {@link #getAnalyzer()}. */
  private IndexWriter getIndexWriter() throws CorruptIndexException, LockObtainFailedException, IOException {
    return new IndexWriter(indexPath, getAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
  }

  /**
   * Indexes an item, first removing any document already stored under the same URL.
   *
   * @param item the item to index; its URL, author, title, body and creation time must all be
   *     non-null
   * @return {@code true} when the document was added, {@code false} when the item is incomplete
   *     or any exception occurred (the exception is printed, not rethrown)
   */
  public synchronized boolean save(Item item) {
    // The URL is required too: it keys the delete below and Field rejects null values.
    if (item == null || item.getUrl() == null || item.getAuthor() == null || item.getTitle() == null
        || item.getBody() == null || item.getDatetimeCreate() == null) {
      return false;
    }
    synchronized (lock) {
      IndexWriter writer = null;
      try {
        // Remove the previous version of this URL before adding the new one.
        deleteByUrl(item.getUrl());
        writer = getIndexWriter();
        writer.setMaxFieldLength(MAX_FIELD_LENGTH);
        // One document corresponds to one record.
        Document doc = new Document();
        doc.add(new Field(FIELD_URL, item.getUrl(), Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field(FIELD_AUTHOR, item.getAuthor(), Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field(FIELD_TITLE, item.getTitle(), Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field(FIELD_DATETIMECREATE, item.getDatetimeCreate(), Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field(FIELD_BODY, item.getBody(), Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
        totalAdd++;
        if (totalAdd > getTotalToOptimize()) {
          writer.optimize();
          totalAdd = 0;
        }
        return true;
      } catch (Exception e) {
        e.printStackTrace();
        return false;
      } finally {
        closeWriter(writer);
      }
    }
  }

  /** Optimizes the index; an {@link IOException} is printed and otherwise ignored. */
  public void optimize() {
    synchronized (lock) {
      IndexWriter writer = null;
      try {
        writer = getIndexWriter();
        writer.optimize();
      } catch (IOException e) {
        System.out.println(e);
      } finally {
        closeWriter(writer);
      }
    }
  }

  /**
   * Deletes the document with the given Lucene document number.
   *
   * @param id Lucene document number; values outside {@code [0, maxDoc)} are ignored
   */
  public synchronized void deleteById(int id) {
    synchronized (lock) {
      IndexReader indexReader = null;
      try {
        indexReader = IndexReader.open(indexPath);
        if (id >= 0 && id < indexReader.maxDoc()) {
          indexReader.deleteDocument(id);
        }
      } catch (IOException e) {
        // Report the failure instead of silently dropping it.
        e.printStackTrace();
      } finally {
        closeReader(indexReader);
      }
    }
  }

  /**
   * Deletes the document stored under the given URL, if {@link #searchByUrl(String)} finds one.
   *
   * @param url URL of the document to remove
   */
  public synchronized void deleteByUrl(String url) {
    synchronized (lock) {
      IndexReader indexReader = null;
      try {
        indexReader = IndexReader.open(indexPath);
        Item item = searchByUrl(url);
        System.out.println("Delete..." + url);
        if (item != null) {
          indexReader.deleteDocument(item.getId());
          System.out.println("OK");
        }
      } catch (IOException e) {
        System.out.println(e);
      } finally {
        closeReader(indexReader);
      }
    }
  }

  /**
   * Lists non-deleted documents in document-number order, without their body.
   *
   * @param begin first Lucene document number to consider
   * @param number maximum number of items to return
   * @return the items found; empty when {@code number <= 0}, {@code begin < 0}, or nothing is
   *     left at or after {@code begin}. If reading fails, the exception is printed and the items
   *     collected so far are returned.
   */
  public List<Item> findAll(int begin, int number) {
    List<Item> list = new LinkedList<Item>();
    if (number <= 0 || begin < 0) {
      return list;
    }
    IndexReader indexReader = null;
    try {
      indexReader = IndexReader.open(indexPath);
      // Stop at maxDoc instead of running until the reader throws.
      int maxDoc = indexReader.maxDoc();
      for (int i = begin; i < maxDoc && list.size() < number; i++) {
        if (indexReader.isDeleted(i)) {
          continue;
        }
        list.add(toItem(i, indexReader.document(i), false));
      }
      return list;
    } catch (Exception e) {
      e.printStackTrace();
      return list;
    } finally {
      closeReader(indexReader);
    }
  }

  /**
   * Loads one document, including its body, by Lucene document number.
   *
   * @param id Lucene document number
   * @return the item, or {@code null} when the id is out of range, the document is deleted, or
   *     an exception occurred (the exception is printed)
   */
  public Item searchById(int id) {
    IndexReader indexReader = null;
    try {
      indexReader = IndexReader.open(indexPath);
      if (id < 0 || id >= indexReader.maxDoc() || indexReader.isDeleted(id)) {
        return null;
      }
      return toItem(id, indexReader.document(id), true);
    } catch (Exception e) {
      e.printStackTrace();
      return null;
    } finally {
      closeReader(indexReader);
    }
  }

  /**
   * Finds the document whose stored URL is exactly {@code url}.
   *
   * <p>The URL field is analyzed, so the query matches any document sharing URL tokens. The
   * candidates are therefore checked against the stored URL and only an exact match is returned;
   * this prevents {@link #deleteByUrl(String)} from deleting a merely similar document.
   *
   * @param url the URL to look up
   * @return the matching item including its body, or {@code null} when {@code url} is null, no
   *     exact match is among the inspected hits, or an exception occurred (the exception is
   *     printed)
   */
  public Item searchByUrl(String url) {
    if (url == null) {
      return null;
    }
    IndexSearcher isearcher = null;
    try {
      isearcher = new IndexSearcher(indexPath);
      QueryParser parser = new QueryParser(FIELD_URL, getAnalyzer());
      // Escape characters that have a meaning in the query syntax (colon, question mark, ...).
      Query query = parser.parse(encodeURLForLucene(url));
      // Hits-based search is deprecated; use a collector instead.
      TopDocCollector collector = new TopDocCollector(URL_LOOKUP_HITS);
      isearcher.search(query, collector);
      ScoreDoc[] hits = collector.topDocs().scoreDocs;
      for (int i = 0; i < hits.length; i++) {
        Document doc = isearcher.doc(hits[i].doc);
        if (url.equals(doc.get(FIELD_URL))) {
          return toItem(hits[i].doc, doc, true);
        }
      }
      return null;
    } catch (Exception e) {
      e.printStackTrace();
      return null;
    } finally {
      closeSearcher(isearcher);
    }
  }

  /**
   * Searches title and body for the given keywords and returns one page of highlighted results.
   *
   * @param queryString keywords; a fixed set of query-syntax characters is stripped before
   *     parsing
   * @param begin index of the first hit to return
   * @param number maximum number of hits to return
   * @return the result page, or {@code null} when the arguments are invalid ({@code queryString}
   *     null or empty after stripping, {@code begin} negative or not below
   *     {@link #getMaxResult()}, {@code number < 1}) or an exception occurred (the exception is
   *     printed)
   */
  public SearchResult seach(String queryString, int begin, int number) {
    if (queryString == null) {
      return null;
    }
    if (begin < 0 || begin >= getMaxResult() || number < 1) {
      return null;
    }
    queryString = removeIllegleCharForLucene(queryString);
    if (queryString.length() < 1) {
      return null;
    }
    IndexSearcher isearcher = null;
    SearchResult sr = new SearchResultBase();
    try {
      isearcher = new IndexSearcher(indexPath);
      // One analyzer serves the query and every highlight call of this search.
      Analyzer analyzer = getAnalyzer();
      // Search both fields; a match in either one is enough.
      BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
      Query query = MultiFieldQueryParser.parse(queryString, new String[] { FIELD_TITLE, FIELD_BODY }, clauses, analyzer);
      TopDocCollector collector = new TopDocCollector(getMaxResult());
      isearcher.search(query, collector);
      ScoreDoc[] hits = collector.topDocs().scoreDocs;
      sr.setTotal(hits.length);

      // Highlight matches with <read>..</read> instead of the default <b>..</b>.
      Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<read>", "</read>"), new QueryScorer(query));
      // The fragment size bounds how much text is analyzed and returned; use the whole field.
      highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));

      // Compute the page end in long so that a huge 'number' cannot overflow.
      int end = (int) Math.min((long) begin + (long) number, (long) hits.length);
      List<Item> rtn = new ArrayList<Item>();
      Set<String> urls = new HashSet<String>();
      for (int i = begin; i < end; i++) {
        Document doc = isearcher.doc(hits[i].doc);
        String url = doc.get(FIELD_URL);
        // Skip a URL that already appeared earlier on this page.
        if (!urls.add(url)) {
          continue;
        }
        Item o = new ItemBase();
        o.setId(hits[i].doc);
        o.setUrl(url);
        o.setAuthor(doc.get(FIELD_AUTHOR));
        o.setDatetimeCreate(doc.get(FIELD_DATETIMECREATE));
        o.setTitle(highlightOrRaw(highlighter, analyzer, FIELD_TITLE, doc.get(FIELD_TITLE)));
        o.setBody(highlightOrRaw(highlighter, analyzer, FIELD_BODY, doc.get(FIELD_BODY)));
        rtn.add(o);
      }
      sr.setReturnList(rtn);
      return sr;
    } catch (Exception e) {
      e.printStackTrace();
      return null;
    } finally {
      closeSearcher(isearcher);
    }
  }

  /** Returns the highlighted form of {@code text}, or {@code text} itself when there is none. */
  private static String highlightOrRaw(Highlighter highlighter, Analyzer analyzer, String field, String text)
      throws Exception {
    if (text == null) {
      return null;
    }
    String fragment = highlighter.getBestFragment(analyzer, field, text);
    return fragment != null ? fragment : text;
  }

  /** Copies the stored fields of {@code doc} into a new item; the body only when requested. */
  private static Item toItem(int id, Document doc, boolean includeBody) {
    Item item = new ItemBase();
    item.setId(id);
    item.setUrl(doc.get(FIELD_URL));
    item.setAuthor(doc.get(FIELD_AUTHOR));
    item.setTitle(doc.get(FIELD_TITLE));
    item.setDatetimeCreate(doc.get(FIELD_DATETIMECREATE));
    if (includeBody) {
      item.setBody(doc.get(FIELD_BODY));
    }
    return item;
  }

  /** Closes the writer if it was opened; a failure is printed, never thrown. */
  private static void closeWriter(IndexWriter writer) {
    if (writer == null) {
      return;
    }
    try {
      writer.close();
    } catch (Exception e) {
      e.printStackTrace();
    }
  }

  /** Closes the reader if it was opened; a failure is printed, never thrown. */
  private static void closeReader(IndexReader reader) {
    if (reader == null) {
      return;
    }
    try {
      reader.close();
    } catch (Exception e) {
      e.printStackTrace();
    }
  }

  /** Closes the searcher if it was opened; a failure is printed, never thrown. */
  private static void closeSearcher(IndexSearcher searcher) {
    if (searcher == null) {
      return;
    }
    try {
      searcher.close();
    } catch (Exception e) {
      e.printStackTrace();
    }
  }

  /**
   * Escapes the characters of a URL that would otherwise be interpreted as query syntax.
   *
   * @param url the raw URL, not null
   * @return the URL with query-syntax characters backslash-escaped
   */
  private static String encodeURLForLucene(String url) {
    return QueryParser.escape(url);
  }

  /**
   * Strips a fixed set of query-syntax characters from a user-supplied keyword string.
   *
   * @param keyword the raw keywords, not null
   * @return the keywords without {@code : * ? [ ] . $ { }}
   */
  private static String removeIllegleCharForLucene(String keyword) {
    return keyword.replaceAll("([:\\*\\?\\[\\]\\.\\$\\{\\}])", "");
  }
}
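// Code-viewer page residue (not part of the program): keyboard shortcuts.
//   Copy code: Ctrl + C
//   Search code: Ctrl + F
//   Full-screen mode: F11
//   Toggle theme: Ctrl + Shift + D
//   Show shortcuts: ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -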