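// Source file: lucenestopanalyzertext.java
// (The original "font size:" label here was residue from the web code viewer, not program text.)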
package chapter8;
import java.io.IOException;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import java.util.*;
import java.io.*;
/**
 * Small demo of Lucene's {@code StopAnalyzer}: builds a one-document index
 * using a custom stop-word list, prints the tokens the analyzer produces for
 * the sample text, and then runs a query against the freshly built index.
 */
public class LuceneStopAnalyzerText {
    /** Directory in which the index is created and later searched. */
    private static String Dest_Index_Path = "D:\\workshop\\TextIndex";

    /** Sample text that is both indexed and tokenized for display. */
    static protected String textdetail = "The Lucene works very well,it is very useful." ;

    /** Custom stop-word list handed to the {@code StopAnalyzer} used for indexing. */
    public static final String[] self_stop_words = {
        "a", "an", "and", "are", "as", "at", "be", "but", "by",
        "for", "if", "in", "into", "is", "it",
        "no", "not", "of", "on", "or", "such",
        "that", "the", "their", "then", "there", "these",
        "they", "this", "to", "was", "will", "with",
        "very"
    };

    /**
     * Creates (overwriting any existing) index at {@code Dest_Index_Path}
     * containing a single document whose {@code content} field holds
     * {@code textdetail}, and prints each token the analyzer emits for that
     * text as <code>{token} </code> on one line.
     *
     * <p>I/O failures are reported via {@code printStackTrace()}; the
     * "Index success" line is printed only when the index was written and
     * closed without error.
     */
    public static void IndexBuilder() {
        boolean indexed = false;
        try {
            // Analyzer built from the custom stop-word table.
            Analyzer textAnalyzer = new StopAnalyzer(self_stop_words);
            IndexWriter writer = new IndexWriter(Dest_Index_Path, textAnalyzer, true);
            try {
                // Build the document: one stored, tokenized "content" field.
                Document document = new Document();
                document.add(new Field("content", textdetail,
                        Field.Store.YES, Field.Index.TOKENIZED));
                writer.addDocument(document);

                printTokens(textAnalyzer);

                writer.optimize();
            } finally {
                // Always release the writer, even if indexing failed part-way.
                writer.close();
            }
            indexed = true;
        } catch (IOException e) {
            e.printStackTrace();
        }
        // Terminates the token line printed above.
        System.out.println("");
        if (indexed) {
            System.out.println("Index success");
        }
    }

    /**
     * Runs {@code textdetail} through the given analyzer and prints every
     * resulting token wrapped in braces, separated by spaces, without a
     * trailing newline.
     *
     * @param analyzer analyzer used to tokenize the sample text
     * @throws IOException if the token stream cannot be read or closed
     */
    private static void printTokens(Analyzer analyzer) throws IOException {
        TokenStream stream = analyzer.tokenStream("content", new StringReader(textdetail));
        try {
            for (Token token = stream.next(); token != null; token = stream.next()) {
                System.out.print("{" + token.termText() + "} ");
            }
        } finally {
            stream.close();
        }
    }

    /**
     * Builds a query for the fixed phrase "The Lucene works" with a
     * {@code QueryParser} on the {@code content} field, searches the index at
     * {@code Dest_Index_Path}, and prints the parsed query, its class, and the
     * {@code content} field of every hit.
     *
     * <p>Parse and I/O failures are reported via {@code printStackTrace()};
     * the "Search success" line is printed only when the search completed and
     * the searcher was closed without error.
     */
    public static void QueryStopAnalyzerTest() {
        boolean searched = false;
        try {
            // NOTE(review): the query side uses the default StopAnalyzer while the
            // index was built with self_stop_words - confirm this is intentional.
            Analyzer analyzer = new StopAnalyzer();
            IndexSearcher searcher = new IndexSearcher(Dest_Index_Path);
            try {
                String searchWords = "The Lucene works";
                QueryParser parser = new QueryParser("content", analyzer);
                Query query = parser.parse(searchWords);
                System.out.println(query.toString());
                System.out.println(query.getClass());

                Hits hits = searcher.search(query);
                System.out.println("Search result:");
                for (int i = 0; i < hits.length(); i++) {
                    System.out.println(hits.doc(i).getField("content"));
                }
            } finally {
                // Always release the searcher's underlying index resources.
                searcher.close();
            }
            searched = true;
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        if (searched) {
            System.out.println("Search success");
        }
    }

    /**
     * Entry point: builds the index, then queries it.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        IndexBuilder();
        QueryStopAnalyzerTest();
    }
}
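// ---------------------------------------------------------------------------
// The following is keyboard-shortcut help text left over from the web code
// viewer this file was copied from; it is not part of the program.
//   Copy code:            Ctrl + C
//   Search code:          Ctrl + F
//   Full-screen mode:     F11
//   Toggle theme:         Ctrl + Shift + D
//   Show shortcuts:       ?
//   Increase font size:   Ctrl + =
//   Decrease font size:   Ctrl + -
// ---------------------------------------------------------------------------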