📄 lucenestandardanalyzertext.java
字号:
package chapter8;
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.*;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import java.util.*;
import java.io.*;
/**
 * Small demonstration of indexing and searching one text with Lucene's
 * {@code StandardAnalyzer}.
 *
 * <p>{@link #IndexBuilder()} creates a fresh index at a fixed directory, stores
 * one document whose {@code content} field holds {@link #chinesedetail}, and
 * prints every token the analyzer produces for that text.
 * {@link #QueryStandardAnalyzerTest()} then parses a fixed query with the same
 * analyzer type, runs it against the index and prints the matching fields.
 */
public class LuceneStandardAnalyzerText {
    /** Directory in which the demo index is created and later searched. */
    private static String Dest_Index_Path = "D:\\workshop\\TextIndex";

    /** Mixed English/Chinese sample text; not referenced by the methods of this class. */
    static protected String textdetail = "The Lucene works very well,it is very useful.我们认为很好用。" ;

    /** Chinese sample text that is indexed and tokenized by {@link #IndexBuilder()}. */
    static protected String chinesedetail = "中文文档中最基础的结构是句子、短语、词汇、单个的汉字。中文环境的句子通常可以利用标点符号来分隔。" ;

    /** Stop-word list (common English words plus "very"); not referenced by the methods of this class. */
    public static final String[] self_stop_words = {
        "a", "an", "and", "are", "as", "at", "be", "but", "by",
        "for", "if", "in", "into", "is", "it",
        "no", "not", "of", "on", "or", "such",
        "that", "the", "their", "then", "there", "these",
        "they", "this", "to", "was", "will", "with",
        "very"
    };

    /**
     * Builds the demo index and prints the tokens of the indexed text.
     *
     * <p>A new index is created at {@code Dest_Index_Path} (the {@code true}
     * argument asks the writer to create rather than append), one document with
     * a stored, tokenized {@code content} field is added, the tokens produced by
     * the analyzer are printed as {@code {term} } on a single line, and the
     * index is optimized.
     *
     * <p>The writer is closed in a {@code finally} block so that a failure while
     * adding, tokenizing or optimizing does not leave it open. "Index success"
     * is printed only when every step completed; an {@link IOException} is
     * reported through {@code printStackTrace()} and not rethrown.
     */
    public static void IndexBuilder() {
        try {
            Analyzer analyzer = new StandardAnalyzer();
            IndexWriter writer = new IndexWriter(Dest_Index_Path, analyzer, true);
            try {
                Document document = new Document();
                Field contentField = new Field("content", chinesedetail,
                        Field.Store.YES, Field.Index.TOKENIZED);
                document.add(contentField);
                writer.addDocument(document);

                printTokens(analyzer, "content", chinesedetail);

                writer.optimize();
            } finally {
                // Always release the writer, even if one of the steps above failed.
                writer.close();
            }
            System.out.println("Index success");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints every token the analyzer produces for {@code text} as
     * {@code {term} }, all on one line, then terminates the line.
     *
     * @param analyzer  analyzer used to tokenize the text
     * @param fieldName field name handed to the analyzer
     * @param text      text to tokenize
     * @throws IOException if reading tokens or closing the stream fails
     */
    private static void printTokens(Analyzer analyzer, String fieldName, String text)
            throws IOException {
        TokenStream stream = analyzer.tokenStream(fieldName, new StringReader(text));
        try {
            // next() returns null once the stream is exhausted.
            for (Token token = stream.next(); token != null; token = stream.next()) {
                System.out.print("{" + token.termText() + "} ");
            }
        } finally {
            // End the token line whether or not tokenizing completed.
            System.out.println("");
            stream.close();
        }
    }

    /*================================================================
     * Name:     QueryStandardAnalyzerTest
     * Function: builds the search query, analyzes the search words with
     *           the given analyzer, and prints the matching results.
     ===============================================================*/
    /**
     * Searches the demo index for a fixed phrase and prints the hits.
     *
     * <p>The query text is parsed against the {@code content} field with a
     * {@code StandardAnalyzer}; the parsed query is printed, followed by
     * "Search result:" and the {@code content} field of every hit.
     *
     * <p>The searcher is closed in a {@code finally} block. "Search success" is
     * printed only when parsing and searching both completed; a
     * {@link ParseException} or {@link IOException} is reported through
     * {@code printStackTrace()} and not rethrown.
     */
    public static void QueryStandardAnalyzerTest() {
        try {
            Analyzer analyzer = new StandardAnalyzer();
            IndexSearcher searcher = new IndexSearcher(Dest_Index_Path);
            try {
                String searchWords = "中文文档";
                QueryParser parser = new QueryParser("content", analyzer);
                Query query = parser.parse(searchWords);
                System.out.println(query.toString());

                Hits hits = searcher.search(query);
                System.out.println("Search result:");
                for (int i = 0; i < hits.length(); i++) {
                    System.out.println(hits.doc(i).getField("content"));
                }
            } finally {
                // Release the searcher regardless of how the search ended.
                searcher.close();
            }
            System.out.println("Search success");
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs the demo: builds the index, then searches it.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        IndexBuilder();
        QueryStandardAnalyzerTest();
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -