📄 lucenemikanalyzertext.java
字号:
package chapter8;
import java.io.IOException;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import org.mira.lucene.analysis.MIK_CAnalyzer;
import java.util.*;
import java.io.*;
/**
 * Demo program: indexes one hard-coded Chinese sentence with the MIK_CAnalyzer
 * and prints every token the analyzer produces for that sentence.
 *
 * The index is (re)created from scratch at {@link #Dest_Index_Path} on each run.
 */
public class LuceneMIKAnalyzerText {
    /** Directory in which the index is created. */
    private static String Dest_Index_Path = "D:\\workshop\\TextIndex";
    /** Sample text that is both indexed and tokenized for display. */
    static protected String chinesedetail = "中文文档中最基础的结构是句子、短语、词汇、单个的汉字。中文环境的句子通常可以利用标点符号来分隔。" ;

    /**
     * Builds the index, dumps the analyzer's tokens to standard output and
     * reports whether indexing succeeded.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Tracks whether every indexing step completed, so that the success
        // message is not printed after a failure.
        boolean indexed = false;
        try {
            Analyzer TextIKAnalyzer = new MIK_CAnalyzer();
            // 'true' asks the writer to create a new index at the destination path.
            IndexWriter TextIndex = new IndexWriter(Dest_Index_Path,TextIKAnalyzer,true);
            try {
                Document document = new Document();
                Field field_content = new Field("content", chinesedetail,
                        Field.Store.YES,Field.Index.TOKENIZED);
                document.add(field_content);
                TextIndex.addDocument(document);

                printTokens(TextIKAnalyzer, "content", chinesedetail);

                TextIndex.optimize();
            } finally {
                // Always close the writer, even when indexing fails part-way,
                // so the writer's resources are not leaked.
                TextIndex.close();
            }
            indexed = true;
        } catch (IOException e) {
            e.printStackTrace();
        }
        // Terminates the line of tokens printed above.
        System.out.println("");
        if (indexed) {
            System.out.println("Index success");
        } else {
            System.err.println("Index failed");
        }
    }

    /**
     * Runs the analyzer over the given text and prints each token as
     * <code>{term} </code> on standard output, all on one line.
     *
     * @param analyzer  analyzer used to tokenize the text
     * @param fieldName field name passed to the analyzer
     * @param text      text to tokenize
     * @throws IOException if the token stream fails while reading or closing
     */
    private static void printTokens(Analyzer analyzer, String fieldName, String text)
            throws IOException {
        TokenStream stream = analyzer.tokenStream(fieldName, new StringReader(text));
        try {
            // next() returns null once the stream is exhausted.
            for (Token item = stream.next(); item != null; item = stream.next()) {
                System.out.print("{"+item.termText()+"} ");
            }
        } finally {
            stream.close();
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -