📄 pdflucenetest.java
字号:
package ch7.pdfbox;
import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.pdfbox.searchengine.lucene.LucenePDFDocument;
/**
 * Small demo: indexes a single PDF file with Lucene, using PDFBox's
 * {@link LucenePDFDocument} to turn the PDF into a Lucene {@link Document},
 * then runs a {@link TermQuery} against the freshly written index and prints
 * the matching documents.
 */
public class PdfLuceneTest {

    /** Location of the Lucene index; used both for writing and for searching. */
    private static final String INDEX_DIR = "d:\\index";

    /** The PDF file that gets indexed. */
    private static final String PDF_FILE = "C:\\index.pdf";

    /**
     * Indexes the PDF, then searches the index and prints every hit to
     * standard output. Any exception is reported via its stack trace.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        try {
            // The writer must be fully closed before the searcher opens the index.
            indexPdf();
            searchAndPrint();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes one Lucene document, generated from {@link #PDF_FILE}, into the
     * index at {@link #INDEX_DIR}.
     *
     * @throws Exception if the PDF cannot be converted or the index cannot be written
     */
    private static void indexPdf() throws Exception {
        // NOTE(review): the trailing 'true' is presumably Lucene's "create" flag
        // (start a new index) -- confirm against the Lucene version in use.
        IndexWriter writer = new IndexWriter(INDEX_DIR,
                new StandardAnalyzer(), true);
        try {
            // LucenePDFDocument returns the Lucene Document produced from the PDF.
            Document d = LucenePDFDocument.getDocument(new File(PDF_FILE));
            // Add it to the index.
            writer.addDocument(d);
        } finally {
            // Always close the writer, even when conversion or indexing fails,
            // so it is not left open on the index directory.
            writer.close();
        }
    }

    /**
     * Searches the "contents" field of the index at {@link #INDEX_DIR} for the
     * term "poi" and prints each matching document.
     *
     * @throws Exception if the index cannot be opened or searched
     */
    private static void searchAndPrint() throws Exception {
        IndexSearcher searcher = new IndexSearcher(INDEX_DIR);
        try {
            // Look up the keyword "poi" in the "contents" field.
            Term t = new Term("contents", "poi");
            // Build the query from the term.
            Query q = new TermQuery(t);
            // Run the search.
            Hits hits = searcher.search(q);
            // Print the result set.
            for (int i = 0; i < hits.length(); i++) {
                System.out.println(hits.doc(i));
            }
        } finally {
            // The original never closed the searcher; release its index files here.
            searcher.close();
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -