luceneexceldocument.java

来自「在lucene环境下把excel文件转成txt文件的源代码」· Java 代码 · 共 52 行

JAVA
52
字号
/* * To change this template, choose Tools | Templates * and open the template in the editor. */package lucenesearch;import java.io.*;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.poi.poifs.filesystem.POIFSFileSystem;import org.apache.poi.hssf.usermodel.HSSFWorkbook;import org.apache.poi.hssf.extractor.ExcelExtractor;/** * * @author BileiZhu */public class LuceneEXCELDocument {    public static Document getDocument(File doc) {        String docPath = doc.getAbsolutePath();        String title = doc.getName();        FileInputStream inputStream = null;        Reader contents = null;        Document document = new Document();        try {            inputStream = new FileInputStream(doc);        } catch (FileNotFoundException e) {            System.out.println(e);        }        try {            POIFSFileSystem reader = new POIFSFileSystem(inputStream);            HSSFWorkbook wb = new HSSFWorkbook(reader);            ExcelExtractor extractor = new ExcelExtractor(wb);            extractor.setFormulasNotResults(true);            extractor.setIncludeSheetNames(false);            String textString = extractor.getText();            contents = new StringReader(textString);        } catch (Exception e) {            System.out.println(e);        }        document.add(new Field("path", docPath, Field.Store.YES, Field.Index.NO));        document.add(new Field("title", title, Field.Store.YES, Field.Index.TOKENIZED));        document.add(new Field("contents", contents));        document.add(new Field("information", docPath + Long.toString(doc.lastModified()), Field.Store.YES, Field.Index.UN_TOKENIZED));        return document;    }}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?