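// Source listing: luceneindexlocaldisk.java
// (A "font size:" control label from the web code viewer was captured here; it is not part of the program.)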
package chapter5;
import java.io.IOException;
import java.io.File;
import java.io.FileReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
/**
 * Walks a local directory tree, looks for files with a supported suffix,
 * analyzes each one and adds it to a Lucene index stored on the local disk
 * so the index can be searched later.
 */
public class LuceneIndexLocalDisk {

    /** Directory in which the on-disk Lucene index is created. */
    private static final String DEST_INDEX_PATH = "D:\\workshop\\TextIndex";

    /** Root directory whose files are indexed. */
    private static final String TEXT_FILE_PATH = "D:\\workshop\\ch2\\002\\";

    /**
     * Entry point: builds the index for the configured source directory and
     * prints the number of indexed documents.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        File indexpath = new File(DEST_INDEX_PATH);
        File localPath = new File(TEXT_FILE_PATH);
        try {
            int nums = indexBuilder(indexpath, localPath);
            System.out.println("Index Finished " + nums + " docs");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates a new index at {@code indexPath} (the writer is opened in
     * "create" mode), indexes every supported file found under
     * {@code localPath}, then optimizes and closes the index.
     *
     * @param indexPath directory that receives the on-disk index
     * @param localPath root directory to scan; must exist, be a directory and be readable
     * @return the document count reported by the index writer after the scan
     * @throws IOException if {@code localPath} is missing, not a directory or
     *                     unreadable, or if indexing fails
     */
    public static int indexBuilder(File indexPath, File localPath)
            throws IOException {
        if (!localPath.exists() || !localPath.isDirectory() || !localPath.canRead()) {
            throw new IOException(localPath + "不存在或者不允许访问");
        }
        System.out.println("目标路径完好");

        IndexWriter fsWriter = new IndexWriter(indexPath, new StandardAnalyzer(), true);
        try {
            fsWriter.setUseCompoundFile(true);
            SubindexBuilder(fsWriter, localPath);
            int num = fsWriter.docCount();
            fsWriter.optimize();
            return num;
        } finally {
            // Always release the writer, even when the scan or the optimize
            // step fails; otherwise the writer stays open on the index.
            fsWriter.close();
        }
    }

    /**
     * Recursively scans {@code subPath}: sub-directories are descended into,
     * files whose name has a supported suffix are indexed.
     *
     * @param fswriter writer of the on-disk index that receives the documents
     * @param subPath  directory to scan
     * @throws IOException if indexing one of the files fails
     */
    private static void SubindexBuilder(IndexWriter fswriter, File subPath)
            throws IOException {
        File[] filelist = subPath.listFiles();
        if (filelist == null) {
            // listFiles() yields null when the path is not a listable
            // directory (e.g. access denied). Skip it, in the same way
            // unreadable files are skipped, instead of failing with an NPE.
            System.err.println("Skipping unreadable directory: " + subPath.getAbsolutePath());
            return;
        }
        System.out.println(subPath.getAbsolutePath() + "路径个数 " + filelist.length);
        for (int i = 0; i < filelist.length; i++) {
            File file = filelist[i];
            if (file.isDirectory()) {
                SubindexBuilder(fswriter, file);
            } else if (IsValidType(file.getName())) {
                fileindexBuilder(fswriter, file);
            }
        }
    }

    /**
     * Indexes a single file: builds a one-document index in memory and merges
     * it into the on-disk index. Hidden, missing or unreadable files are
     * silently skipped.
     *
     * <p>The document carries the stored, untokenized fields {@code name}
     * (file name) and {@code path} (absolute path), and a {@code content}
     * field fed from the file's text.
     *
     * @param fswriter writer of the on-disk index that receives the document
     * @param subfile  file to index
     * @throws IOException if the file cannot be opened or indexing fails
     */
    private static void fileindexBuilder(IndexWriter fswriter, File subfile)
            throws IOException {
        if (subfile.isHidden() || !subfile.exists() || !subfile.canRead()) {
            return;
        }

        // NOTE(review): FileReader decodes with the platform default charset;
        // confirm that matches the encoding of the indexed files.
        FileReader fpReader = new FileReader(subfile);
        try {
            Directory ramdirectory = new RAMDirectory();
            IndexWriter ramWriter = new IndexWriter(ramdirectory, new StandardAnalyzer(), true);
            try {
                System.out.println("创建索引" + subfile.getCanonicalPath());

                Document document = new Document();
                document.add(new Field("name", subfile.getName(),
                        Field.Store.YES, Field.Index.UN_TOKENIZED));
                document.add(new Field("path", subfile.getAbsolutePath(),
                        Field.Store.YES, Field.Index.UN_TOKENIZED));
                document.add(new Field("content", fpReader));

                ramWriter.addDocument(document);
            } finally {
                // The in-memory writer must be closed before its directory
                // is merged, and must not stay open when indexing fails.
                ramWriter.close();
            }
            fswriter.addIndexes(new Directory[] {ramdirectory});
        } finally {
            // Release the file handle on every path; closing a reader that
            // has already been closed has no effect.
            fpReader.close();
        }
    }

    /**
     * Tells whether a file name ends with one of the supported suffixes
     * ({@code .txt}, {@code .html}, {@code .ini}, {@code .conf}). The
     * comparison is case-sensitive.
     *
     * @param name file name to test
     * @return {@code true} if the suffix is supported
     */
    private static boolean IsValidType(String name) {
        return name.endsWith(".txt")
                || name.endsWith(".html")
                || name.endsWith(".ini")
                || name.endsWith(".conf");
    }
}
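/*
 * Web code-viewer keyboard shortcuts (page residue, not part of the program):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */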