⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 luceneindexlocaldisk.java

📁 《lucene+nutch搜索引擎开发》源代码
💻 JAVA
字号:
package chapter5;

import java.io.IOException;
import java.io.File;
import java.io.FileReader;

import org.apache.lucene.store.Directory; 
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

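/*******************************************************************
 * Walks a given local directory tree and looks for files. Files with the
 * accepted suffixes are analyzed and indexed with Lucene so that they are
 * ready for later searching.
 *******************************************************************/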
public class LuceneIndexLocalDisk {

	/** Directory in which the on-disk Lucene index is written. */
	private static final String DEST_INDEX_PATH = "D:\\workshop\\TextIndex";
	/** Root directory whose files are scanned and indexed. */
	private static final String TEXT_FILE_PATH  = "D:\\workshop\\ch2\\002\\";

	/**
	 * Entry point: indexes the files under the configured text directory into
	 * the configured index directory and prints the number of indexed documents.
	 * An IOException raised while indexing is reported via its stack trace.
	 *
	 * @param args command-line arguments (not used)
	 */
	public static void main(String[] args) {

		File indexpath = new File(DEST_INDEX_PATH);
		File localPath = new File(TEXT_FILE_PATH);

		try {
			int nums = indexBuilder(indexpath, localPath);
			System.out.println("Index Finished " + nums + "  docs");
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Creates the on-disk index: opens an IndexWriter on indexPath, indexes the
	 * directory tree rooted at localPath, optimizes the index and closes the writer.
	 *
	 * @param indexPath directory that receives the Lucene index
	 * @param localPath root directory to scan; must exist, be a directory and be readable
	 * @return the writer's document count, read after the traversal and before optimizing
	 * @throws IOException if localPath fails the checks above, or if any file or
	 *                     Lucene operation performed during indexing throws it
	 */
	public static int indexBuilder( File indexPath , File localPath )
	throws IOException{
		if(!localPath.exists() || !localPath.isDirectory() || !localPath.canRead()){
			throw new IOException(localPath + "不存在或者不允许访问" );
		}
		System.out.println("目标路径完好");
		IndexWriter diskWriter = new IndexWriter(indexPath,new StandardAnalyzer(),true);
		try {
			diskWriter.setUseCompoundFile(true);

			SubindexBuilder(diskWriter,localPath);
			int num = diskWriter.docCount();
			diskWriter.optimize();
			return num;
		} finally {
			// Close the writer even when indexing fails, so it is never left open.
			diskWriter.close();
		}
	}

	/**
	 * Recursively walks subPath: descends into subdirectories and indexes every
	 * regular entry whose name passes IsValidType.
	 *
	 * @param fswriter writer of the on-disk index that receives the documents
	 * @param subPath  directory to traverse
	 * @throws IOException if indexing one of the files fails
	 */
	private static void  SubindexBuilder(IndexWriter fswriter,File subPath)
	throws IOException{

		File[] filelist = subPath.listFiles();
		if(filelist == null){
			// listFiles() returns null when the directory cannot be listed;
			// skip it instead of failing the whole run with a NullPointerException.
			System.err.println(subPath.getAbsolutePath() + "无法列出目录内容,已跳过");
			return ;
		}

		System.out.println(subPath.getAbsolutePath() + "路径个数 " + filelist.length);
		for(int i = 0; i< filelist.length;i++){
			File file = filelist[i];
			if(file.isDirectory()){
				SubindexBuilder(fswriter,file);
			} else if(IsValidType(file.getName())){
				fileindexBuilder(fswriter,file);
			}
		}
	}

	/**
	 * Indexes a single file: builds a one-document in-memory (RAMDirectory) index
	 * holding the file's name, path and content, then merges that index into the
	 * on-disk index. Hidden, missing or unreadable files are silently skipped.
	 *
	 * @param fswriter writer of the on-disk index that receives the merged document
	 * @param subfile  file to index
	 * @throws IOException if reading the file or writing either index fails
	 */
	private static void  fileindexBuilder(IndexWriter fswriter,File subfile)
	throws IOException{

		if( subfile.isHidden() || !subfile.exists() || !subfile.canRead()){
			return ;
		}

		// NOTE(review): FileReader decodes with the platform default charset;
		// confirm that this matches the encoding of the files being indexed.
		FileReader fpReader = new FileReader(subfile);
		try {
			Directory ramdirectory = new RAMDirectory();
			IndexWriter ramWriter = new IndexWriter(ramdirectory,new StandardAnalyzer(),true);
			try {
				System.out.println("创建索引" + subfile.getCanonicalPath());
				Document document = new Document();

				// File name and absolute path: stored and indexed as single untokenized terms.
				Field field_name = new Field("name", subfile.getName(),
						Field.Store.YES,Field.Index.UN_TOKENIZED);
				document.add(field_name);

				Field field_path = new Field("path", subfile.getAbsolutePath(),
						Field.Store.YES,Field.Index.UN_TOKENIZED);
				document.add(field_path);

				// File content is supplied through the reader.
				Field field_content = new Field("content", fpReader);
				document.add(field_content);
				ramWriter.addDocument(document);
			} finally {
				// Always close the in-memory writer, also when adding the document fails.
				ramWriter.close();
			}

			// Merge the finished in-memory index into the on-disk index.
			fswriter.addIndexes(new Directory[]{ramdirectory});
		} finally {
			// Release the file handle on every path; closing an already closed reader is harmless.
			fpReader.close();
		}
	}

	/**
	 * Tells whether a file name carries one of the accepted suffixes
	 * (.txt, .html, .ini, .conf). The comparison is case-sensitive.
	 *
	 * @param name file name to test
	 * @return true if the name ends with an accepted suffix, false otherwise
	 */
	private static boolean IsValidType(String name){
		return name.endsWith(".txt") || name.endsWith(".html")
				|| name.endsWith(".ini") || name.endsWith(".conf");
	}

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -