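/*
 * luceneindextypes.java
 *
 * Source code accompanying the book "Lucene+Nutch Search Engine Development" (Java).
 * (Code-viewer page header; the original "font size" control label was page residue.)
 */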
package chapter5;

import java.util.Date;
import java.io.*;

import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory; 
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;

/**
 * Indexes one text file three times, each time with a different storage mode
 * for the "content" field (stored, not stored, stored compressed), and after
 * each run queries the freshly built index by file name and prints the hit.
 *
 * <p>Every index holds a single document with two fields:
 * <ul>
 *   <li>"path" - the file name, stored and indexed as one untokenized term so
 *       that {@link #display} can find it with an exact {@link TermQuery};</li>
 *   <li>"content" - the file text, tokenized, with the storage mode under test.</li>
 * </ul>
 */
public class LuceneIndexTypes {

	/** Index directory for the run that stores the content field as-is. */
	private static final String DEST_INDEX_PATH = "D:\\workshop\\TextIndexstore";
	/** Index directory for the run that does not store the content field. */
	private static final String DEST_INDEX_PATH_UN = "D:\\workshop\\TextIndexunstore";
	/** Index directory for the run that stores the content field compressed. */
	private static final String DEST_INDEX_PATH_ZIP = "D:\\workshop\\TextIndexZip";
	/** The text file that is indexed by all three runs. */
	private static final String TEXT_FILE_PATH = "D:\\workshop\\ch2\\wholeaximofu.txt";

	/**
	 * Builds the three indexes in sequence. The first I/O failure raised by an
	 * indexing step aborts the remaining steps and is printed to stderr.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		try {
			indexstore();
			indexunstore();
			indexcompress();
			System.out.println(" index sucess.");
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Builds the index whose content field is stored ({@code Field.Store.YES})
	 * and prints the search result for the indexed file.
	 *
	 * @throws IOException if the text file cannot be read or the index cannot be written
	 */
	public static void indexstore() throws IOException {
		buildIndex(DEST_INDEX_PATH, Field.Store.YES,
				"########## Index Stored ##########");
	}

	/**
	 * Builds the index whose content field is indexed but not stored
	 * ({@code Field.Store.NO}) and prints the search result for the indexed file.
	 *
	 * @throws IOException if the text file cannot be read or the index cannot be written
	 */
	public static void indexunstore() throws IOException {
		buildIndex(DEST_INDEX_PATH_UN, Field.Store.NO,
				"########## Index UnStored ##########");
	}

	/**
	 * Builds the index whose content field is stored compressed
	 * ({@code Field.Store.COMPRESS}) and prints the search result for the indexed file.
	 *
	 * @throws IOException if the text file cannot be read or the index cannot be written
	 */
	public static void indexcompress() throws IOException {
		buildIndex(DEST_INDEX_PATH_ZIP, Field.Store.COMPRESS,
				"########## Index Compress ##########");
	}

	/**
	 * Searches the index at {@code indexpath} for documents whose "path" field
	 * equals {@code words} and prints each hit's path and, when the field is
	 * present on the retrieved document, its content.
	 *
	 * <p>I/O failures during the search are printed to stderr and not rethrown,
	 * so a failed lookup does not abort the caller.
	 *
	 * @param indexpath file-system path of the index directory to search
	 * @param words     exact value of the "path" field to look up
	 * @throws IOException declared for interface compatibility; search failures
	 *                     are caught and printed inside this method
	 */
	public static void display(String indexpath, String words) throws IOException {
		IndexSearcher searcher = null;
		try {
			searcher = new IndexSearcher(indexpath);
			Query query = new TermQuery(new Term("path", words));
			System.out.println("Query  words:");
			System.out.println("  " + query.toString());

			Hits hits = searcher.search(query);
			System.out.println("Search result:");
			for (int i = 0; i < hits.length(); i++) {
				// Fetch the document once per hit instead of once per field access.
				Document doc = hits.doc(i);
				System.out.println("  Path: " + doc.getField("path").stringValue());
				Field content = doc.getField("content");
				if (content != null) {
					System.out.println("  Content: " + content.stringValue());
				}
			}
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Release the searcher's index files even when the search failed.
			if (searcher != null) {
				try {
					searcher.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

	/**
	 * Creates a fresh single-document index at {@code indexPath}, prints
	 * {@code banner}, then looks the document up again through {@link #display}.
	 *
	 * @param indexPath    directory in which the index is (re)created
	 * @param contentStore storage mode applied to the "content" field
	 * @param banner       line printed after the index has been written
	 * @throws IOException if the text file cannot be read or the index cannot be written
	 */
	private static void buildIndex(String indexPath, Field.Store contentStore, String banner)
			throws IOException {
		File file = new File(TEXT_FILE_PATH);
		// Read the source before opening the writer: a missing or unreadable
		// file then fails without recreating the index or taking its write lock.
		String content = readFile(file);

		Directory dir = FSDirectory.getDirectory(indexPath, false);
		try {
			Analyzer analyzer = new SimpleAnalyzer();
			// create == true: any existing index in this directory is replaced.
			IndexWriter writer = new IndexWriter(dir, analyzer, true);
			try {
				writer.setUseCompoundFile(true);

				Document document = new Document();
				document.add(new Field("path", file.getName(),
						Field.Store.YES, Field.Index.UN_TOKENIZED));
				document.add(new Field("content", content,
						contentStore, Field.Index.TOKENIZED));

				writer.addDocument(document);
				writer.optimize();
			} finally {
				// Always close the writer so the index write lock is released.
				writer.close();
			}
		} finally {
			dir.close();
		}

		System.out.println(banner);
		display(indexPath, file.getName());
	}

	/**
	 * Reads the whole file and decodes it with the platform default charset.
	 *
	 * <p>The stream is drained in a loop because a single {@code read} call may
	 * return fewer bytes than requested and {@code available()} is only an
	 * estimate of the remaining bytes.
	 *
	 * @param file file to read
	 * @return the file's text
	 * @throws IOException if the file cannot be opened or read
	 */
	private static String readFile(File file) throws IOException {
		FileInputStream in = new FileInputStream(file);
		try {
			ByteArrayOutputStream bytes = new ByteArrayOutputStream();
			byte[] chunk = new byte[8192];
			int count;
			while ((count = in.read(chunk)) != -1) {
				bytes.write(chunk, 0, count);
			}
			// NOTE(review): platform default charset is kept from the original;
			// the actual encoding of the text file is not known from this code.
			return new String(bytes.toByteArray());
		} finally {
			in.close();
		}
	}
}
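/*
 * Code-viewer keyboard shortcuts (web page residue, not part of the program):
 *
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */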