⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 index.java

📁 对一个文件夹下面的doc和txt文件(文件夹下不包含文件夹)中的内容建立索引并实现了搜索功能。(代码有缺陷)
💻 JAVA
字号:
package src;
import java.io.*;
import java.util.*;
import java.util.Map.Entry;

import org.apache.poi.hwpf.extractor.WordExtractor;
import org.tartarus.snowball.SnowballStemmer;
import org.tartarus.snowball.ext.englishStemmer;

/**
 * Builds an inverted index (stemmed, lower-cased word -> set of files that
 * contain it) over the {@code .doc} and {@code .txt} files located directly
 * inside the {@code test-files} directory, and writes the result to a text
 * file named {@code index} in the working directory.
 *
 * <p>Each line of the index file has the form
 * {@code word:file1|file2|...|}.
 *
 * @author Administrator
 */

public class Index {
	/** Characters that separate words; shared by the .doc and .txt code paths. */
	private static final String DELIMITERS = " ,.<>/?;:'[{]}\\|-_=+`~,《。》、?;:‘“”【{】}、|-——=+!@#$%^&*()\"\r\t\n";

	File directory = new File("test-files");
	/** Entries of {@link #directory}; {@code null} when the directory cannot be listed. */
	File[] files = directory.listFiles();
	HashMap<String,TreeSet<File>> map = new HashMap<String, TreeSet<File>>();

	/**
	 * Indexes every {@code .doc} and {@code .txt} entry of {@link #files} and
	 * then writes the index file. A file that cannot be read is reported on
	 * standard error and skipped; the remaining files are still indexed.
	 * If the directory could not be listed, nothing is indexed or written.
	 */
	public  void generateIndex(){
		// File.listFiles() returns null (not an empty array) when the path
		// is missing, is not a directory, or an I/O error occurs.
		if (files == null){
			System.err.println("Cannot list directory: " + directory.getPath());
			return;
		}
		for (int i = 0;i < files.length;i ++){
			File file = files[i];
			try{
				if (hasExtension(file, ".doc")){
					dealWithAFile(new StringTokenizer(readDoc(file), DELIMITERS), file);
				}
				else if (hasExtension(file, ".txt")){
					dealWithAFile(new StringTokenizer(readTxt(file), DELIMITERS), file);
				}
			}catch(IOException e){
				// Best effort: report which file failed and carry on.
				System.err.println("Skipping " + file.getPath() + ": " + e);
			}
		}
		storeInFile();
	}

	/** Returns true when the file name ends with {@code extension}, ignoring case. */
	private static boolean hasExtension(File file, String extension){
		String name = file.getName();
		// regionMatches returns false (instead of throwing) when the name is
		// shorter than the extension and the offset becomes negative.
		return name.regionMatches(true, name.length() - extension.length(),
				extension, 0, extension.length());
	}

	/** Extracts the text of a Word document, always closing the input stream. */
	private static String readDoc(File file) throws IOException{
		FileInputStream is = new FileInputStream(file);
		try{
			return new WordExtractor(is).getText();
		}finally{
			closeQuietly(is);
		}
	}

	/** Reads a whole text file, always closing the reader. */
	private static String readTxt(File file) throws IOException{
		BufferedReader br = new BufferedReader(new FileReader(file));
		try{
			StringBuilder sb = new StringBuilder();
			String line;
			while ((line = br.readLine()) != null){
				// readLine() strips the line terminator; put a delimiter back
				// so the last word of one line does not fuse with the first
				// word of the next.
				sb.append(line).append('\n');
			}
			return sb.toString();
		}finally{
			closeQuietly(br);
		}
	}

	/** Closes a resource, ignoring failures; used only on cleanup paths. */
	private static void closeQuietly(Closeable resource){
		if (resource != null){
			try{
				resource.close();
			}catch(IOException ignored){
				// Nothing useful can be done about a failed close during cleanup.
			}
		}
	}

	/**
	 * Adds every token of one file to the index.
	 *
	 * @param text tokenizer over the file's content
	 * @param file the file the tokens came from; recorded under each
	 *             lower-cased, English-stemmed token
	 */
	public void dealWithAFile(StringTokenizer text,File file){
		// One stemmer per file is enough: setCurrent() supplies a new word each time.
		SnowballStemmer stemmer = new englishStemmer();
		while (text.hasMoreTokens()){
			stemmer.setCurrent(text.nextToken().toLowerCase());
			stemmer.stem();
			String token = stemmer.getCurrent();
			TreeSet<File> postings = map.get(token);
			if (postings == null){
				// First occurrence of this word: create its file set.
				postings = new TreeSet<File>();
				map.put(token, postings);
			}
			postings.add(file);
		}
	}

	/**
	 * Writes {@link #map} to the file {@code index}, one word per line in the
	 * form {@code word:file1|file2|...|}. Failures are reported on standard
	 * error.
	 */
	private void storeInFile(){
		FileWriter index = null;
		try{
			index = new FileWriter("index");
			for (Entry<String, TreeSet<File>> entry : map.entrySet()){
				index.append(entry.getKey()).append(":");
				for (File file : entry.getValue()){
					// getName() drops the directory part on every platform; the
					// previous search for "\\" only worked with Windows paths.
					index.append(file.getName()).append("|");
				}
				index.append("\n");
			}
			// Close on the success path inside the try so that a failed
			// flush is reported rather than silently dropped.
			index.close();
		}catch(IOException e){
			System.err.println("Failed to write index file: " + e);
		}finally{
			// No-op after a successful close; releases the handle otherwise.
			closeQuietly(index);
		}
	}

	/** Builds the index for the {@code test-files} directory. */
	public static void main(String args[]){
		Index i = new Index();
		i.generateIndex();
	}
	
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -