📄 txthandler.java
字号:
/**
* Use java io to read the .txt files.
*/
package cn.edu.nju.software.ruse;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.HashSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
//import org.tartarus.snowball.SnowballStemmer;
/**
* @author spring
*
*/
/**
 * Populates the content, modification-time and file-size indexes of an
 * {@link Index} from the {@code .txt} files that the index lists.
 *
 * @author spring
 */
public class TxtHandler implements ProcessTxt {
    /** A word is a maximal run of ASCII letters. */
    private static final Pattern WORD_PATTERN = Pattern.compile("[a-zA-Z]+");

    /** Matches file names that end in ".txt". */
    private static final Pattern TXT_NAME_PATTERN = Pattern.compile(".*\\.txt");

    /** Every file known to the index, as returned by {@code getFileNameIndex()}. */
    private final File[] list;

    /**
     * A list to hold all the .txt files.
     */
    private final List<File> ll = new LinkedList<File>();

    /** The index whose maps are filled in by this handler. */
    private final Index index;

    /**
     * Creates a handler bound to the given index.
     *
     * @param index source of the candidate files and owner of the maps to fill
     */
    public TxtHandler(Index index) {
        this.list = index.getFileNameIndex();
        this.index = index;
    }

    /**
     * Put the term and the name of files into the map.
     * <p>
     * Selects the .txt files, fills the modification-time and file-size
     * indexes, then reads every selected file and records, per term and per
     * file, the zero-based word positions at which the term occurs. Terms are
     * lower-cased runs of ASCII letters; no stemming is applied.
     * <p>
     * I/O failures on a single file are reported via
     * {@code printStackTrace()} and do not stop the remaining files from
     * being processed; words read before the failure stay in the index.
     */
    public void processTxt() {
        parseFiles();
        initModTimeIndex();
        initFileSizeIndex();

        HashMap<String, HashMap<File, HashSet<Integer>>> fileContentsIndex = index.getFileContentsIndex();
        for (File item : ll) {
            indexFileContents(item, fileContentsIndex);
        }
    }

    /**
     * Reads one file line by line and records every word occurrence.
     *
     * @param file              the text file to read
     * @param fileContentsIndex term -> (file -> word positions) map to update
     */
    private void indexFileContents(File file,
            HashMap<String, HashMap<File, HashSet<Integer>>> fileContentsIndex) {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(file));
            // Word counter for this file; it keeps running across lines.
            int position = -1;
            String line;
            while ((line = reader.readLine()) != null) {
                // Fixed locale: the default locale may map ASCII letters to
                // non-ASCII ones (e.g. Turkish 'I' -> dotless 'i'), which the
                // ASCII-only word pattern would then fail to match.
                Matcher words = WORD_PATTERN.matcher(line.toLowerCase(java.util.Locale.ENGLISH));
                while (words.find()) {
                    position++;
                    recordOccurrence(fileContentsIndex, words.group(), file, position);
                }
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close even when reading failed, so the file handle is not leaked.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Adds one (term, file, position) occurrence, creating the nested
     * map and set on first use.
     */
    private static void recordOccurrence(
            HashMap<String, HashMap<File, HashSet<Integer>>> fileContentsIndex,
            String term, File file, int position) {
        HashMap<File, HashSet<Integer>> filesForTerm = fileContentsIndex.get(term);
        if (filesForTerm == null) {
            filesForTerm = new HashMap<File, HashSet<Integer>>();
            fileContentsIndex.put(term, filesForTerm);
        }
        HashSet<Integer> positions = filesForTerm.get(file);
        if (positions == null) {
            positions = new HashSet<Integer>();
            filesForTerm.put(file, positions);
        }
        positions.add(Integer.valueOf(position));
    }

    /**
     * Cull other files but .txt.
     * <p>
     * Rebuilds the internal list of .txt files from scratch, so calling this
     * method more than once does not produce duplicate entries. Does nothing
     * beyond clearing the list when the index supplied no file array.
     */
    public void parseFiles() {
        ll.clear();
        if (list == null) {
            return;
        }
        for (int i = 0; i < list.length; i++) {
            if (TXT_NAME_PATTERN.matcher(list[i].getName()).matches()) {
                ll.add(list[i]);
            }
        }
    }

    /**
     * Initial the ModTimeIndex
     * Map(modTime, Set[filename])
     * <p>
     * Groups the selected files by their formatted last-modified time.
     */
    private void initModTimeIndex() {
        // NOTE(review): DateFormat is not imported from java.text, so it is
        // presumably a helper class in this package - confirm its output format.
        DateFormat df = DateFormat.getDateFormat();
        HashMap<String, HashSet<File>> modTimeIndex = index.getModTimeIndex();
        for (File item : ll) {
            String dateString = df.format(item.lastModified());
            HashSet<File> sameTime = modTimeIndex.get(dateString);
            if (sameTime == null) {
                sameTime = new HashSet<File>();
                modTimeIndex.put(dateString, sameTime);
            }
            // Set.add already ignores duplicates; no contains() check needed.
            sameTime.add(item);
        }
    }

    /**
     * Initial the FileSizeIndex
     * Map(size, Set[filename])
     * <p>
     * Groups the selected files by their length as reported by
     * {@link File#length()}.
     */
    private void initFileSizeIndex() {
        HashMap<Long, HashSet<File>> fileSizeIndex = index.getFileSizeIndex();
        for (File item : ll) {
            Long size = Long.valueOf(item.length());
            HashSet<File> sameSize = fileSizeIndex.get(size);
            if (sameSize == null) {
                sameSize = new HashSet<File>();
                fileSizeIndex.put(size, sameSize);
            }
            sameSize.add(item);
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -