📄 defaultindexer.java
字号:
package invertedList;
import java.io.File;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.SortedSet;
import java.util.StringTokenizer;
import java.util.TreeSet;
import stemmer.EnglishStemmer;
/**
 * A default implementation of {@link AbstractIndexer}. It splits the text
 * extracted from a file into lower-cased tokens and records, for every token,
 * the files it occurs in together with the positions of each occurrence.
 *
 * @author Administrator
 */
public final class DefaultIndexer extends AbstractIndexer {

    /**
     * Characters that separate tokens. Passing a custom delimiter set to
     * {@link StringTokenizer} replaces its default whitespace set, so tab,
     * line feed, carriage return and form feed must be listed explicitly;
     * otherwise words on adjacent lines would be fused into a single token.
     */
    private static final String DELIMITERS = " \t\n\r\f.,;?!\"'@#$%^&*()-";

    /**
     * The inverted index: token -> (file -> sorted positions of the token in
     * that file). This is the very map handed to the constructor, not a copy,
     * so every entry added here is visible to whoever supplied it (presumably
     * InvertedListManager, per the original comment - confirm against caller).
     */
    private final Map<String, Map<FileRecord, SortedSet<Integer>>> allInvertedList;

    /**
     * Creates an indexer that writes into the given inverted index.
     *
     * @param all the index to populate; it is stored by reference and mutated
     *            by {@link #doindex(File)}
     */
    public DefaultIndexer(Map<String, Map<FileRecord, SortedSet<Integer>>> all) {
        // stemmer and factory are not declared in this class; they are
        // presumably fields inherited from AbstractIndexer.
        stemmer = new EnglishStemmer();
        factory = new TextExtractorFactory();
        this.allInvertedList = all;
    }

    /**
     * Indexes one file: extracts its text, tokenizes it and adds every token
     * occurrence to the inverted index. Positions are 1-based token ordinals
     * (the first token of the file has position 1).
     *
     * <p>Tokens are indexed as-is after lower-casing; stemming is intentionally
     * not applied here.
     *
     * @param file the file to index
     * @see invertedList.AbstractIndexer#doindex(java.io.File)
     */
    public void doindex(File file) {
        FileRecord record = new FileRecord(file);
        // The extractor is chosen from the file's name.
        Extractor extractor = factory.createExtractor(record.getName());
        String target = extractor.getInformation(file);
        if (target == null) {
            // NOTE(review): whether getInformation can return null is not
            // visible from this file; treat it as "no text" instead of
            // failing inside the StringTokenizer constructor.
            return;
        }

        StringTokenizer tokenizer = new StringTokenizer(target, DELIMITERS);
        int position = 0;
        while (tokenizer.hasMoreTokens()) {
            // Locale.ROOT keeps index keys identical regardless of the JVM's
            // default locale (e.g. "I" must not become a dotless i).
            String word = tokenizer.nextToken().toLowerCase(Locale.ROOT);
            position++;
            addOccurrence(word, record, position);
        }
    }

    /**
     * Records that {@code word} occurs in {@code record} at {@code position},
     * creating the per-word and per-file entries on first use.
     */
    private void addOccurrence(String word, FileRecord record, int position) {
        Map<FileRecord, SortedSet<Integer>> postings = allInvertedList.get(word);
        if (postings == null) {
            postings = new HashMap<FileRecord, SortedSet<Integer>>();
            allInvertedList.put(word, postings);
        }
        SortedSet<Integer> positions = postings.get(record);
        if (positions == null) {
            positions = new TreeSet<Integer>();
            postings.put(record, positions);
        }
        positions.add(position);
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -