📄 operator_proximity.java
字号:
/**
*
*/
package cn.edu.nju.software.ruse;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Proximity search operator. Selects the files in which every query word
 * occurs, in the order given, with at most a given number of other words
 * between each word and the next one.
 *
 * @author spring
 */
public class Operator_Proximity extends Operator {

    /** Matches one query token: a run of word characters. */
    private static final Pattern TOKEN = Pattern.compile("\\b\\w+\\b");

    /** Matches a token consisting only of digits, i.e. a usable distance. */
    private static final Pattern NUMBER = Pattern.compile("\\d+");

    /**
     * Sets the description and priority of this operator. Both fields and the
     * {@code PRI_PROXIMITY} constant are declared outside this class.
     */
    public Operator_Proximity() {
        desription = "PROXIMITY";
        PRI = PRI_PROXIMITY;
    }

    /* (non-Javadoc)
     * @see cn.edu.nju.software.ruse.Operator#getPRI()
     */
    public int getPRI() {
        return PRI;
    }

    /**
     * Evaluates a proximity expression against the index.
     *
     * <p>The expression is trimmed, lower-cased and split into word tokens.
     * When there is more than one token and the last one is purely numeric,
     * it is taken as the maximum number of other words allowed between two
     * consecutive query words; otherwise that maximum is 0 (the words must be
     * adjacent). A file is selected when it contains the query words in the
     * given order, each separated from the next by at most that many words,
     * none of which is itself a query word.
     *
     * @param expre the proximity expression, e.g. {@code "quick fox 2"}
     * @param index the index whose word-to-file-to-positions map is searched
     * @return a new set holding the matching files; empty when the expression
     *         is {@code null}, contains no word, or names a word that is not
     *         in the index
     */
    public HashSet<File> getFileSet(String expre, Index index) {
        HashSet<File> matches = new HashSet<File>();
        if (expre == null) {
            return matches;
        }

        List<String> words = new ArrayList<String>();
        Matcher tokens = TOKEN.matcher(expre.trim().toLowerCase());
        while (tokens.find()) {
            words.add(tokens.group());
        }
        if (words.isEmpty()) {
            return matches;
        }

        // A trailing number is the distance only when a search word precedes
        // it; a lone number is searched for as a word.
        int distance = 0;
        String last = words.get(words.size() - 1);
        if (words.size() > 1 && NUMBER.matcher(last).matches()) {
            distance = parseDistance(last);
            words.remove(words.size() - 1);
        }

        HashMap<String, HashMap<File, HashSet<Integer>>> fileContentsIndex =
                index.getFileContentsIndex();

        // AND step: keep only the files that contain every query word.
        HashSet<File> candidates = new HashSet<File>();
        for (int i = 0; i < words.size(); i++) {
            HashMap<File, HashSet<Integer>> postings = fileContentsIndex.get(words.get(i));
            if (postings == null) {
                // A word that is not indexed cannot be near anything.
                return matches;
            }
            if (i == 0) {
                candidates.addAll(postings.keySet());
            } else {
                candidates.retainAll(postings.keySet());
            }
        }

        // Proximity step: lay the query words out by position for each file
        // and look for a chain that respects the order and the distance.
        HashMap<Integer, String> wordAt = new HashMap<Integer, String>();
        for (File file : candidates) {
            wordAt.clear();
            int maxPos = 0; // reset for every file
            for (String word : words) {
                HashSet<Integer> positions = fileContentsIndex.get(word).get(file);
                if (positions == null) {
                    continue;
                }
                for (Integer position : positions) {
                    int at = position.intValue();
                    if (at < 0) {
                        // Only positions from 0 upward are examined; this also
                        // keeps -1 free as the "not found" marker below.
                        continue;
                    }
                    wordAt.put(position, word);
                    if (at > maxPos) {
                        maxPos = at;
                    }
                }
            }
            if (hasOrderedChain(words, wordAt, maxPos, distance)) {
                matches.add(file);
            }
        }
        return matches;
    }

    /** Parses an all-digit token, clamping values too large for an int. */
    private static int parseDistance(String digits) {
        try {
            return Integer.parseInt(digits);
        } catch (NumberFormatException tooLarge) {
            // The token holds digits only, so the sole failure is overflow.
            return Integer.MAX_VALUE;
        }
    }

    /**
     * Tells whether some occurrence of the first word starts a chain in which
     * the next occupied position always holds the next query word and lies at
     * most {@code distance} unoccupied positions away.
     */
    private static boolean hasOrderedChain(List<String> words, HashMap<Integer, String> wordAt,
            int maxPos, int distance) {
        String first = words.get(0);
        for (Integer start : wordAt.keySet()) {
            if (!first.equals(wordAt.get(start))) {
                continue;
            }
            int current = start.intValue();
            boolean complete = true;
            for (int i = 1; i < words.size(); i++) {
                int next = nextOccupied(wordAt, current, maxPos, distance);
                if (next < 0 || !words.get(i).equals(wordAt.get(next))) {
                    complete = false;
                    break;
                }
                current = next;
            }
            if (complete) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the first occupied position after {@code current} that is at
     * most {@code distance} unoccupied positions away, or -1 when none exists.
     */
    private static int nextOccupied(HashMap<Integer, String> wordAt, int current,
            int maxPos, int distance) {
        // long arithmetic: current + distance + 1 may exceed the int range.
        long limit = Math.min((long) maxPos, (long) current + distance + 1L);
        for (long candidate = (long) current + 1L; candidate <= limit; candidate++) {
            if (wordAt.containsKey(Integer.valueOf((int) candidate))) {
                return (int) candidate;
            }
        }
        return -1;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -