📄 wposgetter.java
字号:
package searchingEngine.dataPreprocessing.wordPosition;
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.*;
import searchingEngine.dataPreprocessing.invertedFile.DocNode;
import searchingEngine.dataPreprocessing.invertedFile.TermNode;
import searchingEngine.dataPreprocessing.rawData.PostNode;
import searchingEngine.utilites.dataConverter.RawConverter;
/**
 * Builds per-term word-position postings from the sorted posting text files
 * and serialises them through {@link RawConverter}.
 *
 * <p>Each posting line is split on single spaces; field 1 is read as the term,
 * field 2 as the file id and field 3 as the word position. Field 0 is not used
 * by this class.
 */
public class WposGetter {
    /** Path of one sorted posting file; not read by any code in this class. */
    public final String FILEPATH = "G:/ir/sortedPost118.txt";

    /** Term node created for the term given to the constructor; its document list starts empty. */
    public final TermNode term_node;

    /** Set to 0 by the constructor and not modified anywhere else in this class. */
    private int totalFound;

    /**
     * Creates a getter bound to {@code term} with an empty document list.
     *
     * @param term the term stored in {@link #term_node}
     * @throws Exception kept in the signature for compatibility with existing callers
     */
    public WposGetter(String term) throws Exception {
        term_node = new TermNode(term, new LinkedList<DocNode>());
        totalFound = 0;
    }

    /**
     * Reads {@code G:/ir/sortedPost<i>.txt} and groups consecutive lines that
     * share the same term into one {@link TermNode} each.
     *
     * <p>Lines for one term are expected to be adjacent: a change of term on
     * consecutive lines starts a new node. Blank lines are skipped.
     *
     * @param i numeric suffix of the posting file to read
     * @return the term nodes in file order, including the node for the last term in the file
     * @throws Exception if the file cannot be read or a line cannot be parsed
     */
    private LinkedList<TermNode> buildWPosStore(int i) throws Exception {
        LinkedList<TermNode> termList = new LinkedList<TermNode>();
        BufferedReader br = new BufferedReader(new FileReader("G:/ir/sortedPost" + i + ".txt"));
        try {
            String currentTerm = null;
            TermNode termNode = null;
            String line;
            while ((line = br.readLine()) != null) {
                if (line.trim().length() == 0) {
                    continue; // tolerate blank (e.g. trailing) lines instead of failing on splited[1]
                }
                String[] splited = line.split(" ");
                String term = splited[1];
                // Compare by value; the null check also covers a first line whose term is "".
                if (termNode == null || !term.equals(currentTerm)) {
                    if (termNode != null) {
                        termList.add(termNode);
                    }
                    currentTerm = term;
                    termNode = new TermNode(currentTerm, new LinkedList<DocNode>());
                }
                insert(termNode.doc_list, Integer.parseInt(splited[2]), Integer.parseInt(splited[3]));
            }
            // Flush the node still being built when the file ends; without this
            // the last term of every file would be lost.
            if (termNode != null) {
                termList.add(termNode);
            }
        } finally {
            // Close the reader even when parsing fails part-way through.
            br.close();
        }
        return termList;
    }

    /**
     * Sorts the document list of {@link #term_node} and then the word-position
     * list of every document in it.
     */
    private void sort() {
        Collections.sort(term_node.doc_list);
        // Iterate instead of get(j): indexed access on a linked list is linear per call.
        for (Object entry : term_node.doc_list) {
            Collections.sort(((DocNodeWpos) entry).wpos_list);
        }
    }

    /**
     * Records that the current term occurs at position {@code wpos} in file
     * {@code fileid}, keeping {@code docList} and each word-position list in
     * ascending order.
     *
     * @param docList document list of the term being built; must already be sorted
     * @param fileid  id of the file containing the occurrence
     * @param wpos    word position of the occurrence inside that file
     */
    private void insert(LinkedList<DocNode> docList, int fileid, int wpos) {
        // binarySearch returns -(insertionPoint) - 1 when the key is absent;
        // on an empty list that is -1, i.e. insertion point 0.
        int docIndex = Collections.binarySearch(docList, new DocNodeWpos(fileid));
        if (docIndex < 0) {
            DocNodeWpos created = new DocNodeWpos(fileid);
            insertSorted(created.wpos_list, wpos);
            // Insert at the computed slot rather than appending, so the list
            // stays sorted and later binary searches remain valid.
            docList.add(-docIndex - 1, created);
        } else {
            DocNodeWpos existing = (DocNodeWpos) docList.get(docIndex);
            insertSorted(existing.wpos_list, wpos);
        }
    }

    /**
     * Inserts {@code wpos} into the sorted list {@code positions} at its
     * ordered slot. A value that is already present is inserted again.
     */
    private void insertSorted(LinkedList<Integer> positions, int wpos) {
        int at = Collections.binarySearch(positions, wpos);
        if (at < 0) {
            at = -at - 1;
        }
        positions.add(at, wpos);
    }

    /**
     * Returns the stored total-found counter.
     *
     * @return the counter value; this class only ever assigns it 0
     */
    public int getTotalFound() {
        return totalFound;
    }

    /**
     * Converts posting files 57 through 118 into serialised term lists under
     * {@code G:/ir/wpos/}, printing a progress line after each file.
     *
     * <p>An earlier per-keyword export that read {@code keywordsList215986.txt}
     * had been commented out here and called a no-argument
     * {@code buildWPosStore()} that does not exist; it was removed.
     *
     * @param args unused
     * @throws Exception if reading a posting file or saving a result fails
     */
    public static void main(String[] args) throws Exception {
        WposGetter wpos_store = new WposGetter("haha");
        for (int i = 57; i <= 118; i++) {
            RawConverter.saveObject(wpos_store.buildWPosStore(i), "G:/ir/wpos/combined" + i + ".dat");
            System.out.println("done" + i);
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -