📄 combinedocnodebyline.java
字号:
package searchingEngine.dataPreprocessing.wordPosition;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.LinkedList;
import searchingEngine.Parameters;
import searchingEngine.dataPreprocessing.invertedFile.DocNode;
import searchingEngine.dataPreprocessing.invertedFile.TermNode;
/**
 * Merges two term files into one.
 *
 * <p>Each input line is parsed by {@link #loadTerm(String)} into a
 * {@code TermNode}. The two inputs are walked in parallel: terms that are
 * equal have their document lists (and, per document, their word-position
 * lists) merged; otherwise the smaller term (per {@code TermNode.compareTo})
 * is written first. Both inputs are assumed to be sorted by that same
 * ordering -- TODO confirm against the code that produces these files.
 */
public class CombineDocNodeByLine {

    /** Paths of the two input files to merge. */
    private String inputA, inputB;

    /** Path of the merged output file. */
    private String output;

    /**
     * @param inputA path of the first input file
     * @param inputB path of the second input file
     * @param output path of the file the merged terms are written to
     */
    public CombineDocNodeByLine(String inputA, String inputB, String output) {
        this.inputA = inputA;
        this.inputB = inputB;
        this.output = output;
    }

    /**
     * Merges {@code inputA} and {@code inputB} into {@code output}, one term
     * per line. Prints a progress counter to standard output every 5000
     * merged lines.
     *
     * @throws IOException if any of the files cannot be opened, read or written
     */
    public void combine() throws IOException {
        // Nested try/finally so every stream that was opened is closed even
        // when parsing or I/O fails half-way through the merge.
        BufferedReader br1 = new BufferedReader(new FileReader(inputA));
        try {
            BufferedReader br2 = new BufferedReader(new FileReader(inputB));
            try {
                BufferedWriter bw = new BufferedWriter(new FileWriter(output));
                try {
                    mergeInto(br1, br2, bw);
                } finally {
                    bw.close();
                }
            } finally {
                br2.close();
            }
        } finally {
            br1.close();
        }
    }

    /** Performs the two-way merge of the term streams into the writer. */
    private void mergeInto(BufferedReader br1, BufferedReader br2, BufferedWriter bw)
            throws IOException {
        TermNode node1 = readTerm(br1);
        TermNode node2 = readTerm(br2);
        int count = 0;
        while (node1 != null && node2 != null) {
            if (node1.equals(node2)) {
                writeTerm(bw, merge(node1, node2));
                node1 = readTerm(br1);
                node2 = readTerm(br2);
            } else if (node1.compareTo(node2) < 0) {
                // Only the side that was written advances; the other term is
                // kept and compared again on the next iteration.
                writeTerm(bw, node1);
                node1 = readTerm(br1);
            } else {
                writeTerm(bw, node2);
                node2 = readTerm(br2);
            }
            if (count % 5000 == 0) {
                System.out.println(count);
            }
            count++;
        }
        // At most one of the two inputs still has terms; copy them through.
        for (; node1 != null; node1 = readTerm(br1)) {
            writeTerm(bw, node1);
        }
        for (; node2 != null; node2 = readTerm(br2)) {
            writeTerm(bw, node2);
        }
    }

    /** Reads and parses the next term line, or returns null at end of input. */
    private TermNode readTerm(BufferedReader reader) throws IOException {
        String line = reader.readLine();
        return (line == null) ? null : loadTerm(line);
    }

    /** Writes one term followed by a line separator. */
    private void writeTerm(BufferedWriter writer, TermNode node) throws IOException {
        writer.write(node.toString());
        writer.newLine();
    }

    /** Builds a new term carrying term1's text and the merged document lists. */
    private TermNode merge(TermNode term1, TermNode term2) {
        return new TermNode(term1.term, mergeDocList(term1.doc_list, term2.doc_list));
    }

    /**
     * Merges two document lists whose elements are {@code DocNodeWpos},
     * ordered by {@code DocNodeWpos.compareTo}. Documents present in both
     * lists appear once in the result, with their position lists merged.
     *
     * @return a new list; the input lists themselves are not modified, but
     *         see the note on shared documents below
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private LinkedList mergeDocList(LinkedList list1, LinkedList list2) {
        // Snapshot into arrays: LinkedList.get(i) is linear, which made the
        // original index-based merge quadratic.
        Object[] docs1 = list1.toArray();
        Object[] docs2 = list2.toArray();
        LinkedList merged = new LinkedList();
        int i = 0, j = 0;
        while (i < docs1.length && j < docs2.length) {
            DocNodeWpos left = (DocNodeWpos) docs1[i];
            DocNodeWpos right = (DocNodeWpos) docs2[j];
            int checker = left.compareTo(right);
            if (checker == 0) {
                // Keep a DocNodeWpos in the result (not a bare position list)
                // so the document id survives the merge. The left node is
                // reused and its position list refilled in place.
                // NOTE(review): assumes DocNodeWpos caches nothing derived
                // from wpos_list at construction time -- confirm.
                LinkedList<Integer> positions = mergeWpos(left.wpos_list, right.wpos_list);
                left.wpos_list.clear();
                left.wpos_list.addAll(positions);
                merged.add(left);
                i++;
                j++;
            } else if (checker < 0) {
                merged.add(left);
                i++;
            } else {
                merged.add(right);
                j++;
            }
        }
        while (i < docs1.length) {
            merged.add(docs1[i++]);
        }
        while (j < docs2.length) {
            merged.add(docs2[j++]);
        }
        return merged;
    }

    /**
     * Merges two ascending word-position lists into a new ascending list.
     * A position present in both lists is emitted once.
     */
    private LinkedList<Integer> mergeWpos(LinkedList<Integer> list1, LinkedList<Integer> list2) {
        Integer[] a = list1.toArray(new Integer[list1.size()]);
        Integer[] b = list2.toArray(new Integer[list2.size()]);
        LinkedList<Integer> merged = new LinkedList<Integer>();
        int i = 0, j = 0;
        while (i < a.length && j < b.length) {
            int checker = a[i].compareTo(b[j]);
            if (checker < 0) {
                merged.add(a[i++]);
            } else if (checker > 0) {
                merged.add(b[j++]);
            } else {
                // Equal positions: the original had no branch for this case
                // and never advanced, looping forever.
                merged.add(a[i++]);
                j++;
            }
        }
        while (i < a.length) {
            merged.add(a[i++]);
        }
        while (j < b.length) {
            merged.add(b[j++]);
        }
        return merged;
    }

    /**
     * Parses every line of the given file with {@link #loadTerm(String)}.
     *
     * @param input path of the file to read
     * @return the parsed terms, in file order
     * @throws IOException if the file cannot be opened or read
     */
    public LinkedList<TermNode> loadTermList(String input) throws IOException {
        LinkedList<TermNode> result = new LinkedList<TermNode>();
        BufferedReader br = new BufferedReader(new FileReader(input));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                result.add(loadTerm(line));
            }
        } finally {
            br.close();
        }
        return result;
    }

    /**
     * Parses one space-separated term line.
     *
     * <p>Token 0 is the term. Document entries start at token 4: the token at
     * the entry start is the file id, the word positions begin four tokens
     * later and run up to a {@code "]"} token; the next entry starts right
     * after that token. Scanning for new entries stops six tokens before the
     * end of the line.
     *
     * @param input one line of a term file
     * @return the term with its list of {@code DocNodeWpos} entries
     * @throws NumberFormatException if a file id or position is not an integer
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    public TermNode loadTerm(String input) {
        String[] splite = input.split(" ");
        LinkedList docListWpos = new LinkedList();
        for (int i = 4; i < splite.length - 6; i++) {
            String fileid = splite[i];
            LinkedList<Integer> wposList = new LinkedList<Integer>();
            i = i + 4;
            while (!"]".equals(splite[i])) {
                wposList.add(Integer.parseInt(splite[i]));
                i++;
            }
            docListWpos.add(new DocNodeWpos(Integer.parseInt(fileid), wposList));
        }
        return new TermNode(splite[0], docListWpos);
    }

    /**
     * Entry point; currently does nothing. Example usage:
     *
     * <pre>
     * CombineDocNodeByLine cbnl = new CombineDocNodeByLine(
     *         "new/combineXX0.txt", "new/combineXX96.txt", "new/combineXXX0.txt");
     * cbnl.combine();
     * </pre>
     *
     * @param args unused
     */
    public static void main(String[] args) throws IOException {
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -