⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 combinedocnodebyline.java

📁 自己写的search engine, 有 boolean search, fuzzy search
💻 JAVA
字号:
package searchingEngine.dataPreprocessing.wordPosition;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.LinkedList;

import searchingEngine.Parameters;
import searchingEngine.dataPreprocessing.invertedFile.DocNode;
import searchingEngine.dataPreprocessing.invertedFile.TermNode;

/**
 * Merges two line-oriented positional index files into a single output file.
 *
 * <p>Every input line is parsed by {@link #loadTerm(String)} into a
 * {@code TermNode}. {@link #combine()} walks both inputs in parallel: when the
 * current terms are equal their document lists are merged, otherwise the
 * smaller term (per {@code TermNode.compareTo}) is written and only that
 * input advances.
 *
 * <p>NOTE(review): the merge is only meaningful when both input files are
 * already sorted in the order defined by {@code TermNode.compareTo}, and when
 * {@code TermNode.equals} agrees with {@code compareTo() == 0} - confirm
 * against the code that produces these files.
 */
public class CombineDocNodeByLine {

	/** Token that terminates the word-position run of one document entry. */
	private static final String WPOS_END = "]";

	/** Index of the first document id token in a split line (tokens 1-3 are skipped). */
	private static final int FIRST_DOC_TOKEN = 4;

	/** Distance from a document id token to its first word-position token. */
	private static final int DOC_ID_TO_WPOS = 4;

	/**
	 * Number of trailing tokens that can never start a document entry.
	 * NOTE(review): value taken verbatim from the original loop bound; the
	 * exact line layout is defined by the writer of these files - confirm.
	 */
	private static final int TRAILING_TOKENS = 6;

	/** A progress counter is printed every this many merge steps. */
	private static final int PROGRESS_INTERVAL = 5000;

	private final String inputA,inputB;
	private final String output;

	/**
	 * @param inputA path of the first index file to merge
	 * @param inputB path of the second index file to merge
	 * @param output path of the file the merged index is written to
	 */
	public CombineDocNodeByLine(String inputA,String inputB,String output){
		this.inputA=inputA;
		this.inputB=inputB;
		this.output=output;
	}

	/**
	 * Merges {@code inputA} and {@code inputB} into {@code output}, one term
	 * per line. All three files are closed even when reading, parsing or
	 * writing fails.
	 *
	 * @throws IOException if a file cannot be opened, read or written
	 */
	public void combine() throws IOException{
		BufferedReader br1 = new BufferedReader(new FileReader(inputA));
		try {
			BufferedReader br2 = new BufferedReader(new FileReader(inputB));
			try {
				BufferedWriter bw = new BufferedWriter(new FileWriter(output));
				try {
					mergeStreams(br1, br2, bw);
				} finally {
					bw.close();
				}
			} finally {
				br2.close();
			}
		} finally {
			br1.close();
		}
	}

	/**
	 * Two-way merge of the term lines of both readers into the writer.
	 * An input is advanced only after its current term has been written, so
	 * no term is dropped when the two files are at different terms.
	 */
	private void mergeStreams(BufferedReader br1, BufferedReader br2, BufferedWriter bw) throws IOException{
		TermNode termNode1 = readTerm(br1);
		TermNode termNode2 = readTerm(br2);
		int count=0;
		while (termNode1 != null && termNode2 != null) {
			if (termNode1.equals(termNode2)){
				writeTerm(bw, merge(termNode1,termNode2));
				termNode1 = readTerm(br1);
				termNode2 = readTerm(br2);
			} else if (termNode1.compareTo(termNode2)<0) {
				writeTerm(bw, termNode1);
				termNode1 = readTerm(br1);
			} else {
				writeTerm(bw, termNode2);
				termNode2 = readTerm(br2);
			}
			if (count%PROGRESS_INTERVAL==0) {System.out.println(count);}
			count++;
		}
		// At most one of the inputs still has terms; copy them through,
		// each on its own line.
		for (; termNode1 != null; termNode1 = readTerm(br1)) {
			writeTerm(bw, termNode1);
		}
		for (; termNode2 != null; termNode2 = readTerm(br2)) {
			writeTerm(bw, termNode2);
		}
	}

	/** Reads and parses the next line, or returns {@code null} at end of input. */
	private TermNode readTerm(BufferedReader br) throws IOException{
		String line = br.readLine();
		return (line == null) ? null : loadTerm(line);
	}

	/** Writes the term's {@code toString()} form followed by a line separator. */
	private void writeTerm(BufferedWriter bw, TermNode termNode) throws IOException{
		bw.write(termNode.toString());
		bw.newLine();
	}

	/** Builds a new term node carrying term1's term and the merged document lists. */
	private TermNode merge(TermNode term1, TermNode term2){
		return new TermNode(term1.term, mergeDocList(term1.doc_list,term2.doc_list));
	}

	/**
	 * Merges two document lists ordered by {@code DocNodeWpos.compareTo}.
	 *
	 * <p>When both lists contain the same document, the node from
	 * {@code list1} is kept and its {@code wpos_list} is replaced, in place,
	 * by the merged positions of both nodes. The original code added the bare
	 * position list here, which lost the document node.
	 *
	 * @return a new list holding the {@code DocNodeWpos} elements of both inputs
	 */
	// Raw LinkedList is kept because the element type expected by TermNode is
	// declared outside this file.
	@SuppressWarnings("unchecked")
	private LinkedList mergeDocList(LinkedList list1,LinkedList list2){
		// Snapshot into arrays: LinkedList.get(index) is linear, so indexing
		// the lists directly inside the loop would make the merge quadratic.
		Object[] host = list1.toArray();
		Object[] sub = list2.toArray();
		int indexList1=0,indexList2 = 0;
		LinkedList newList = new LinkedList();
		while ((indexList1<host.length)&&(indexList2<sub.length)){
			DocNodeWpos node1 = (DocNodeWpos)host[indexList1];
			DocNodeWpos node2 = (DocNodeWpos)sub[indexList2];
			int checker = node1.compareTo(node2);
			if ( checker ==0) {
				LinkedList<Integer> mergedWpos = mergeWpos(node1.wpos_list,node2.wpos_list);
				node1.wpos_list.clear();
				node1.wpos_list.addAll(mergedWpos);
				newList.add(node1);
				indexList1++;
				indexList2++;
			} else if ( checker <0) {
				newList.add(node1);
				indexList1++;
			} else {
				newList.add(node2);
				indexList2++;
			}
		}
		for (; indexList1<host.length; indexList1++) {
			newList.add(host[indexList1]);
		}
		for (; indexList2<sub.length; indexList2++) {
			newList.add(sub[indexList2]);
		}
		return newList;
	}

	/**
	 * Merges two ascending word-position lists into a new ascending list.
	 * A position present in both inputs is emitted once; the original code
	 * had no branch for that case and never terminated.
	 */
	private LinkedList<Integer> mergeWpos(LinkedList<Integer> list1,LinkedList<Integer> list2){
		Integer[] host = list1.toArray(new Integer[list1.size()]);
		Integer[] sub = list2.toArray(new Integer[list2.size()]);
		int indexList1=0,indexList2 = 0;
		LinkedList<Integer> newList = new LinkedList<Integer>();
		while ((indexList1<host.length)&&(indexList2<sub.length)){
			int checker = host[indexList1].compareTo(sub[indexList2]);
			if ( checker <0) {
				newList.add(host[indexList1]);
				indexList1++;
			} else if ( checker >0) {
				newList.add(sub[indexList2]);
				indexList2++;
			} else {
				newList.add(host[indexList1]);
				indexList1++;
				indexList2++;
			}
		}
		for (; indexList1<host.length; indexList1++) {
			newList.add(host[indexList1]);
		}
		for (; indexList2<sub.length; indexList2++) {
			newList.add(sub[indexList2]);
		}
		return newList;
	}

	/**
	 * Parses every line of the given file with {@link #loadTerm(String)}.
	 *
	 * @param input path of the index file to read
	 * @return the parsed terms in file order
	 * @throws IOException if the file cannot be opened or read
	 */
	public LinkedList<TermNode> loadTermList(String input) throws IOException{
		LinkedList<TermNode> result = new LinkedList<TermNode>();
		BufferedReader br = new BufferedReader(new FileReader(input));
		try {
			String line;
			while ((line = br.readLine())!= null) {
				result.add(loadTerm(line));
			}
		} finally {
			br.close();
		}
		return result;
	}

	/**
	 * Parses one space-separated index line into a term node.
	 *
	 * <p>Token 0 is the term. Starting at token 4, each document entry is a
	 * document id, three tokens that are skipped, then integer word positions
	 * up to a {@code "]"} token.
	 *
	 * @param input one line of an index file
	 * @return a term node holding one {@code DocNodeWpos} per document entry
	 * @throws NumberFormatException if a document id or position is not an integer
	 * @throws ArrayIndexOutOfBoundsException if a document entry has no closing {@code "]"}
	 */
	public TermNode loadTerm(String input){
		String splite[] = input.split(" ");
		LinkedList docListWpos = new LinkedList();
		LinkedList<Integer> wposList;
		String fileid;
		for (int i=FIRST_DOC_TOKEN; i<splite.length-TRAILING_TOKENS ;i++){
			fileid = splite[i];
			wposList = new LinkedList<Integer>();
			i= i+DOC_ID_TO_WPOS;
			// Compare by value; the loop's own i++ then steps over the "]".
			while (!WPOS_END.equals(splite[i])) {
				wposList.add(Integer.parseInt(splite[i]));
				i++;
			}

			docListWpos.add(new DocNodeWpos(Integer.parseInt(fileid),wposList));
		}
		return (new TermNode(splite[0],docListWpos));
	}

	/**
	 * Entry point; currently does nothing. Example usage:
	 * <pre>
	 * CombineDocNodeByLine cbnl = new CombineDocNodeByLine(
	 *         "new/combineXX0.txt", "new/combineXX96.txt", "new/combineXXX0.txt");
	 * cbnl.combine();
	 * </pre>
	 *
	 * @param args unused
	 */
	public static void main(String[] args) throws IOException {
	}


}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -