⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 wposgetter.java

📁 自己写的search engine, 有 boolean search, fuzzy search
💻 JAVA
字号:
package searchingEngine.dataPreprocessing.wordPosition;

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.*;

import searchingEngine.dataPreprocessing.invertedFile.DocNode;
import searchingEngine.dataPreprocessing.invertedFile.TermNode;
import searchingEngine.dataPreprocessing.rawData.PostNode;
import searchingEngine.utilites.dataConverter.RawConverter;

/**
 * Builds per-term word-position posting lists from the posting text files
 * {@code G:/ir/sortedPost<i>.txt} and hands them to {@code RawConverter} for saving.
 *
 * <p>Each non-blank input line is split on single spaces; field 1 is used as the term,
 * field 2 as the file id and field 3 as the word position. Field 0 is not read here.
 */
public class WposGetter {
	/** Path of one posting file. Not read anywhere inside this class. */
	public final String FILEPATH = "G:/ir/sortedPost118.txt";
	/** Term node created by the constructor; it starts with an empty document list. */
	public final TermNode term_node;
	/** Value returned by {@link #getTotalFound()}; set to 0 at construction and never changed in this class. */
	private int totalFound;

	/**
	 * Creates a getter whose {@link #term_node} holds the given term and an empty document list.
	 *
	 * @param term the term stored in {@link #term_node}
	 * @throws Exception declared for compatibility with existing callers
	 */
	public WposGetter(String term) throws Exception {
		term_node = new TermNode(term, new LinkedList<DocNode>());
		totalFound = 0;
	}

	/**
	 * Reads {@code G:/ir/sortedPost<i>.txt} and groups its postings into one
	 * {@code TermNode} per run of consecutive lines that share the same term.
	 *
	 * @param i index appended to the posting file name
	 * @return the term nodes in the order their terms first appear in the file
	 * @throws IllegalArgumentException if a non-blank line has fewer than four fields
	 * @throws Exception if the file cannot be opened or read, or a numeric field is malformed
	 */
	private LinkedList<TermNode> buildWPosStore(int i) throws Exception {
		String path = "G:/ir/sortedPost" + i + ".txt";
		LinkedList<TermNode> termList = new LinkedList<TermNode>();
		TermNode termNode = null;
		String currentTerm = null;

		BufferedReader br = new BufferedReader(new FileReader(path));
		try {
			String line;
			while ((line = br.readLine()) != null) {
				// Blank lines carry no posting; skip them instead of failing on a missing field.
				if (line.trim().length() == 0) {
					continue;
				}
				String[] fields = line.split(" ");
				if (fields.length < 4) {
					throw new IllegalArgumentException(
							"Malformed posting line in " + path + ": \"" + line + "\"");
				}
				String term = fields[1];
				// A new run starts on the first posting or whenever the term changes.
				if (termNode == null || !term.equals(currentTerm)) {
					if (termNode != null) {
						termList.add(termNode);
					}
					termNode = new TermNode(term, new LinkedList<DocNode>());
					currentTerm = term;
				}
				insert(termNode.doc_list, Integer.parseInt(fields[2]), Integer.parseInt(fields[3]));
			}
		} finally {
			// Release the file handle even when reading or parsing fails.
			br.close();
		}

		// The node for the last run is still pending once the file ends.
		if (termNode != null) {
			termList.add(termNode);
		}
		return termList;
	}

	/**
	 * Sorts the document list of {@link #term_node} and then the position list of every
	 * document in it, using the elements' natural ordering.
	 */
	private void sort() {
		Collections.sort(term_node.doc_list);
		LinkedList<DocNode> docList = term_node.doc_list;
		// Iterate instead of calling get(j): indexed access on a linked list is linear per call.
		for (DocNode doc : docList) {
			Collections.sort(((DocNodeWpos) doc).wpos_list);
		}
	}

	/**
	 * Records one occurrence (file id, word position) in a document list.
	 *
	 * <p>Both the document list and each document's position list are kept ordered by
	 * inserting at the slot reported by {@link Collections#binarySearch}; that search is
	 * only valid while the lists stay sorted, so new entries must never be blindly appended.
	 * A position that is already present is inserted again next to the existing one.
	 *
	 * @param docList document list to update; must already be sorted by its natural ordering
	 * @param fileid  id used to locate or create the document entry
	 * @param wpos    word position to add to that entry
	 */
	private void insert(LinkedList<DocNode> docList, int fileid, int wpos) {
		int docIndex = Collections.binarySearch(docList, new DocNodeWpos(fileid));
		if (docIndex < 0) {
			// Not present: a negative result encodes -(insertion point) - 1.
			DocNodeWpos created = new DocNodeWpos(fileid);
			created.wpos_list.add(wpos);
			docList.add(-docIndex - 1, created);
			return;
		}

		DocNodeWpos existing = (DocNodeWpos) docList.get(docIndex);
		LinkedList<Integer> positions = existing.wpos_list;
		int posIndex = Collections.binarySearch(positions, wpos);
		if (posIndex < 0) {
			posIndex = -posIndex - 1;
		}
		positions.add(posIndex, wpos);
	}

	/**
	 * Returns the stored counter.
	 *
	 * @return the value of {@code totalFound}, which this class only ever sets to 0
	 */
	public int getTotalFound() {
		return totalFound;
	}

	/**
	 * Converts posting files 57 through 118 (inclusive) and saves each result as
	 * {@code G:/ir/wpos/combined<i>.dat}, printing {@code done<i>} after every file.
	 *
	 * @param args unused
	 * @throws Exception if reading a posting file or saving a result fails
	 */
	public static void main(String[] args) throws Exception {
		WposGetter wpos_store = new WposGetter("haha");
		for (int i = 57; i <= 118; i++) {
			RawConverter.saveObject(wpos_store.buildWPosStore(i), "G:/ir/wpos/combined" + i + ".dat");
			System.out.println("done" + i);
		}
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -