⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 worddistancequery.java

📁 一个用于搜索本地文件内容的小型搜索引擎
💻 JAVA
字号:
package queries;

import invertedList.FileRecord;
import invertedList.IQueryResult;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.StringTokenizer;

import stemmer.IStemmer;

/**
 * Proximity query: finds the files in which all the given words occur in
 * order, each one within a bounded distance of the previous one.
 *
 * <p>The raw query string is split on the characters {@code +}, {@code "} and
 * {@code ~}. Every token except the last is a word to look up; the last token
 * is parsed as the integer distance. For example {@code "word1+word2"~3}
 * yields the words {@code word1}, {@code word2} and the distance {@code 3}.
 *
 * <p>For a distance {@code d}, a file matches when there is an occurrence
 * {@code p1} of the first word, {@code p2} of the second, and so on, such that
 * {@code p(i) <= p(i+1) <= p(i) + d + 1} for every consecutive pair. The
 * integers are whatever {@link IQueryResult#getAllQueryResult} reports per
 * file (presumably word positions; confirm against the index implementation).
 *
 * <p>The query string is re-parsed on every {@link #doQuery} call, so an
 * instance can be reused after {@link #setQueryString}.
 */
public final class WordDistanceQuery extends Query {
	/** Characters separating the words and the distance in the raw query string. */
	private static final String DELIMITERS = "+\"~";

	private String queryString;

	/**
	 * @param queryString raw query text; it is parsed lazily by {@link #doQuery}
	 */
	public WordDistanceQuery(String queryString){
		this.queryString = queryString;
	}

	/**
	 * Replaces the raw query text used by the next {@link #doQuery} call.
	 *
	 * @param queryString raw query text
	 */
	public void setQueryString(String queryString) {
		this.queryString = queryString;
	}

	/**
	 * Parses the current query string and evaluates it against the index.
	 *
	 * @param queryProcessor index access used to look up each word
	 * @return the files matching the proximity constraint; empty (never
	 *         {@code null}) when the query contains no words or nothing matches
	 * @throws NullPointerException if the query string is {@code null}
	 * @throws java.util.NoSuchElementException if the query string contains no tokens
	 * @throws NumberFormatException if the last token is not an integer
	 */
	@Override
	public Set<FileRecord> doQuery(IQueryResult queryProcessor) {
		StringTokenizer tokenizer = new StringTokenizer(queryString, DELIMITERS);
		int wordsCount = tokenizer.countTokens() - 1;

		// The word list is local so that repeated calls never see the words
		// of a previous query.
		List<String> words = new ArrayList<String>();
		for (int i = 0; i < wordsCount; i++) {
			words.add(tokenizer.nextToken());
		}

		// Widened to long so that "+ 1" (and the window arithmetic later on)
		// cannot overflow for very large distances.
		long maxGap = (long) Integer.parseInt(tokenizer.nextToken()) + 1;

		// One lookup result per word, in query order, plus the set of files
		// that contain every word.
		List<Map<FileRecord,SortedSet<Integer>>> hitsPerWord =
				new ArrayList<Map<FileRecord,SortedSet<Integer>>>(words.size());
		Set<FileRecord> candidates = null;
		for (String word : words) {
			Map<FileRecord,SortedSet<Integer>> hits = queryProcessor.getAllQueryResult(word);
			if (candidates == null) {
				candidates = new HashSet<FileRecord>(hits.keySet());
			} else {
				candidates.retainAll(hits.keySet());
			}
			hitsPerWord.add(hits);
		}

		Set<FileRecord> resultSet = new HashSet<FileRecord>();
		if (candidates == null) {
			// The query held a distance but no words: nothing can match.
			return resultSet;
		}

		for (FileRecord record : candidates) {
			List<SortedSet<Integer>> occurrences =
					new ArrayList<SortedSet<Integer>>(hitsPerWord.size());
			for (Map<FileRecord,SortedSet<Integer>> hits : hitsPerWord) {
				occurrences.add(hits.get(record));
			}
			if (isEligible(occurrences, maxGap)) {
				resultSet.add(record);
			}
		}

		return resultSet;
	}

	/**
	 * Tells whether one file satisfies the proximity constraint.
	 *
	 * @param occurrences for each query word, in query order, the sorted
	 *                    occurrence values of that word in the file
	 * @param maxGap      largest allowed difference between the occurrence of a
	 *                    word and the occurrence of the word that follows it
	 * @return {@code true} if a chain of occurrences exists, one per word, in
	 *         which every step stays within {@code maxGap}
	 */
	private boolean isEligible(List<SortedSet<Integer>> occurrences, long maxGap) {
		// Any occurrence of the first word may start a chain.
		for (Integer start : occurrences.get(0)) {
			if (chainFrom(occurrences, 1, start.intValue(), maxGap)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Recursively extends a chain of occurrences (backtracking search).
	 *
	 * @param occurrences per-word occurrence sets, in query order
	 * @param index       index of the next word to place
	 * @param previous    occurrence chosen for the word at {@code index - 1}
	 * @param maxGap      largest allowed step from {@code previous}
	 * @return {@code true} if the words from {@code index} onwards can all be
	 *         placed
	 */
	private boolean chainFrom(List<SortedSet<Integer>> occurrences, int index, int previous, long maxGap) {
		if (index == occurrences.size()) {
			return true;
		}

		// Inclusive upper bound of the window, computed in long arithmetic.
		long upper = previous + maxGap;
		if (upper < previous) {
			// Negative gap: the window is empty.
			return false;
		}

		SortedSet<Integer> all = occurrences.get(index);
		SortedSet<Integer> window;
		if (upper >= Integer.MAX_VALUE) {
			// The bound is beyond any int value, so take everything from
			// 'previous' on; subSet's exclusive upper bound cannot express this.
			window = all.tailSet(previous);
		} else {
			window = all.subSet(previous, (int) upper + 1);
		}

		for (Integer candidate : window) {
			if (chainFrom(occurrences, index + 1, candidate.intValue(), maxGap)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Currently a no-op: the supplied stemmer is ignored and query words are
	 * looked up exactly as they appear in the query string.
	 *
	 * @param stemmer ignored
	 */
	@Override
	public void setStemmer(IStemmer stemmer) {
		// TODO stemming is not supported by this query type yet

	}

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -