📄 lucenequeryoptimizer.java
字号:
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.searcher;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.QueryFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import java.util.LinkedHashMap;
import java.util.Map;
import java.io.IOException;
/**
* Utility which converts certain query clauses into {@link QueryFilter}s and
* caches these. Only required {@link TermQuery}s whose boost is zero and whose
* term occurs in at least a certain fraction of documents are converted to
* cached filters. This accelerates query constraints like language, document
* format, etc., which do not affect ranking but might otherwise slow search
* considerably.
*/
class LuceneQueryOptimizer {

  /**
   * LRU cache mapping a filter {@link BooleanQuery} to the {@link Filter}
   * built from it. Every access is made while holding this map's monitor.
   */
  private final LinkedHashMap cache;

  /**
   * Minimum fraction of documents (docFreq / maxDoc) that must contain a term
   * for its clause to be turned into a filter.
   */
  private final float threshold;

  /**
   * Construct an optimizer that caches and uses filters for required {@link
   * TermQuery}s whose boost is zero.
   *
   * @param cacheSize
   *          the number of QueryFilters to cache
   * @param threshold
   *          the fraction of documents which must contain term
   */
  public LuceneQueryOptimizer(final int cacheSize, float threshold) {
    // The third argument (accessOrder = true) orders entries from least to
    // most recently accessed, so dropping the eldest entry yields LRU eviction.
    this.cache = new LinkedHashMap(cacheSize, 0.75f, true) {
      protected boolean removeEldestEntry(Map.Entry eldest) {
        return size() > cacheSize; // limit size of cache
      }
    };
    this.threshold = threshold;
  }

  /**
   * Splits {@code original} into a scoring query and a cached filter, then
   * runs the search using the strategy selected by {@code sortType}.
   *
   * <p>Clauses that are required, have a boost of zero, are {@link TermQuery}s
   * and whose term occurs in at least {@code threshold} of the documents are
   * moved into a filter query; all other clauses are searched as usual.
   *
   * <p>Sort types, as described by the original inline comments:
   * <ul>
   * <li>0 - sort by relevance</li>
   * <li>1 - sort by time</li>
   * <li>2 - drop low-relevance results, then sort by time</li>
   * <li>3 - compute the score but do not sort</li>
   * <li>4 - compute the score from relevance and time, then sort by score</li>
   * <li>5 - handled like 0, 3 and 4 (no description in the original)</li>
   * </ul>
   *
   * @param original
   *          the query whose clauses are to be partitioned
   * @param searcher
   *          the searcher used for term statistics and to run the search
   * @param numHits
   *          passed through to the searcher
   * @param sortType
   *          selects the search variant, see the list above
   * @param start
   *          passed through to the searcher
   * @param end
   *          passed through to the searcher
   * @return the search result, or {@code null} if {@code sortType} is not one
   *         of the values 0 to 5
   * @throws IOException
   *           if the searcher fails
   */
  public TopDocs optimize(BooleanQuery original, Searcher searcher, int numHits,
      int sortType, long start, long end) throws IOException {
    BooleanQuery query = new BooleanQuery();
    BooleanQuery filterQuery = null; // created lazily, only if a clause is filtered

    BooleanClause[] clauses = original.getClauses();
    for (int i = 0; i < clauses.length; i++) {
      BooleanClause c = clauses[i];
      if (isFilterable(c, searcher)) {
        if (filterQuery == null) {
          filterQuery = new BooleanQuery();
        }
        filterQuery.add(c.query, true, false); // filter it
      } else {
        query.add(c); // query it
      }
    }

    Filter filter = null;
    if (filterQuery != null) {
      filter = lookupOrCreateFilter(filterQuery);
    }

    /*
     * Modified by Xie shuqiang, 2006-07-28
     */
    switch (sortType) {
      case 0: // sort by relevance
      case 3: // compute score but do not sort
      case 4: // compute score from relevance and time, then sort by score
      case 5:
        return searcher.search(query, filter, numHits, sortType, start, end);
      case 1: // sort by time
      case 2: // drop low-relevance results, then sort by time
        return searcher.search(query, filter, numHits,
            new Sort(new SortField("pubTime", SortField.INT, true)),
            sortType, start, end);
      default:
        // Unknown sort type: callers receive null, as before.
        return null;
    }
  }

  /**
   * Tells whether a clause should be served by a cached filter: it must be
   * required, have zero boost, be a {@link TermQuery}, and its term must occur
   * in at least {@code threshold} of the documents. The term statistics are
   * only requested from the searcher once the first three checks have passed.
   */
  private boolean isFilterable(BooleanClause c, Searcher searcher)
      throws IOException {
    if (!c.required) {
      return false;
    }
    if (c.query.getBoost() != 0.0f) {
      return false;
    }
    if (!(c.query instanceof TermQuery)) {
      return false;
    }
    int docFreq = searcher.docFreq(((TermQuery) c.query).getTerm());
    float fraction = docFreq / (float) searcher.maxDoc();
    return fraction >= threshold;
  }

  /**
   * Returns the cached filter for {@code filterQuery}, creating and caching
   * one on a miss. Lookup and insert happen under a single lock so that
   * concurrent misses for an equal query end up sharing one filter instance
   * instead of each building its own and overwriting each other's entry.
   */
  private Filter lookupOrCreateFilter(BooleanQuery filterQuery) {
    // get() must be locked too: in an access-ordered LinkedHashMap a lookup
    // reorders entries and therefore counts as a structural modification.
    synchronized (cache) {
      Filter filter = (Filter) cache.get(filterQuery); // check cache
      if (filter == null) { // miss
        filter = new QueryFilter(filterQuery); // construct new entry
        cache.put(filterQuery, filter); // cache it
      }
      return filter;
    }
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -