inaccuratehits.java

来自「Lucene Hack之通过缩小搜索结果集来提升性能」· Java 代码 · 共 260 行

JAVA

260 行

package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.util.Iterator;
import java.util.Vector;

import org.apache.lucene.document.Document;
import org.apache.lucene.util.InaccurateResultAggregation;

/**
 * <p>
 * Rewritten by caocao (http://www.caocao.name)
 * 
 * <p>
 * Most of the source code in InaccurateHits came from Hits. It cannot extend
 * Hits because Hits is declared final; hopefully that restriction will be
 * removed in a future version.
 */

/**
 * A ranked list of documents, used to hold search results obtained from an
 * {@link InaccurateIndexSearcher}.
 * <p>
 * Hits are fetched from the searcher lazily, in growing batches, and the
 * stored fields of recently requested documents are kept in a small LRU
 * cache. In addition to the hit count reported by the search itself (see
 * {@link #length()}), this class exposes a total count that may be an
 * estimate (see {@link #getTotalLength()} and {@link #isAccurate()}).
 */
public class InaccurateHits {
	private final Query query;

	private final Weight weight;

	private final InaccurateIndexSearcher searcher;

	private final Filter filter;

	private final Sort sort;

	private final boolean ascending;

	// whether totalLength is an exact count (true) or an estimate (false)
	private boolean accurate = true;

	// number of hits reported by the most recent search (TopDocs.totalHits)
	private int length;

	// exact or estimated number of matches; see updateTotalLength()
	private int totalLength;

	private final Vector<HitDoc> hitDocs = new Vector<HitDoc>(); // hits retrieved so far

	private HitDoc first; // head of LRU cache (most recently used)

	private HitDoc last; // tail of LRU cache (least recently used)

	private int numDocs = 0; // number of documents currently cached

	private final int maxDocs = 200; // max number of documents to cache

	/**
	 * Creates a result list without an explicit sort order and eagerly
	 * fetches the first batch of hits.
	 *
	 * @param s         searcher used to run the query and load documents
	 * @param q         the query; its weight is built against <code>s</code>
	 * @param f         filter passed through to the searcher, may be null
	 * @param ascending passed through to the searcher on every search
	 * @throws IOException if building the weight or the initial search fails
	 */
	InaccurateHits(InaccurateIndexSearcher s, Query q, Filter f,
			boolean ascending) throws IOException {
		this(s, q, f, null, ascending);
	}

	/**
	 * Creates a result list and eagerly fetches the first batch of hits.
	 *
	 * @param s         searcher used to run the query and load documents
	 * @param q         the query; its weight is built against <code>s</code>
	 * @param f         filter passed through to the searcher, may be null
	 * @param o         sort passed through to the searcher; when null the
	 *                  searcher's sort-less search method is used
	 * @param ascending passed through to the searcher on every search
	 * @throws IOException if building the weight or the initial search fails
	 */
	InaccurateHits(InaccurateIndexSearcher s, Query q, Filter f, Sort o,
			boolean ascending) throws IOException {
		query = q;
		weight = q.weight(s);
		searcher = s;
		filter = f;
		sort = o;
		this.ascending = ascending;
		this.accurate = true;
		getMoreDocs(50); // getMoreDocs doubles this: retrieve 100 initially
	}

	/**
	 * Tries to add new documents to hitDocs. Ensures that the hit numbered
	 * <code>min</code> has been retrieved.
	 * <p>
	 * The search is re-run asking for twice as many results as
	 * <code>min</code> (or twice the number already held, if that is larger),
	 * and only the hits beyond those already held are appended. The hit
	 * counters are refreshed from the new search as a side effect.
	 */
	private final void getMoreDocs(int min) throws IOException {
		if (hitDocs.size() > min) {
			min = hitDocs.size();
		}

		int n = min * 2; // double # retrieved
		TopDocs topDocs = (sort == null)
				? searcher.search(weight, filter, n, ascending)
				: searcher.search(weight, filter, n, sort, ascending);
		length = topDocs.totalHits;
		updateTotalLength();

		ScoreDoc[] scoreDocs = topDocs.scoreDocs;

		// Scale scores down only when the best score exceeds 1.0.
		float scoreNorm = 1.0f;
		if (length > 0 && topDocs.getMaxScore() > 1.0f) {
			scoreNorm = 1.0f / topDocs.getMaxScore();
		}

		int end = scoreDocs.length < length ? scoreDocs.length : length;
		for (int i = hitDocs.size(); i < end; i++) {
			hitDocs.addElement(new HitDoc(scoreDocs[i].score * scoreNorm,
					scoreDocs[i].doc));
		}
	}

	/**
	 * Refreshes <code>accurate</code> and <code>totalLength</code> after a
	 * search.
	 * <p>
	 * Without an aggregation object the total equals <code>length</code>.
	 * With an accurate aggregation the total is its number of records found.
	 * Otherwise the total is extrapolated: the number of records fetched is
	 * scaled by <code>maxDoc / (lastDocID + 1)</code> and rounded up to the
	 * next multiple of 1000.
	 */
	private void updateTotalLength() throws IOException {
		InaccurateResultAggregation aggregation = searcher
				.getInaccurateResultAggregation();
		if (aggregation == null) {
			totalLength = length;
			return;
		}

		accurate = aggregation.isAccurate();
		if (accurate) {
			totalLength = aggregation.getNumberOfRecordsFound();
		} else {
			int maxDocID = searcher.maxDoc();
			// NOTE(review): presumably lastDocID is the last document id the
			// search examined before stopping early - confirm against
			// InaccurateResultAggregation.
			totalLength = 1000 * ((int) Math.ceil(0.001 * maxDocID
					/ (aggregation.getLastDocID() + 1)
					* aggregation.getNumberOfRecordsFetched()));
		}
	}

	/** Returns the number of hits reported by the most recent search. */
	public final int length() {
		return length;
	}

	/**
	 * Returns the stored fields of the n<sup>th</sup> document in this set.
	 * <p>
	 * Documents are cached, so that repeated requests for the same element may
	 * return the same Document object.
	 * <p>
	 * On a cache miss the document is read <em>before</em> the LRU list is
	 * touched, so a failed read leaves the cache in a consistent state.
	 *
	 * @param n zero-based rank of the hit
	 * @return the stored document for that hit
	 * @throws IOException if fetching more hits or reading the document fails
	 * @throws IndexOutOfBoundsException if <code>n</code> is not a valid hit
	 *         number
	 */
	public final Document doc(int n) throws IOException {
		HitDoc hitDoc = hitDoc(n);

		if (hitDoc.doc == null) {
			// Cache miss: the entry is not linked (doc == null marks "not in
			// list"). Read first; if this throws, nothing has been modified.
			hitDoc.doc = searcher.doc(hitDoc.id);
		} else {
			// Cache hit: unlink so it can be moved to the front.
			remove(hitDoc);
		}

		// Update LRU cache of documents
		addToFront(hitDoc); // add to front of list
		if (numDocs > maxDocs) { // if cache is full
			HitDoc oldLast = last;
			remove(last); // flush last
			oldLast.doc = null; // let doc get gc'd
		}

		return hitDoc.doc;
	}

	/** Returns the score for the nth document in this set. */
	public final float score(int n) throws IOException {
		return hitDoc(n).score;
	}

	/** Returns the id for the nth document in this set. */
	public final int id(int n) throws IOException {
		return hitDoc(n).id;
	}

	/**
	 * Returns an {@link InaccurateHitIterator} to navigate these hits. The
	 * type of the items it yields is defined by that iterator class.
	 * <p>
	 * <b>Caution:</b> Iterate only over the hits needed. Iterating over all
	 * hits is generally not desirable and may be the source of performance
	 * issues.
	 * </p>
	 */
	public Iterator iterator() {
		return new InaccurateHitIterator(this);
	}

	/**
	 * Returns the hit at rank <code>n</code>, fetching more hits from the
	 * searcher first if that rank has not been retrieved yet.
	 *
	 * @throws IndexOutOfBoundsException if <code>n</code> is at or beyond
	 *         {@link #length()}
	 */
	private final HitDoc hitDoc(int n) throws IOException {
		if (n >= length) {
			throw new IndexOutOfBoundsException("Not a valid hit number: " + n);
		}

		if (n >= hitDocs.size()) {
			getMoreDocs(n);
		}

		return hitDocs.elementAt(n);
	}

	/** Links <code>hitDoc</code> in as the new head of the LRU list. */
	private final void addToFront(HitDoc hitDoc) { // insert at front of cache
		if (first == null) {
			last = hitDoc;
		} else {
			first.prev = hitDoc;
		}

		hitDoc.next = first;
		first = hitDoc;
		hitDoc.prev = null;

		numDocs++;
	}

	/**
	 * Unlinks <code>hitDoc</code> from the LRU list. An entry whose document
	 * is null is treated as not being in the list and is left alone.
	 */
	private final void remove(HitDoc hitDoc) { // remove from cache
		if (hitDoc.doc == null) { // it's not in the list
			return; // abort
		}

		if (hitDoc.next == null) {
			last = hitDoc.prev;
		} else {
			hitDoc.next.prev = hitDoc.prev;
		}

		if (hitDoc.prev == null) {
			first = hitDoc.next;
		} else {
			hitDoc.prev.next = hitDoc.next;
		}

		numDocs--;
	}

	/** Returns the ascending flag this result list was created with. */
	public boolean isAscending() {
		return ascending;
	}

	/** Returns the query this result list was created for. */
	public Query getQuery() {
		return query;
	}

	/**
	 * Returns the total number of matches: an exact count when
	 * {@link #isAccurate()} is true, otherwise an extrapolated estimate
	 * rounded up to a multiple of 1000.
	 */
	public int getTotalLength() {
		return totalLength;
	}

	/** Returns whether {@link #getTotalLength()} is exact rather than estimated. */
	public boolean isAccurate() {
		return accurate;
	}
}

inaccuratehits.java - 源码说明

本页面展示了「Lucene Hack之通过缩小搜索结果集来提升性能」中的 inaccuratehits.java 源码文件，采用 Java 编程语言编写，共 260 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。

虫虫开发者社区收录了大量与Lucene相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。

⌨️ 快捷键说明

复制代码Ctrl + C

搜索代码Ctrl + F

全屏模式F11

增大字号Ctrl + =

减小字号Ctrl + -

显示快捷键?