📄 documentalmergedclusterdocumentiterator.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
字号:
package it.unimi.dsi.mg4j.index.cluster;

/*
 * MG4J: Managing Gigabytes for Java
 *
 * Copyright (C) 2006-2007 Sebastiano Vigna
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 2.1 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 */

import it.unimi.dsi.fastutil.ints.IntHeapSemiIndirectPriorityQueue;
import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.ReferenceArraySet;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.search.AbstractDocumentIterator;
import it.unimi.dsi.mg4j.search.DocumentIterator;
import it.unimi.dsi.mg4j.search.IntervalIterator;
import it.unimi.dsi.mg4j.search.IntervalIterators;
import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;

import java.io.IOException;

/** A document iterator merging iterators from local indices.
 *
 * <p>Every component iterator yields <em>local</em> document pointers. They are turned into
 * <em>global</em> pointers through the clustering strategy of the underlying index and kept in
 * {@link #globalDocumentPointer}, which is the reference array of an indirect priority queue.
 * The component at the head of the queue is the <em>current</em> iterator: its global pointer
 * is the document returned to the caller, and interval-related requests are delegated to it.
 *
 * @author Sebastiano Vigna
 */
public class DocumentalMergedClusterDocumentIterator extends AbstractDocumentIterator implements DocumentIterator {
	/** The component document iterators. */
	final protected DocumentIterator[] documentIterator;
	/** The number of component iterators. */
	final protected int n;
	/** The indices corresponding to each underlying document iterator. */
	protected final int[] usedIndex;
	/** The cached strategy of the index we refer to. */
	protected final DocumentalClusteringStrategy strategy;
	/** The queue of document iterator indices (offsets into {@link #documentIterator} and {@link #usedIndex}). */
	protected final IntHeapSemiIndirectPriorityQueue queue;
	/** The reference array for the queue (containing <em>global</em> document pointers). */
	protected final int[] globalDocumentPointer;
	/** The set of indices involved in this iterator. */
	protected final ReferenceSet<Index> indices = new ReferenceArraySet<Index>();
	/** The underlying index reader. */
	private final DocumentalClusterIndexReader indexReader;
	/** The current iterator (an offset into {@link #documentIterator}), or -1 if no component iterator returned a document at construction time. */
	protected int currentIterator = -1;
	/** Whether there are no more documents to be returned. */
	protected boolean exhausted;

	/** Creates a new document iterator for a documental cluster.
	 *
	 * <p>This constructor uses an array of document iterators that it is not required to be full.
	 * This is very useful with rare terms.
	 *
	 * <p>Each component iterator is advanced once: those that return a document are enqueued
	 * (and their indices recorded), the others are ignored. If at least one document is
	 * available, it is cached so that the first call to {@link #nextDocument()} returns it.
	 *
	 * @param indexReader the underlying index reader.
	 * @param documentIterator an array of document iterators.
	 * @param usedIndex an array parallel to <code>documentIterator</code> containing the ordinal numbers
	 * of the indices corresponding to the iterators.
	 * @throws IOException if a component iterator fails while being advanced.
	 */
	public DocumentalMergedClusterDocumentIterator( final DocumentalClusterIndexReader indexReader, final DocumentIterator[] documentIterator, final int[] usedIndex ) throws IOException {
		this.documentIterator = documentIterator;
		this.n = documentIterator.length;
		this.indexReader = indexReader;
		this.usedIndex = usedIndex;

		strategy = indexReader.index.strategy;
		globalDocumentPointer = new int[ n ];
		queue = new IntHeapSemiIndirectPriorityQueue( globalDocumentPointer, n );

		int result;
		for( int i = n; i-- != 0; ) {
			if ( ( result = documentIterator[ i ].nextDocument() ) != -1 ) {
				indices.addAll( documentIterator[ i ].indices() );
				// The reference array must be updated before enqueueing, as the queue compares by its content.
				globalDocumentPointer[ i ] = strategy.globalPointer( usedIndex[ i ], result );
				queue.enqueue( i );
			}
		}

		if ( queue.isEmpty() ) exhausted = true;
		else {
			currentIterator = queue.first();
			// Cache the first document: it will be handed out by the first nextDocument().
			next = globalDocumentPointer[ currentIterator ];
		}
	}

	/** Returns the interval iterator of the current component iterator.
	 *
	 * @return the interval iterator provided by the current component iterator.
	 * @throws IllegalStateException if there is no current document (<code>last</code> is -1).
	 */
	public IntervalIterator intervalIterator() throws IOException {
		if ( last == -1 ) throw new IllegalStateException();
		return documentIterator[ currentIterator ].intervalIterator();
	}

	/** Returns the interval iterator of the current component iterator for the given index.
	 *
	 * @param index an index.
	 * @return {@link IntervalIterators#TRUE} if <code>index</code> is not among {@link #indices()};
	 * otherwise, the interval iterator provided by the current component iterator.
	 * @throws IllegalStateException if there is no current document (<code>last</code> is -1).
	 */
	public IntervalIterator intervalIterator( Index index ) throws IOException {
		if ( last == -1 ) throw new IllegalStateException();
		if ( ! indices.contains( index ) ) return IntervalIterators.TRUE;
		return documentIterator[ currentIterator ].intervalIterator( index );
	}

	/** Returns the interval iterators of the current component iterator.
	 *
	 * @return the map from indices to interval iterators provided by the current component iterator.
	 * @throws IllegalStateException if there is no current document (<code>last</code> is -1).
	 */
	public Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() throws IOException {
		if ( last == -1 ) throw new IllegalStateException();
		return documentIterator[ currentIterator ].intervalIterators();
	}

	/** Returns the set of indices collected at construction time from the nonempty component iterators.
	 *
	 * @return the set of indices involved in this iterator (the internal set, not a copy).
	 */
	public ReferenceSet<Index> indices() {
		return indices;
	}

	// TODO: this needs tests
	/** Skips to the first document whose global pointer is at least <code>p</code>.
	 *
	 * @param p a global document pointer.
	 * @return the last returned document if <code>p</code> does not exceed it; otherwise the global
	 * pointer at the head of the queue after all lagging component iterators have been
	 * advanced, or {@link Integer#MAX_VALUE} if no component iterator is left.
	 */
	public int skipTo( final int p ) throws IOException {
		int i, d;
		if ( p <= last ) return last;

		// Any document cached for nextDocument() is superseded by the result of this skip.
		next = -1;
		while( ! queue.isEmpty() && globalDocumentPointer[ i = queue.first() ] < p ) {
			// NOTE(review): localPointer( p ) is computed independently of usedIndex[ i ];
			// confirm against DocumentalClusteringStrategy that this is the intended target.
			d = documentIterator[ i ].skipTo( strategy.localPointer( p ) );
			if ( d == Integer.MAX_VALUE ) queue.dequeue();
			else {
				globalDocumentPointer[ i ] = strategy.globalPointer( usedIndex[ i ], d );
				if ( globalDocumentPointer[ i ] < p ) queue.dequeue(); // This covers the case of getting to the end of list without finding p
				else queue.changed();
			}
		}

		if ( queue.isEmpty() ) {
			exhausted = true;
			last = -1;
			return Integer.MAX_VALUE;
		}

		return last = globalDocumentPointer[ currentIterator = queue.first() ];
	}

	/** Returns the next document in the merged list.
	 *
	 * @return the next global document pointer, or -1 if there are no more documents.
	 */
	public int nextDocument() throws IOException {
		// A document cached by the constructor (or by the superclass) is returned first.
		if ( next >= 0 ) {
			last = next;
			next = -1;
			return last;
		}

		if ( exhausted ) return last = -1;

		// Advance the component that produced the last document, then fix the queue.
		final int result = documentIterator[ currentIterator ].nextDocument();
		if ( result != -1 ) {
			globalDocumentPointer[ currentIterator ] = strategy.globalPointer( usedIndex[ currentIterator ], result );
			queue.changed();
		}
		else queue.dequeue();

		if ( queue.isEmpty() ) {
			exhausted = true;
			return last = -1;
		}

		currentIterator = queue.first();
		return last = globalDocumentPointer[ currentIterator ];
	}

	/** Passes the visitor to the component iterators, in array order, stopping at the first one returning false.
	 *
	 * @param visitor a visitor.
	 * @return true if every component iterator accepted the visitor returning true.
	 */
	public boolean accept( DocumentIteratorVisitor visitor ) throws IOException {
		for( DocumentIterator d: documentIterator ) if ( ! d.accept( visitor ) ) return false;
		return true;
	}

	/** Passes the visitor to the current component iterator only.
	 *
	 * @param visitor a visitor.
	 * @return the value returned by the current component iterator.
	 */
	public boolean acceptOnTruePaths( DocumentIteratorVisitor visitor ) throws IOException {
		return documentIterator[ currentIterator ].acceptOnTruePaths( visitor );
	}

	/** Closes the underlying index reader. */
	public void dispose() throws IOException {
		indexReader.close();
	}
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -