⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 abstractuniondocumentiterator.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
字号:
package it.unimi.dsi.mg4j.search;/*		  * MG4J: Managing Gigabytes for Java * * Copyright (C) 2003-2007 Paolo Boldi and Sebastiano Vigna  * *  This library is free software; you can redistribute it and/or modify it *  under the terms of the GNU Lesser General Public License as published by the Free *  Software Foundation; either version 2.1 of the License, or (at your option) *  any later version. * *  This library is distributed in the hope that it will be useful, but *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License *  for more details. * *  You should have received a copy of the GNU Lesser General Public License *  along with this program; if not, write to the Free Software *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */import it.unimi.dsi.fastutil.IndirectPriorityQueue;import it.unimi.dsi.fastutil.ints.IntHeapSemiIndirectPriorityQueue;import it.unimi.dsi.fastutil.objects.Reference2ReferenceArrayMap;import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;import it.unimi.dsi.mg4j.index.Index;import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;import java.io.IOException;import java.util.Iterator;/**  A document iterator on documents, generating the union of the documents returned * by a number of document iterators. *  * <p>The pattern of this class is the same as that of {@link AbstractIntersectionDocumentIterator}. * Additionally, this class provides a mechanism that makes accessible the set of component * document iterators that are {@linkplain #computeFront() positioned on the current document}. */public abstract class AbstractUnionDocumentIterator extends AbstractCompositeDocumentIterator {	private final static boolean DEBUG = false;	//private final static boolean ASSERTS = false;	/** A heap-based semi-indirect priority queue used to keep track of the currently scanned integers. */	final protected IntHeapSemiIndirectPriorityQueue queue;	/** The {@link IndirectPriorityQueue#front(int[])} of {@link #queue}, if {@link #frontSize} is not -1. */	final protected int[] front;	/** The reference array used for the queue. */	final protected int[] curr;	/** A map from indices to interval iterators. */	final private Reference2ReferenceArrayMap<Index,IntervalIterator> intervalIterators;	/** A map from indices to the iterators returned for the current document. The key set may	 * not contain an index because the related iterator has never been requested. Moreover,	 * the iterator in this map for a given index may differ from the one in {@link #intervalIterators}	 * because it could be {@link IntervalIterators#TRUE} (in fact, in that case it may even	 * happen that {@link #intervalIterators} does not contain the index). */	final private Reference2ReferenceArrayMap<Index,IntervalIterator> currentIterators;	/** An unmodifiable wrapper around {@link #currentIterators}. */	final private Reference2ReferenceMap<Index,IntervalIterator> unmodifiableCurrentIterators;	/** The number of valid entries in {@link #front}, or -1 if the front has not been computed for the current document. */	protected int frontSize = -1;	/** Creates a new document iterator that computes the OR of the given array of iterators.	 *  @param documentIterator the iterators to be joined.	 * @throws IOException 	 */	protected AbstractUnionDocumentIterator( final DocumentIterator... documentIterator ) throws IOException {		super( documentIterator );		this.curr = new int[ n ];		queue = new IntHeapSemiIndirectPriorityQueue( curr );		intervalIterators = new Reference2ReferenceArrayMap<Index,IntervalIterator>( indices.size() );		currentIterators = new Reference2ReferenceArrayMap<Index,IntervalIterator>( indices.size() );		unmodifiableCurrentIterators = Reference2ReferenceMaps.unmodifiable( currentIterators );		// Only add to the queue nonempty iterators...		for ( int i = 0; i < n; i++ ) if ( ( curr[ i ] = documentIterator[ i ].nextDocument() ) != -1 ) queue.enqueue( i );		// If queue is empty, the process is over				if ( ! queue.isEmpty() ) next = curr[ queue.first() ];		front = new int[ queue.size() ];	}	public int skipTo( final int n ) throws IOException {		if ( last >= n ) return last;		if ( next >= n ) return nextDocument();		if ( queue.isEmpty() ) return Integer.MAX_VALUE;		last = next = -1;		currentIterators.clear(); 		frontSize = -1; // Invalidate front		int first, res;		while( curr[ first = queue.first() ] < n ) {			res = documentIterator[ first ].skipTo( n );			// Cannot advance the minimum			if ( res == Integer.MAX_VALUE ) {				// Remove it				queue.dequeue();				// If nothing else remains, we are done				if ( queue.isEmpty() ) return Integer.MAX_VALUE;			}			else {				// Advance the top element, and signal this fact to the queue				curr[ first ] = res;				queue.changed();			}		}				return last = curr[ first ];	}		public int nextDocument() throws IOException {		if ( next != -1 ) {			// We already know what to return			last = next;			next = -1;			return last;		}		currentIterators.clear(); 		frontSize = -1; // Invalidate front		if ( queue.isEmpty() ) return last = -1;				// The least element		int first, c = curr[ queue.first() ];			// Advance all elements equal to the least one		while( curr[ first = queue.first() ] == c ) {			if ( ( curr[ first ] = documentIterator[ first ].nextDocument() ) != - 1 ) queue.changed();			else {				// Remove it				queue.dequeue();				// If nothing else remains, we are done				if ( queue.isEmpty() ) return last = -1;			}		}		return last = curr[ first ];	}		/** Forces computation of the current front, returning the number of indices it contains.	 * 	 * <p>After a call to this method, 	 * the first elements of {@link #front} contain	 * the indices of the {@linkplain AbstractCompositeDocumentIterator#documentIterator component document iterators}	 * that are positioned on the current document. If the front has already been	 * computed for the current document, this method has no side effects.	 * 	 * @return the size of the current front (the number of valid entries in {@link #front}).	 */		protected int computeFront() {		if ( frontSize == -1 ) frontSize = queue.front( front );		return frontSize;	}	public Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() throws IOException {		final Iterator<Index> i = indices.iterator();		while( i.hasNext() ) intervalIterator( i.next() );		return unmodifiableCurrentIterators;	}	public IntervalIterator intervalIterator( final Index index ) throws IOException {		if ( DEBUG ) System.err.println( this + ".intervalIterator(" + index + ")" );		if ( last == -1 ) throw new IllegalStateException(); 		if ( ! indices.contains( index ) ) return IntervalIterators.TRUE;		IntervalIterator intervalIterator;		// If the iterator has been created and it's ready, we just return it.				if ( ( intervalIterator = currentIterators.get( index ) ) != null ) return intervalIterator;		boolean atLeastOneIsTrue = false;			/* If all iterators are TRUE, we return TRUE. Otherwise,		 * we skip all FALSE iterators and all iterators whose document		 * is not last, and merge the remaining ones. If		 * all iterators are FALSE, we return FALSE.		 */		// ALERT: this logic should be in OrDocumentIterator		int c = 0; // This counts non-TRUE, non-FALSE interval iterators		IntervalIterator oneNotTrue = null;		for( int i = computeFront(); i -- != 0; ) {			intervalIterator = documentIterator[ front[ i ] ].intervalIterator( index );			if ( intervalIterator == IntervalIterators.TRUE ) atLeastOneIsTrue = true;			else {				oneNotTrue = intervalIterator;				if ( intervalIterator != IntervalIterators.FALSE ) c++;			}		}		// If c == 0, all component iterators are either TRUE or FALSE. We return TRUE if at least one is TRUE.		if ( c == 0 ) intervalIterator = atLeastOneIsTrue? IntervalIterators.TRUE : IntervalIterators.FALSE;		else if ( c == 1 ) intervalIterator = oneNotTrue; // If we exclude TRUE and FALSE iterators, just oneNotTrue remains: we pass it upwards.		else {			intervalIterator = intervalIterators.get( index );			if ( intervalIterator == null ) intervalIterators.put( index, intervalIterator = getComposedIntervalIterator( index ) );			intervalIterator.reset();		}						currentIterators.put( index, intervalIterator );			return intervalIterator;	}	abstract protected IntervalIterator getComposedIntervalIterator( Index index );	/** Invokes {@link #acceptOnTruePaths(DocumentIteratorVisitor)} only on component	 * iterators positioned on the current document.	 * 	 * @param visitor a visitor.	 * @return true if the visit should continue.	 * @throws IOException 	 */		@Override	public boolean acceptOnTruePaths( DocumentIteratorVisitor visitor ) throws IOException {		if ( ! visitor.visitPre( this ) ) return false;		final int s  = computeFront();		for( int i = 0; i < s; i++ ) if ( ! documentIterator[ front[ i ] ].acceptOnTruePaths( visitor ) ) return false;		return visitor.visitPost( this );	}}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -