⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ordocumentiterator.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
字号:
package it.unimi.dsi.mg4j.search;/*		  * MG4J: Managing Gigabytes for Java * * Copyright (C) 2003-2007 Paolo Boldi and Sebastiano Vigna  * *  This library is free software; you can redistribute it and/or modify it *  under the terms of the GNU Lesser General Public License as published by the Free *  Software Foundation; either version 2.1 of the License, or (at your option) *  any later version. * *  This library is distributed in the hope that it will be useful, but *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License *  for more details. * *  You should have received a copy of the GNU Lesser General Public License *  along with this program; if not, write to the Free Software *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */import it.unimi.dsi.fastutil.ints.IntHeapSemiIndirectPriorityQueue;import it.unimi.dsi.fastutil.ints.IntSet;import it.unimi.dsi.fastutil.objects.ObjectHeapSemiIndirectPriorityQueue;import it.unimi.dsi.mg4j.index.Index;import it.unimi.dsi.mg4j.index.IndexIterator;import it.unimi.dsi.util.Interval;import it.unimi.dsi.util.Intervals;import java.io.IOException;/** An iterator on documents that returns the OR of a number of document iterators.** <P>This class adds to {@link it.unimi.dsi.mg4j.search.AbstractUnionDocumentIterator}* an interval iterator generating the OR of the intervals returned for each of the documents involved. */public class OrDocumentIterator extends AbstractUnionDocumentIterator implements DocumentIterator {	@SuppressWarnings("unused")	private final static boolean DEBUG = false;	/** Returns a document iterator that computes the OR of the given array of iterators.	 * 	 * <P>Note that the special case of the empty and of the singleton arrays	 * are handled efficiently.	 * 	 * @param documentIterator the iterators to be joined.	 * @return a document iterator that computes the OR of <code>it</code>. 	 * @throws IOException 	 */	public static DocumentIterator getInstance( final DocumentIterator... documentIterator  ) throws IOException {		if ( documentIterator.length == 0 ) return DocumentIterators.EMPTY_ITERATOR;		if ( documentIterator.length == 1 ) return documentIterator[ 0 ];		return new OrDocumentIterator( documentIterator );	}	/** Creates a new document iterator that computes the OR of the given array of iterators.	 *  @param documentIterator the iterators to be joined.	 * @throws IOException 	 */	protected OrDocumentIterator( final DocumentIterator... documentIterator ) throws IOException {		super( documentIterator );	}	protected IntervalIterator getComposedIntervalIterator( final Index index ) {		return indexIterator == null ? new OrIntervalIterator( index )  : new OrIndexIntervalIterator( index );	}	/** An interval iterator of nondecreasing disjoint intervals 	 *  obtained ORing the output of a set of iterators with	 *  the same property.	 */	private class OrIntervalIterator extends AbstractCompositeIntervalIterator {		/** The index of this iterator. */		final Index index;		/** A heap-based indirect priority queue used to keep track of the currently scanned intervals. */		private ObjectHeapSemiIndirectPriorityQueue<Interval> intervalQueue;		/** The left extreme of the last returned interval, or {@link Integer#MIN_VALUE} after a {@link #reset()}. */		private int lastLeft;		/** An array to hold the front of the interval queue. */		private final int[] intervalFront;		/** Creates a new OR interval iterator.		 * 		 * @param index the index of the iterator.		 */		public OrIntervalIterator( final Index index ) {			super( n );			// We just set up some internal data, but we perform no initialisation.			this.index = index;			intervalQueue = new ObjectHeapSemiIndirectPriorityQueue<Interval>( curr, Intervals.ENDS_BEFORE_OR_IS_SUFFIX );			intervalFront = new int[ n ];		}		public void reset() throws IOException {			lastLeft = Integer.MIN_VALUE;			next = null;			intervalQueue.clear();			for ( int i = computeFront(), k; i-- != 0; ) {				k = front[ i ];				intervalIterator[ k ] = documentIterator[ k ].intervalIterator( index );				if ( intervalIterator[ k ] != IntervalIterators.TRUE && ( curr[ k ] = intervalIterator[ k ].nextInterval() ) != null ) intervalQueue.enqueue( k );			}		}		public void intervalTerms( final IntSet terms ) {			final int frontSize = intervalQueue.front( intervalFront );			final int[] intervalFront = this.intervalFront;			for( int i = frontSize; i-- != 0; ) intervalIterator[ intervalFront[ i ] ].intervalTerms( terms );		}				public Interval nextInterval () throws IOException {			if ( next != null ) {				final Interval result = next;				next = null; 				return result;			}			if ( intervalQueue.isEmpty() ) return null;			int first;			while ( curr[ first = intervalQueue.first() ].left <= lastLeft ) {				if ( ( curr[ first ] = intervalIterator[ first ].nextInterval() ) != null ) intervalQueue.changed();				else {					intervalQueue.dequeue();					if ( intervalQueue.isEmpty() ) return null;				}			} 			lastLeft = curr[ first ].left;			return curr[ first ];		}		public int extent() {			int e, s;			s = Integer.MAX_VALUE;			for ( int i = computeFront(), k; i-- != 0; ) {				k = front[ i ];				e = curr[ k ] != null ? intervalIterator[ k ].extent() : Integer.MAX_VALUE;				if ( e < s ) s = e;			}			return s;		}	}		/** An optimised interval iterator with the same semantics as that implemented	 *  by {@link OrDocumentIterator}, but using just {@link IndexIterator#positionArray()}.	 */	protected class OrIndexIntervalIterator extends AbstractCompositeIndexIntervalIterator implements IntervalIterator {		@SuppressWarnings({ "unused", "hiding" })		private final static boolean DEBUG = false;		@SuppressWarnings("hiding")		private final static boolean ASSERTS = false;		/** The index of this iterator. */		final Index index;		/** A heap-based semi-indirect priority queue used to keep track of the currently scanned positions. */		private final IntHeapSemiIndirectPriorityQueue positionQueue;		/** An array to hold the front of the position queue. */		private final int[] positionFront;				public OrIndexIntervalIterator( final Index index ) {			super( n );			this.index = index;			positionQueue = new IntHeapSemiIndirectPriorityQueue( curr );			positionFront = new int[ n ]; 		}		public void reset() throws IOException {			positionQueue.clear();			for ( int i = computeFront(), k; i-- != 0; ) {				k = front[ i ];				if ( indexIterator[ k ].index() == index ) {					position[ k ] = indexIterator[ k ].positionArray();					count[ k ] = indexIterator[ k ].count();					curr[ k ] = position[ k ][ currPos[ k ] = 0 ];					positionQueue.enqueue( k );				}			}						if ( ASSERTS ) assert ! positionQueue.isEmpty();			next = Interval.valueOf( curr[ positionQueue.first() ] );		}		public void intervalTerms( final IntSet terms ) {			final int frontSize = positionQueue.front( positionFront );			final int[] positionFront = this.positionFront;			for( int i = frontSize; i-- != 0; ) terms.add( indexIterator[ positionFront[ i ] ].termNumber() );		}				public Interval nextInterval() {			if ( next != null ) {				final Interval result = next;				next = null; 				return result;			}						if ( positionQueue.isEmpty() ) return null;						int first = positionQueue.first();			final int previous = curr[ first ];						do				if ( ++currPos[ first ] == count[ first ] ) {					positionQueue.dequeue();					if ( positionQueue.isEmpty() ) return null;				}				else {					curr[ first ] = position[ first ][ currPos[ first ] ];					positionQueue.changed();				}			while ( curr[ first = positionQueue.first() ] == previous );			return Interval.valueOf( curr[ first ] );		}		public int extent() {			return 1;		} 	}}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -