📄 ordocumentiterator.java
字号:
package it.unimi.dsi.mg4j.search;/* * MG4J: Managing Gigabytes for Java * * Copyright (C) 2003-2007 Paolo Boldi and Sebastiano Vigna * * This library is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by the Free * Software Foundation; either version 2.1 of the License, or (at your option) * any later version. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */import it.unimi.dsi.fastutil.ints.IntHeapSemiIndirectPriorityQueue;import it.unimi.dsi.fastutil.ints.IntSet;import it.unimi.dsi.fastutil.objects.ObjectHeapSemiIndirectPriorityQueue;import it.unimi.dsi.mg4j.index.Index;import it.unimi.dsi.mg4j.index.IndexIterator;import it.unimi.dsi.util.Interval;import it.unimi.dsi.util.Intervals;import java.io.IOException;/** An iterator on documents that returns the OR of a number of document iterators.** <P>This class adds to {@link it.unimi.dsi.mg4j.search.AbstractUnionDocumentIterator}* an interval iterator generating the OR of the intervals returned for each of the documents involved. */public class OrDocumentIterator extends AbstractUnionDocumentIterator implements DocumentIterator { @SuppressWarnings("unused") private final static boolean DEBUG = false; /** Returns a document iterator that computes the OR of the given array of iterators. * * <P>Note that the special case of the empty and of the singleton arrays * are handled efficiently. * * @param documentIterator the iterators to be joined. * @return a document iterator that computes the OR of <code>it</code>. * @throws IOException */ public static DocumentIterator getInstance( final DocumentIterator... documentIterator ) throws IOException { if ( documentIterator.length == 0 ) return DocumentIterators.EMPTY_ITERATOR; if ( documentIterator.length == 1 ) return documentIterator[ 0 ]; return new OrDocumentIterator( documentIterator ); } /** Creates a new document iterator that computes the OR of the given array of iterators. * @param documentIterator the iterators to be joined. * @throws IOException */ protected OrDocumentIterator( final DocumentIterator... documentIterator ) throws IOException { super( documentIterator ); } protected IntervalIterator getComposedIntervalIterator( final Index index ) { return indexIterator == null ? new OrIntervalIterator( index ) : new OrIndexIntervalIterator( index ); } /** An interval iterator of nondecreasing disjoint intervals * obtained ORing the output of a set of iterators with * the same property. */ private class OrIntervalIterator extends AbstractCompositeIntervalIterator { /** The index of this iterator. */ final Index index; /** A heap-based indirect priority queue used to keep track of the currently scanned intervals. */ private ObjectHeapSemiIndirectPriorityQueue<Interval> intervalQueue; /** The left extreme of the last returned interval, or {@link Integer#MIN_VALUE} after a {@link #reset()}. */ private int lastLeft; /** An array to hold the front of the interval queue. */ private final int[] intervalFront; /** Creates a new OR interval iterator. * * @param index the index of the iterator. */ public OrIntervalIterator( final Index index ) { super( n ); // We just set up some internal data, but we perform no initialisation. this.index = index; intervalQueue = new ObjectHeapSemiIndirectPriorityQueue<Interval>( curr, Intervals.ENDS_BEFORE_OR_IS_SUFFIX ); intervalFront = new int[ n ]; } public void reset() throws IOException { lastLeft = Integer.MIN_VALUE; next = null; intervalQueue.clear(); for ( int i = computeFront(), k; i-- != 0; ) { k = front[ i ]; intervalIterator[ k ] = documentIterator[ k ].intervalIterator( index ); if ( intervalIterator[ k ] != IntervalIterators.TRUE && ( curr[ k ] = intervalIterator[ k ].nextInterval() ) != null ) intervalQueue.enqueue( k ); } } public void intervalTerms( final IntSet terms ) { final int frontSize = intervalQueue.front( intervalFront ); final int[] intervalFront = this.intervalFront; for( int i = frontSize; i-- != 0; ) intervalIterator[ intervalFront[ i ] ].intervalTerms( terms ); } public Interval nextInterval () throws IOException { if ( next != null ) { final Interval result = next; next = null; return result; } if ( intervalQueue.isEmpty() ) return null; int first; while ( curr[ first = intervalQueue.first() ].left <= lastLeft ) { if ( ( curr[ first ] = intervalIterator[ first ].nextInterval() ) != null ) intervalQueue.changed(); else { intervalQueue.dequeue(); if ( intervalQueue.isEmpty() ) return null; } } lastLeft = curr[ first ].left; return curr[ first ]; } public int extent() { int e, s; s = Integer.MAX_VALUE; for ( int i = computeFront(), k; i-- != 0; ) { k = front[ i ]; e = curr[ k ] != null ? intervalIterator[ k ].extent() : Integer.MAX_VALUE; if ( e < s ) s = e; } return s; } } /** An optimised interval iterator with the same semantics as that implemented * by {@link OrDocumentIterator}, but using just {@link IndexIterator#positionArray()}. */ protected class OrIndexIntervalIterator extends AbstractCompositeIndexIntervalIterator implements IntervalIterator { @SuppressWarnings({ "unused", "hiding" }) private final static boolean DEBUG = false; @SuppressWarnings("hiding") private final static boolean ASSERTS = false; /** The index of this iterator. */ final Index index; /** A heap-based semi-indirect priority queue used to keep track of the currently scanned positions. */ private final IntHeapSemiIndirectPriorityQueue positionQueue; /** An array to hold the front of the position queue. */ private final int[] positionFront; public OrIndexIntervalIterator( final Index index ) { super( n ); this.index = index; positionQueue = new IntHeapSemiIndirectPriorityQueue( curr ); positionFront = new int[ n ]; } public void reset() throws IOException { positionQueue.clear(); for ( int i = computeFront(), k; i-- != 0; ) { k = front[ i ]; if ( indexIterator[ k ].index() == index ) { position[ k ] = indexIterator[ k ].positionArray(); count[ k ] = indexIterator[ k ].count(); curr[ k ] = position[ k ][ currPos[ k ] = 0 ]; positionQueue.enqueue( k ); } } if ( ASSERTS ) assert ! positionQueue.isEmpty(); next = Interval.valueOf( curr[ positionQueue.first() ] ); } public void intervalTerms( final IntSet terms ) { final int frontSize = positionQueue.front( positionFront ); final int[] positionFront = this.positionFront; for( int i = frontSize; i-- != 0; ) terms.add( indexIterator[ positionFront[ i ] ].termNumber() ); } public Interval nextInterval() { if ( next != null ) { final Interval result = next; next = null; return result; } if ( positionQueue.isEmpty() ) return null; int first = positionQueue.first(); final int previous = curr[ first ]; do if ( ++currPos[ first ] == count[ first ] ) { positionQueue.dequeue(); if ( positionQueue.isEmpty() ) return null; } else { curr[ first ] = position[ first ][ currPos[ first ] ]; positionQueue.changed(); } while ( curr[ first = positionQueue.first() ] == previous ); return Interval.valueOf( curr[ first ] ); } public int extent() { return 1; } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -