📄 anddocumentiterator.java
字号:
package it.unimi.dsi.mg4j.search;/* * MG4J: Managing Gigabytes for Java * * Copyright (C) 2003-2007 Paolo Boldi and Sebastiano Vigna * * This library is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by the Free * Software Foundation; either version 2.1 of the License, or (at your option) * any later version. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */import it.unimi.dsi.fastutil.ints.IntHeapSemiIndirectPriorityQueue;import it.unimi.dsi.fastutil.ints.IntSet;import it.unimi.dsi.fastutil.objects.ObjectArrays;import it.unimi.dsi.fastutil.objects.ObjectHeapSemiIndirectPriorityQueue;import it.unimi.dsi.mg4j.index.Index;import it.unimi.dsi.util.Interval;import it.unimi.dsi.util.Intervals;import java.io.IOException;/** A document iterator that returns the AND of a number of document iterators. * * <P>This class adds to {@link it.unimi.dsi.mg4j.search.AbstractIntersectionDocumentIterator} * an interval interator generating the AND of the intervals returned for each of the documents involved. */public class AndDocumentIterator extends AbstractIntersectionDocumentIterator { private final static boolean DEBUG = false; /** Returns a document iterator that computes the AND of the given array of iterators. * * <P>Note that the special case of the empty and of the singleton arrays * are handled efficiently. * * @param numberOfDocuments the number of documents; relevant only if <code>it</code> has zero length. * @param documentIterator the iterators to be joined. * @return a document iterator that computes the AND of <code>it</code>. * @throws IOException */ public static DocumentIterator getInstance( final int numberOfDocuments, final DocumentIterator... documentIterator ) throws IOException { if ( documentIterator.length == 0 ) return NotDocumentIterator.getInstance( DocumentIterators.EMPTY_ITERATOR, numberOfDocuments ); if ( documentIterator.length == 1 ) return documentIterator[ 0 ]; return new AndDocumentIterator( documentIterator ); } /** Returns a document iterator that computes the AND of the given nonzero-length array of iterators. * * <P>Note that the special case of the singleton array is handled efficiently. * * @param documentIterator the iterators to be joined (at least one). * @return a document iterator that computes the AND of <code>it</code>. * @throws IOException */ public static DocumentIterator getInstance( final DocumentIterator... documentIterator ) throws IOException { if ( documentIterator.length == 0 ) throw new IllegalArgumentException(); return getInstance( -1, documentIterator ); } protected AndDocumentIterator( final DocumentIterator[] documentIterator ) throws IOException { super( documentIterator ); } protected IntervalIterator getComposedIntervalIterator( final Index index ) { return indexIterator == null ? new AndIntervalIterator( index ) : new AndIndexIntervalIterator( index ); } /** An interval iterator returning the AND (in the Clarke−Cormack−Burkowski lattice) of the component interval iterators. */ private class AndIntervalIterator extends AbstractCompositeIntervalIterator implements IntervalIterator { /** The index of this iterator. */ final private Index index; /** A heap-based indirect priority queue used to keep track of the currently scanned intervals. */ final private ObjectHeapSemiIndirectPriorityQueue<Interval> queue; /** Whether the scan is over. */ private boolean endOfProcess; /** The left extreme of the last returned interval, or {@link Integer#MIN_VALUE} after a {@link #reset()}. */ private int lastLeft; /** The maximum right extreme currently in the queue. */ private int maxRight; /** Creates a new AND interval iterator. * * @param index the index of the iterator. */ public AndIntervalIterator( final Index index ) { super( n ); // We just set up some internal data, but we perform no initialisation. this.index = index; queue = new ObjectHeapSemiIndirectPriorityQueue<Interval>( curr, Intervals.STARTS_BEFORE_OR_PROLONGS ); } public void reset() throws IOException { ObjectArrays.fill( curr, null ); queue.clear(); next = null; maxRight = Integer.MIN_VALUE; lastLeft = Integer.MIN_VALUE; endOfProcess = false; for ( int i = 0; i < n; i++ ) { intervalIterator[ i ] = documentIterator[ i ].intervalIterator( index ); if ( ! intervalIterator[ i ].hasNext() ) { // If by any chance we meet an empty (false) iterator we are just false endOfProcess = true; return; } // TRUE iterators need a special treatment if ( intervalIterator[ i ] != IntervalIterators.TRUE ) { curr[ i ] = intervalIterator[ i ].nextInterval(); queue.enqueue( i ); maxRight = Math.max( maxRight, curr[ i ].right ); } } /* At this point the queue could not be full because of TRUE iterators, but it's OK, * because TRUE is the neutral element of conjunction. However, we cannot handle here * the case in which all conjuncts are TRUE, as this case must be handled at a higher level. */ if ( queue.isEmpty() ) throw new IllegalArgumentException(); } public void intervalTerms( final IntSet terms ) { for( int i = n; i-- != 0; ) intervalIterator[ i ].intervalTerms( terms ); } public Interval nextInterval() throws IOException { if ( next != null ) { final Interval result = next; next = null; return result; } if ( endOfProcess ) return null; int first; while ( curr[ first = queue.first() ].left == lastLeft ) { if ( ( curr[ first ] = intervalIterator[ first ].nextInterval() ) == null ) { endOfProcess = true; return null; } maxRight = Math.max( maxRight, curr[ first ].right ); queue.changed(); } int nextLeft, nextRight; do { nextLeft = curr[ first = queue.first() ].left; nextRight = maxRight; if ( DEBUG ) System.err.println( this + " is saving interval " + Interval.valueOf( nextLeft, nextRight ) ); /* We check whether the current top is equal to the span of the queue, and * whether the top interval iterator is exhausted. In both cases, the * current span is guaranteed to be a minimal interval. */ if ( curr[ first ].right == maxRight || ( endOfProcess = ( curr[ first ] = intervalIterator[ first ].nextInterval() ) == null ) ) break; maxRight = Math.max( maxRight, curr[ first ].right ); queue.changed(); } while ( maxRight == nextRight ); return Interval.valueOf( lastLeft = nextLeft, nextRight ); } public int extent() { int s = 0; for ( int i = n; i-- != 0; ) s += intervalIterator[ i ].extent(); return s; } } /** An interval iterator returning the AND (in the Clarke−Cormack−Burkowski lattice) of the component interval iterators. */ private class AndIndexIntervalIterator extends AbstractCompositeIndexIntervalIterator implements IntervalIterator { /** The index of this iterator. */ final private Index index; /** A heap-based indirect priority queue used to keep track of the currently scanned positions. */ final private IntHeapSemiIndirectPriorityQueue queue; /** Whether the scan is over. */ private boolean endOfProcess; /** The left extreme of the last returned interval, or {@link Integer#MIN_VALUE} after a {@link #reset()}. */ private int lastLeft; /** The maximum right extreme currently in the queue. */ private int maxRight; /** Creates a new AND interval iterator. * * @param index the index of the iterator. */ public AndIndexIntervalIterator( final Index index ) { super( n ); // We just set up some internal data, but we perform no initialisation. this.index = index; queue = new IntHeapSemiIndirectPriorityQueue( curr ); } public void reset() throws IOException { queue.clear(); maxRight = Integer.MIN_VALUE; lastLeft = Integer.MIN_VALUE; next = null; endOfProcess = false; for ( int i = 0; i < n; i++ ) { // The case != index is identical to the TRUE case in AndIntervalIterator. if ( indexIterator[ i ].index() == index ) { position[ i ] = indexIterator[ i ].positionArray(); count[ i ] = indexIterator[ i ].count(); curr[ i ] = position[ i ][ currPos[ i ] = 0 ]; queue.enqueue( i ); maxRight = Math.max( maxRight, curr[ i ] ); } } /* At this point the queue could not be full because of TRUE iterators, but it's OK, * because TRUE is the neutral element of conjunction. However, we cannot handle here * the case in which all conjuncts are TRUE, as this case must be handled at a higher level. */ if ( queue.isEmpty() ) throw new IllegalArgumentException(); } public void intervalTerms( final IntSet terms ) { for( int i = n; i-- != 0; ) terms.add( indexIterator[ i ].termNumber() ); } public Interval nextInterval() { if ( next != null ) { final Interval result = next; next = null; return result; } if ( endOfProcess ) return null; int first; while ( curr[ first = queue.first() ] == lastLeft ) { if ( ++currPos[ first ] == count[ first ] ) { endOfProcess = true; return null; } curr[ first ] = position[ first ][ currPos[ first ] ]; maxRight = Math.max( maxRight, curr[ first ] ); queue.changed(); } int nextLeft, nextRight; do { nextLeft = curr[ first = queue.first() ]; nextRight = maxRight; if ( DEBUG ) System.err.println( this + " is saving interval " + Interval.valueOf( nextLeft, nextRight ) ); /* We check whether all iterators are on the same position, and * whether the top interval iterator is exhausted. In both cases, the * current span is guaranteed to be a minimal interval. */ if ( curr[ first ] == maxRight || ( endOfProcess = ++currPos[ first ] == count[ first ] ) ) break; curr[ first ] = position[ first ][ currPos[ first ] ]; if ( maxRight < curr[ first ] ) maxRight = curr[ first ]; queue.changed(); } while ( maxRight == nextRight ); return Interval.valueOf( lastLeft = nextLeft, nextRight ); } public int extent() { return n; } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -