📄 abstractuniondocumentiterator.java
字号:
package it.unimi.dsi.mg4j.search;/* * MG4J: Managing Gigabytes for Java * * Copyright (C) 2003-2007 Paolo Boldi and Sebastiano Vigna * * This library is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by the Free * Software Foundation; either version 2.1 of the License, or (at your option) * any later version. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */import it.unimi.dsi.fastutil.IndirectPriorityQueue;import it.unimi.dsi.fastutil.ints.IntHeapSemiIndirectPriorityQueue;import it.unimi.dsi.fastutil.objects.Reference2ReferenceArrayMap;import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;import it.unimi.dsi.mg4j.index.Index;import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;import java.io.IOException;import java.util.Iterator;/** A document iterator on documents, generating the union of the documents returned * by a number of document iterators. * * <p>The pattern of this class is the same as that of {@link AbstractIntersectionDocumentIterator}. * Additionally, this class provides a mechanism that makes accessible the set of component * document iterators that are {@linkplain #computeFront() positioned on the current document}. */public abstract class AbstractUnionDocumentIterator extends AbstractCompositeDocumentIterator { private final static boolean DEBUG = false; //private final static boolean ASSERTS = false; /** A heap-based semi-indirect priority queue used to keep track of the currently scanned integers. */ final protected IntHeapSemiIndirectPriorityQueue queue; /** The {@link IndirectPriorityQueue#front(int[])} of {@link #queue}, if {@link #frontSize} is not -1. */ final protected int[] front; /** The reference array used for the queue. */ final protected int[] curr; /** A map from indices to interval iterators. */ final private Reference2ReferenceArrayMap<Index,IntervalIterator> intervalIterators; /** A map from indices to the iterators returned for the current document. The key set may * not contain an index because the related iterator has never been requested. Moreover, * the iterator in this map for a given index may differ from the one in {@link #intervalIterators} * because it could be {@link IntervalIterators#TRUE} (in fact, in that case it may even * happen that {@link #intervalIterators} does not contain the index). */ final private Reference2ReferenceArrayMap<Index,IntervalIterator> currentIterators; /** An unmodifiable wrapper around {@link #currentIterators}. */ final private Reference2ReferenceMap<Index,IntervalIterator> unmodifiableCurrentIterators; /** The number of valid entries in {@link #front}, or -1 if the front has not been computed for the current document. */ protected int frontSize = -1; /** Creates a new document iterator that computes the OR of the given array of iterators. * @param documentIterator the iterators to be joined. * @throws IOException */ protected AbstractUnionDocumentIterator( final DocumentIterator... documentIterator ) throws IOException { super( documentIterator ); this.curr = new int[ n ]; queue = new IntHeapSemiIndirectPriorityQueue( curr ); intervalIterators = new Reference2ReferenceArrayMap<Index,IntervalIterator>( indices.size() ); currentIterators = new Reference2ReferenceArrayMap<Index,IntervalIterator>( indices.size() ); unmodifiableCurrentIterators = Reference2ReferenceMaps.unmodifiable( currentIterators ); // Only add to the queue nonempty iterators... for ( int i = 0; i < n; i++ ) if ( ( curr[ i ] = documentIterator[ i ].nextDocument() ) != -1 ) queue.enqueue( i ); // If queue is empty, the process is over if ( ! queue.isEmpty() ) next = curr[ queue.first() ]; front = new int[ queue.size() ]; } public int skipTo( final int n ) throws IOException { if ( last >= n ) return last; if ( next >= n ) return nextDocument(); if ( queue.isEmpty() ) return Integer.MAX_VALUE; last = next = -1; currentIterators.clear(); frontSize = -1; // Invalidate front int first, res; while( curr[ first = queue.first() ] < n ) { res = documentIterator[ first ].skipTo( n ); // Cannot advance the minimum if ( res == Integer.MAX_VALUE ) { // Remove it queue.dequeue(); // If nothing else remains, we are done if ( queue.isEmpty() ) return Integer.MAX_VALUE; } else { // Advance the top element, and signal this fact to the queue curr[ first ] = res; queue.changed(); } } return last = curr[ first ]; } public int nextDocument() throws IOException { if ( next != -1 ) { // We already know what to return last = next; next = -1; return last; } currentIterators.clear(); frontSize = -1; // Invalidate front if ( queue.isEmpty() ) return last = -1; // The least element int first, c = curr[ queue.first() ]; // Advance all elements equal to the least one while( curr[ first = queue.first() ] == c ) { if ( ( curr[ first ] = documentIterator[ first ].nextDocument() ) != - 1 ) queue.changed(); else { // Remove it queue.dequeue(); // If nothing else remains, we are done if ( queue.isEmpty() ) return last = -1; } } return last = curr[ first ]; } /** Forces computation of the current front, returning the number of indices it contains. * * <p>After a call to this method, * the first elements of {@link #front} contain * the indices of the {@linkplain AbstractCompositeDocumentIterator#documentIterator component document iterators} * that are positioned on the current document. If the front has already been * computed for the current document, this method has no side effects. * * @return the size of the current front (the number of valid entries in {@link #front}). */ protected int computeFront() { if ( frontSize == -1 ) frontSize = queue.front( front ); return frontSize; } public Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() throws IOException { final Iterator<Index> i = indices.iterator(); while( i.hasNext() ) intervalIterator( i.next() ); return unmodifiableCurrentIterators; } public IntervalIterator intervalIterator( final Index index ) throws IOException { if ( DEBUG ) System.err.println( this + ".intervalIterator(" + index + ")" ); if ( last == -1 ) throw new IllegalStateException(); if ( ! indices.contains( index ) ) return IntervalIterators.TRUE; IntervalIterator intervalIterator; // If the iterator has been created and it's ready, we just return it. if ( ( intervalIterator = currentIterators.get( index ) ) != null ) return intervalIterator; boolean atLeastOneIsTrue = false; /* If all iterators are TRUE, we return TRUE. Otherwise, * we skip all FALSE iterators and all iterators whose document * is not last, and merge the remaining ones. If * all iterators are FALSE, we return FALSE. */ // ALERT: this logic should be in OrDocumentIterator int c = 0; // This counts non-TRUE, non-FALSE interval iterators IntervalIterator oneNotTrue = null; for( int i = computeFront(); i -- != 0; ) { intervalIterator = documentIterator[ front[ i ] ].intervalIterator( index ); if ( intervalIterator == IntervalIterators.TRUE ) atLeastOneIsTrue = true; else { oneNotTrue = intervalIterator; if ( intervalIterator != IntervalIterators.FALSE ) c++; } } // If c == 0, all component iterators are either TRUE or FALSE. We return TRUE if at least one is TRUE. if ( c == 0 ) intervalIterator = atLeastOneIsTrue? IntervalIterators.TRUE : IntervalIterators.FALSE; else if ( c == 1 ) intervalIterator = oneNotTrue; // If we exclude TRUE and FALSE iterators, just oneNotTrue remains: we pass it upwards. else { intervalIterator = intervalIterators.get( index ); if ( intervalIterator == null ) intervalIterators.put( index, intervalIterator = getComposedIntervalIterator( index ) ); intervalIterator.reset(); } currentIterators.put( index, intervalIterator ); return intervalIterator; } abstract protected IntervalIterator getComposedIntervalIterator( Index index ); /** Invokes {@link #acceptOnTruePaths(DocumentIteratorVisitor)} only on component * iterators positioned on the current document. * * @param visitor a visitor. * @return true if the visit should continue. * @throws IOException */ @Override public boolean acceptOnTruePaths( DocumentIteratorVisitor visitor ) throws IOException { if ( ! visitor.visitPre( this ) ) return false; final int s = computeFront(); for( int i = 0; i < s; i++ ) if ( ! documentIterator[ front[ i ] ].acceptOnTruePaths( visitor ) ) return false; return visitor.visitPost( this ); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -