📄 documentalmergedclusterdocumentiterator.java
字号:
package it.unimi.dsi.mg4j.index.cluster;/* * MG4J: Managing Gigabytes for Java * * Copyright (C) 2006-2007 Sebastiano Vigna * * This library is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by the Free * Software Foundation; either version 2.1 of the License, or (at your option) * any later version. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */import it.unimi.dsi.fastutil.ints.IntHeapSemiIndirectPriorityQueue;import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;import it.unimi.dsi.fastutil.objects.ReferenceArraySet;import it.unimi.dsi.fastutil.objects.ReferenceSet;import it.unimi.dsi.mg4j.index.Index;import it.unimi.dsi.mg4j.search.AbstractDocumentIterator;import it.unimi.dsi.mg4j.search.DocumentIterator;import it.unimi.dsi.mg4j.search.IntervalIterator;import it.unimi.dsi.mg4j.search.IntervalIterators;import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;import java.io.IOException;/** A document iterator merging iterators from local indices. * * @author Sebastiano Vigna */public class DocumentalMergedClusterDocumentIterator extends AbstractDocumentIterator implements DocumentIterator { /** The component document iterators. */ final protected DocumentIterator[] documentIterator; /** The number of component iterators. */ final protected int n; /** The indices corresponding to each underlying document iterator. */ protected final int[] usedIndex; /** The cached strategy of the index we refer to. */ protected final DocumentalClusteringStrategy strategy; /** The queue of document iterator indices (offsets into {@link #documentIterator} and {@link #usedIndex}). */ protected final IntHeapSemiIndirectPriorityQueue queue; /** The reference array for the queue (containing <em>global</em> document pointers). */ protected final int[] globalDocumentPointer; /** The set of indices involved in this iterator. */ protected final ReferenceSet<Index> indices = new ReferenceArraySet<Index>(); /** The underlying index reader. */ private final DocumentalClusterIndexReader indexReader; /** The current iterator. */ protected int currentIterator = -1; /** Whether there are no more documents to be returned. */ protected boolean exhausted; /** Creates a new document iterator for a documental cluster. * * <p>This constructor uses an array of document iterators that it is not required to be full. * This is very useful with rare terms. * * @param indexReader the underlying index reader. * @param documentIterator an array of document iterators. * @param usedIndex an array parallel to <code>documentIterator</code> containing the ordinal numbers * of the indices corresponding to the iterators. */ public DocumentalMergedClusterDocumentIterator( final DocumentalClusterIndexReader indexReader, final DocumentIterator[] documentIterator, int[] usedIndex ) throws IOException { this.documentIterator = documentIterator; this.n = documentIterator.length; this.indexReader = indexReader; this.usedIndex = usedIndex; strategy = indexReader.index.strategy; globalDocumentPointer = new int[ n ]; queue = new IntHeapSemiIndirectPriorityQueue( globalDocumentPointer, n ); int result; for( int i = n; i-- != 0; ) { if ( ( result = documentIterator[ i ].nextDocument() ) != -1 ) { indices.addAll( documentIterator[ i ].indices() ); globalDocumentPointer[ i ] = strategy.globalPointer( usedIndex[ i ], result ); queue.enqueue( i ); } } if ( queue.isEmpty() ) exhausted = true; else { currentIterator = queue.first(); next = globalDocumentPointer[ currentIterator ]; } } public IntervalIterator intervalIterator() throws IOException { if ( last == -1 ) throw new IllegalStateException(); return documentIterator[ currentIterator ].intervalIterator(); } public IntervalIterator intervalIterator( Index index ) throws IOException { if ( last == -1 ) throw new IllegalStateException(); if ( ! indices.contains( index ) ) return IntervalIterators.TRUE; return documentIterator[ currentIterator ].intervalIterator( index ); } public Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() throws IOException { if ( last == -1 ) throw new IllegalStateException(); return documentIterator[ currentIterator ].intervalIterators(); } public ReferenceSet<Index> indices() { return indices; } // TODO: this needs tests public int skipTo( final int p ) throws IOException { int i, d; if ( p <= last ) return last; //System.err.println( "Advancing to " + n + " doc: " + Arrays.toString( doc ) + " first: " + queue.first() ); next = -1; while( ! queue.isEmpty() && globalDocumentPointer[ i = queue.first() ] < p ) { d = documentIterator[ i ].skipTo( strategy.localPointer( p ) ); if ( d == Integer.MAX_VALUE ) queue.dequeue(); else { globalDocumentPointer[ i ] = strategy.globalPointer( usedIndex[ i ], d ); if ( globalDocumentPointer[ i ] < p ) queue.dequeue(); // This covers the case of getting to the end of list without finding p else queue.changed(); } } if ( queue.isEmpty() ) { exhausted = true; last = -1; return Integer.MAX_VALUE; } return last = globalDocumentPointer[ currentIterator = queue.first() ]; } public int nextDocument() throws IOException { if ( next >= 0 ) { last = next; next = -1; return last; } if ( exhausted ) return last = -1; final int result; if ( ( result = documentIterator[ currentIterator ].nextDocument() ) != -1 ) { globalDocumentPointer[ currentIterator ] = strategy.globalPointer( usedIndex[ currentIterator ], result ); queue.changed(); } else queue.dequeue(); if ( queue.isEmpty() ) { exhausted = true; return last = -1; } currentIterator = queue.first(); return last = globalDocumentPointer[ currentIterator ]; } public boolean accept( DocumentIteratorVisitor visitor ) throws IOException { boolean result = true; for( DocumentIterator d: documentIterator ) if ( ! ( result &= d.accept( visitor ) ) ) break; return result; } public boolean acceptOnTruePaths( DocumentIteratorVisitor visitor ) throws IOException { return documentIterator[ currentIterator ].acceptOnTruePaths( visitor ); } public void dispose() throws IOException { indexReader.close(); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -