📄 documentalconcatenatedclusterdocumentiterator.java
字号:
package it.unimi.dsi.mg4j.index.cluster;

/*
 * MG4J: Managing Gigabytes for Java
 *
 * Copyright (C) 2006-2007 Sebastiano Vigna
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 2.1 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 */

import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.ReferenceArraySet;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.search.AbstractDocumentIterator;
import it.unimi.dsi.mg4j.search.DocumentIterator;
import it.unimi.dsi.mg4j.search.IntervalIterator;
import it.unimi.dsi.mg4j.search.IntervalIterators;
import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;

import java.io.IOException;

import cern.colt.Arrays;

/** A document iterator concatenating iterators from local indices.
 *
 * <p>Each component iterator enumerates local document pointers of one local index;
 * this class translates them to global pointers through the cached
 * {@link ContiguousDocumentalStrategy} and returns the components one after the other.
 * The code relies on the components being nonempty (this is checked at construction time)
 * and on {@link #documentIteratorIndex} being sorted in increasing order (this is
 * <em>not</em> checked).
 *
 * <p>The end of list is reported as -1 by {@link #nextDocument()} and as
 * {@link Integer#MAX_VALUE} by {@link #skipTo(int)}.
 *
 * @author Alessandro Arrabito
 * @author Sebastiano Vigna
 */
public class DocumentalConcatenatedClusterDocumentIterator extends AbstractDocumentIterator implements DocumentIterator {
	private static final boolean DEBUG = false;
	private static final boolean ASSERTS = false;

	/** The component document iterators. */
	protected final DocumentIterator[] documentIterator;
	/** The number of component iterators. */
	protected final int n;
	/** The indices corresponding to each underlying document iterator. */
	protected final int[] documentIteratorIndex;
	/** The cached strategy of the index we refer to. */
	protected final ContiguousDocumentalStrategy strategy;
	/** The current iterator (an index into {@link #documentIterator}). If it is equal to {@link #n},
	 * it means that we hit the end of list on the last document iterator. Otherwise, {@link #last}
	 * contains the last document ever returned or reached by {@link #skipTo(int)}. */
	protected int currentIterator;
	/** The last iterator to ever return something (an index into {@link #documentIterator}),
	 * or -1 if no component has returned a document yet or the last {@link #skipTo(int)} hit the end of list. */
	protected int lastIterator = -1;
	/** The underlying index reader. */
	private final DocumentalClusterIndexReader indexReader;
	/** The set of indices involved in this iterator. */
	private final ReferenceArraySet<Index> indices = new ReferenceArraySet<Index>();

	/** Creates a new document iterator for a documental cluster.
	 *
	 * <p>This constructor uses an array of document iterators that it is not required to be full.
	 * This is very useful with rare terms.
	 *
	 * @param indexReader the underlying index reader.
	 * @param documentIterator an array of document iterators.
	 * @param usedIndex an array parallel to <code>documentIterator</code> containing the number
	 * of the indices corresponding to the iterators.
	 * @throws IllegalArgumentException if any component iterator is empty.
	 */
	public DocumentalConcatenatedClusterDocumentIterator( final DocumentalClusterIndexReader indexReader, final DocumentIterator[] documentIterator, final int[] usedIndex ) {
		this.documentIterator = documentIterator;
		this.n = documentIterator.length;
		this.indexReader = indexReader;
		this.documentIteratorIndex = usedIndex;
		this.strategy = (ContiguousDocumentalStrategy)indexReader.index.strategy;

		// Scanned backwards, as in the original code, so that the insertion order into the index set is unchanged.
		for( int i = n; i-- != 0; ) {
			if ( ! documentIterator[ i ].hasNext() ) throw new IllegalArgumentException( "All component document iterators must be nonempty" );
			indices.addAll( documentIterator[ i ].indices() );
		}
	}

	/** Returns the last component iterator that returned a document.
	 *
	 * @return the component iterator of index {@link #lastIterator}.
	 * @throws IllegalStateException if {@link #lastIterator} is -1, that is, no component
	 * has returned a document yet or the last {@link #skipTo(int)} hit the end of list.
	 */
	private DocumentIterator lastDocumentIterator() {
		if ( lastIterator < 0 ) throw new IllegalStateException( "No component iterator is positioned on a document" );
		return documentIterator[ lastIterator ];
	}

	/** Delegates to the last component iterator that returned a document.
	 *
	 * @return the interval iterator provided by that component.
	 * @throws IllegalStateException if no component is positioned on a document.
	 */
	public IntervalIterator intervalIterator() throws IOException {
		return lastDocumentIterator().intervalIterator();
	}

	/** Delegates to the last component iterator that returned a document, unless
	 * <code>index</code> is not involved in this iterator.
	 *
	 * @param index an index.
	 * @return {@link IntervalIterators#TRUE} if <code>index</code> is not in {@link #indices()};
	 * otherwise, the interval iterator provided by the component for <code>index</code>.
	 * @throws IllegalStateException if <code>index</code> is involved but no component is positioned on a document.
	 */
	public IntervalIterator intervalIterator( Index index ) throws IOException {
		if ( ! indices.contains( index ) ) return IntervalIterators.TRUE;
		return lastDocumentIterator().intervalIterator( index );
	}

	/** Delegates to the last component iterator that returned a document.
	 *
	 * @return the map of interval iterators provided by that component.
	 * @throws IllegalStateException if no component is positioned on a document.
	 */
	public Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() throws IOException {
		return lastDocumentIterator().intervalIterators();
	}

	/** Returns the union of the index sets of all component iterators, as collected at construction time.
	 *
	 * @return the set of indices involved in this iterator.
	 */
	public ReferenceSet<Index> indices() {
		return indices;
	}

	/** Skips to the first document whose global pointer is greater than or equal to <code>p</code>.
	 *
	 * @param p a global document pointer.
	 * @return {@link #last} unchanged if it is already at least <code>p</code>; otherwise the global pointer
	 * reached, or {@link Integer#MAX_VALUE} if no such document exists.
	 */
	public int skipTo( final int p ) throws IOException {
		if ( DEBUG ) System.err.println( this + ": Requested to skip to " + p + "..." );
		// In this case we are already beyond p
		if ( last >= p ) return last;

		// In this case, we are already beyond the last iterator
		if ( currentIterator == n ) return Integer.MAX_VALUE;

		// Discard any document cached in the inherited lookahead field.
		next = -1;

		// Otherwise, first we recover the local index that contains p
		final int k = strategy.localIndex( p );

		if ( DEBUG ) System.err.println( this + ": Moving to local index " + k );

		// Then we advance currentIterator until we get to the first component whose index is at least k.
		while( currentIterator < n && documentIteratorIndex[ currentIterator ] < k ) currentIterator++;

		if ( ASSERTS ) assert currentIterator == n || documentIteratorIndex[ currentIterator ] >= k;

		// If currentIterator == n, we have been requested to skip to a cluster that does not contain pointers
		int globalResult = Integer.MAX_VALUE;

		if ( currentIterator < n ) {
			if ( documentIteratorIndex[ currentIterator ] == k ) {
				// Now we skip to p inside the only index that might contain it.
				globalResult = documentIterator[ currentIterator ].skipTo( strategy.localPointer( p ) );
				if ( DEBUG ) System.err.println( this + ": Skipped to local pointer " + strategy.localPointer( p ) + " in iterator " + currentIterator + "; result: " + globalResult );

				// If we got to the end of list, the first document beyond p is the first document of the next iterator (if any).
				if ( globalResult == Integer.MAX_VALUE && ++currentIterator < n ) globalResult = documentIterator[ currentIterator ].nextDocument();
			}
			else {
				/* There is no component for local index k, so this component belongs to a later
				 * local index. The local pointer of p is relative to index k and must not be used
				 * here, as it would discard documents that all follow p. Since last < p, this
				 * component has not returned anything yet, and, being nonempty, its next
				 * document is its first one. */
				globalResult = documentIterator[ currentIterator ].nextDocument();
			}
		}

		if ( globalResult == Integer.MAX_VALUE ) {
			lastIterator = -1;
			last = Integer.MAX_VALUE;
		}
		else {
			lastIterator = currentIterator;
			last = strategy.globalPointer( documentIteratorIndex[ currentIterator ], globalResult );
		}

		if ( DEBUG ) System.err.println( this + ": Will return " + last + " (lastIterator=" + lastIterator + ")" );
		return last;
	}

	/** Returns the next document, moving to the following component when the current one is exhausted.
	 *
	 * @return the global pointer of the next document, or -1 if there are no more documents.
	 */
	public int nextDocument() throws IOException {
		// A document was already fetched and cached in the inherited lookahead field: hand it out.
		if ( next >= 0 ) {
			last = next;
			next = -1;
			return last;
		}

		if ( currentIterator == n ) return last = -1;

		final int result = documentIterator[ currentIterator ].nextDocument();
		if ( result != -1 ) {
			lastIterator = currentIterator;
			return last = strategy.globalPointer( documentIteratorIndex[ currentIterator ], result );
		}

		currentIterator++;

		/* Note that we are heavily exploiting the fact that only nonempty
		 * iterators are present. */
		if ( currentIterator < n ) {
			lastIterator = currentIterator;
			return last = strategy.globalPointer( documentIteratorIndex[ currentIterator ], documentIterator[ currentIterator ].nextDocument() );
		}

		return last = -1;
	}

	// TODO: examine carefully the state change for lastIterator

	/** Makes every component iterator accept the visitor, stopping at the first one that returns false.
	 *
	 * @param visitor a visitor.
	 * @return true if all component iterators returned true.
	 */
	public boolean accept( final DocumentIteratorVisitor visitor ) throws IOException {
		for( DocumentIterator d: documentIterator ) if ( ! d.accept( visitor ) ) return false;
		return true;
	}

	/** Delegates to the last component iterator that returned a document.
	 *
	 * @param visitor a visitor.
	 * @return the result returned by that component.
	 * @throws IllegalStateException if no component is positioned on a document.
	 */
	public boolean acceptOnTruePaths( final DocumentIteratorVisitor visitor ) throws IOException {
		return lastDocumentIterator().acceptOnTruePaths( visitor );
	}

	/** Closes the underlying index reader. */
	public void dispose() throws IOException {
		indexReader.close();
	}

	/** Returns the simple class name followed by the string representation of the component iterators. */
	public String toString() {
		return this.getClass().getSimpleName() + Arrays.toString( documentIterator );
	}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -