📄 documentalconcatenatedclusterdocumentiterator.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
字号:
package it.unimi.dsi.mg4j.index.cluster;

/*
 * MG4J: Managing Gigabytes for Java
 *
 * Copyright (C) 2006-2007 Sebastiano Vigna
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 2.1 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 */

import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
import it.unimi.dsi.fastutil.objects.ReferenceArraySet;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.search.AbstractDocumentIterator;
import it.unimi.dsi.mg4j.search.DocumentIterator;
import it.unimi.dsi.mg4j.search.IntervalIterator;
import it.unimi.dsi.mg4j.search.IntervalIterators;
import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;

import java.io.IOException;

import cern.colt.Arrays;

/** A document iterator concatenating iterators from local indices.
 *
 * <p>Component iterators are consumed one after the other, in array order; the pointers
 * they return are local to their index and are converted to global pointers using the
 * {@link ContiguousDocumentalStrategy} of the underlying cluster.
 *
 * <p>This class reads and writes the <code>last</code> and <code>next</code> fields, which are
 * not declared here (presumably they are inherited from {@link AbstractDocumentIterator}).
 *
 * @author Alessandro Arrabito
 * @author Sebastiano Vigna
 */
public class DocumentalConcatenatedClusterDocumentIterator extends AbstractDocumentIterator implements DocumentIterator {
	private static final boolean DEBUG = false;
	private static final boolean ASSERTS = false;

	/** The component document iterators. */
	final protected DocumentIterator[] documentIterator;
	/** The number of component iterators. */
	final protected int n;
	/** The indices corresponding to each underlying document iterator. */
	protected final int[] documentIteratorIndex;
	/** The cached strategy of the index we refer to. */
	protected final ContiguousDocumentalStrategy strategy;
	/** The current iterator (an index into {@link #documentIterator}). If it is equal to {@link #n},
	 * it means that we hit the end of list on the last document iterator. Otherwise, {@link #last}
	 * contains the last document ever returned or reached by {@link #skipTo(int)}. */
	protected int currentIterator;
	/** The last iterator to ever return something (an index into {@link #documentIterator}),
	 * or -1 if no iterator has returned anything yet or the last {@link #skipTo(int)} hit the end of list. */
	protected int lastIterator = -1;
	/** The underlying index reader. */
	private final DocumentalClusterIndexReader indexReader;
	/** The set of indices involved in this iterator. */
	final private ReferenceArraySet<Index> indices = new ReferenceArraySet<Index>();

	/** Creates a new document iterator for a documental cluster.
	 *
	 * <p>This constructor uses an array of document iterators that it is not required to be full.
	 * This is very useful with rare terms.
	 *
	 * @param indexReader the underlying index reader.
	 * @param documentIterator an array of document iterators.
	 * @param usedIndex an array parallel to <code>documentIterator</code> containing the number
	 * of the indices corresponding to the iterators.
	 * @throws IllegalArgumentException if some component iterator is empty.
	 */
	public DocumentalConcatenatedClusterDocumentIterator( final DocumentalClusterIndexReader indexReader, final DocumentIterator[] documentIterator, int[] usedIndex ) {
		this.documentIterator = documentIterator;
		this.n = documentIterator.length;
		this.indexReader = indexReader;
		this.documentIteratorIndex = usedIndex;
		this.strategy = (ContiguousDocumentalStrategy)indexReader.index.strategy;

		// The backward scan is kept on purpose: it determines the insertion order into indices.
		for( int i = n; i-- != 0; ) {
			if ( ! documentIterator[ i ].hasNext() ) throw new IllegalArgumentException( "All component document iterators must be nonempty" );
			indices.addAll( documentIterator[ i ].indices() );
		}
	}

	/** Returns the interval iterator of the component iterator that returned the last document.
	 *
	 * <p>NOTE(review): {@link #lastIterator} is -1 until a document has been returned (and after
	 * a {@link #skipTo(int)} that hit the end of list); calling this method in that state
	 * fails with an {@link ArrayIndexOutOfBoundsException}.
	 *
	 * @return the interval iterator provided by the last component iterator used.
	 */
	public IntervalIterator intervalIterator() throws IOException {
		return documentIterator[ lastIterator ].intervalIterator();
	}

	/** Returns the interval iterator for the given index of the component iterator that
	 * returned the last document.
	 *
	 * @param index an index.
	 * @return {@link IntervalIterators#TRUE} if <code>index</code> is not among {@link #indices()};
	 * otherwise, the interval iterator provided by the last component iterator used.
	 */
	public IntervalIterator intervalIterator( Index index ) throws IOException {
		if ( ! indices.contains( index ) ) return IntervalIterators.TRUE;
		return documentIterator[ lastIterator ].intervalIterator( index );
	}

	/** Returns the map from indices to interval iterators of the component iterator that
	 * returned the last document.
	 *
	 * @return the map provided by the last component iterator used.
	 */
	public Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() throws IOException {
		return documentIterator[ lastIterator ].intervalIterators();
	}

	/** Returns the union of the index sets of all component iterators, as computed at construction time.
	 *
	 * @return the set of indices involved in this iterator.
	 */
	public ReferenceSet<Index> indices() {
		return indices;
	}

	/** Skips to the first document whose global pointer is larger than or equal to <code>p</code>.
	 *
	 * @param p a global document pointer.
	 * @return the global pointer reached, or {@link Integer#MAX_VALUE} if no component
	 * iterator can provide a document at or beyond <code>p</code>.
	 */
	public int skipTo( final int p ) throws IOException {
		if ( DEBUG ) System.err.println( this + ": Requested to skip to " + p + "..." );
		// In this case we are already beyond p
		if ( last >= p ) return last;
		// In this case, we are already beyond the last iterator
		if ( currentIterator == n ) return Integer.MAX_VALUE;

		// Any document peeked at before is invalidated by the skip.
		next = -1;

		// Otherwise, first we recover the local index that contains p
		final int k = strategy.localIndex( p );

		if ( DEBUG ) System.err.println( this + ": Moving to local index " + k );

		// Then we advance currentIterator until we get to the first iterator whose index is at least k.
		while( currentIterator < n && documentIteratorIndex[ currentIterator ] < k ) currentIterator++;
		if ( ASSERTS ) assert currentIterator == n || documentIteratorIndex[ currentIterator ] >= k;

		// If currentIterator == n, we have been requested to skip to a cluster that does not contain pointers
		int localResult = Integer.MAX_VALUE;
		if ( currentIterator < n ) {
			/* The local pointer of p is meaningful only inside local index k. If there is no
			 * iterator for index k we landed on an iterator for a later index: all of its
			 * documents are beyond p, so we must not discard any of them and we just position
			 * it on its first available document. */
			final int localTarget = documentIteratorIndex[ currentIterator ] == k ? strategy.localPointer( p ) : 0;
			localResult = documentIterator[ currentIterator ].skipTo( localTarget );
			if ( DEBUG ) System.err.println( this + ": Skipped to local pointer " + localTarget + " in iterator " + currentIterator + "; result: " + localResult );

			// If we got to the end of list, the first document beyond p is the first document of the next iterator (if any).
			if ( localResult == Integer.MAX_VALUE && ++currentIterator < n ) localResult = documentIterator[ currentIterator ].nextDocument();
		}

		if ( localResult == Integer.MAX_VALUE ) {
			lastIterator = -1;
			last = Integer.MAX_VALUE;
		}
		else {
			lastIterator = currentIterator;
			last = strategy.globalPointer( documentIteratorIndex[ currentIterator ], localResult );
		}

		if ( DEBUG ) System.err.println( this + ": Will return " + last + " (lastIterator=" + lastIterator + ")" );
		return last;
	}

	/** Returns the next document, moving to the following component iterator when the current one is exhausted.
	 *
	 * @return the global pointer of the next document, or -1 if there are no more documents.
	 */
	public int nextDocument() throws IOException {
		// A document has already been peeked at: hand it out.
		if ( next >= 0 ) {
			last = next;
			next = -1;
			return last;
		}

		if ( currentIterator == n ) return last = -1;

		final int result = documentIterator[ currentIterator ].nextDocument();
		if ( result != -1 ) return last = strategy.globalPointer( documentIteratorIndex[ lastIterator = currentIterator ], result );

		currentIterator++;
		/* Note that we are heavily exploiting the fact that only nonempty
		 * iterators are present. */
		if ( currentIterator < n ) return last = strategy.globalPointer( documentIteratorIndex[ currentIterator ], documentIterator[ lastIterator = currentIterator ].nextDocument() );
		return last = -1;
	}

	// TODO: examine carefully the state change for lastIterator

	/** Makes all component iterators accept the given visitor, in array order, stopping at the first refusal.
	 *
	 * @param visitor a visitor.
	 * @return true if every component iterator returned true; false as soon as one returns false.
	 */
	public boolean accept( final DocumentIteratorVisitor visitor ) throws IOException {
		for( DocumentIterator d: documentIterator ) if ( ! d.accept( visitor ) ) return false;
		return true;
	}

	/** Delegates to the component iterator that returned the last document.
	 *
	 * @param visitor a visitor.
	 * @return the result of the delegated call.
	 */
	public boolean acceptOnTruePaths( final DocumentIteratorVisitor visitor ) throws IOException {
		return documentIterator[ lastIterator ].acceptOnTruePaths( visitor );
	}

	/** Closes the underlying index reader. */
	public void dispose() throws IOException {
		indexReader.close();
	}

	public String toString() {
		return this.getClass().getSimpleName() + Arrays.toString( documentIterator );
	}
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -