⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 aligndocumentiterator.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
字号:
package it.unimi.dsi.mg4j.search;/*		  * MG4J: Managing Gigabytes for Java * * Copyright (C) 2008 Sebastiano Vigna  * *  This library is free software; you can redistribute it and/or modify it *  under the terms of the GNU Lesser General Public License as published by the Free *  Software Foundation; either version 2.1 of the License, or (at your option) *  any later version. * *  This library is distributed in the hope that it will be useful, but *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License *  for more details. * *  You should have received a copy of the GNU Lesser General Public License *  along with this program; if not, write to the Free Software *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */import it.unimi.dsi.fastutil.ints.IntSet;import it.unimi.dsi.fastutil.objects.Reference2ReferenceArrayMap;import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;import it.unimi.dsi.fastutil.objects.ReferenceSet;import it.unimi.dsi.mg4j.index.Index;import it.unimi.dsi.mg4j.index.IndexIterator;import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;import it.unimi.dsi.util.Interval;import java.io.IOException;/** A document iterator that aligns the results of a number of document iterators over * different indices. * * <p>This class is an example of cross-index computation. As in the case of an * {@link AndDocumentIterator}, we intersect the posting lists. However, once * we get to the index level, we actually return just intervals that appear in * <em>all</em> component iterators. Of course, this is meaningful only if all * indices represent different views on the same data, a typical example being  * semantic tagging.  *  * <p>An instance of this class exposes a single interval iterator associated to * the index of the <em>first</em> component iterator, as all interval iterators * are exhausted during the computation of their intersection. * Correspondingly, a call to {@link IntervalIterator#intervalTerms(IntSet)} just * returns the terms related to the <em>first</em> component iterator. */public class AlignDocumentIterator extends AbstractDocumentIterator {	private final static boolean DEBUG = false;	/** The first operand, to be aligned. */	final private DocumentIterator aligneeIterator;	/** The second operand, to be used to align the first operand. */	final private DocumentIterator alignerIterator;	/** {@link #aligneeIterator}, if it is an {@link IndexIterator}. */	final private IndexIterator aligneeIndexIterator;	/** {@link #alignerIterator}, if it is an {@link IndexIterator}. */	final private IndexIterator alignerIndexIterator;	/** The sole index involved in this iterator. */	final private Index index;	/** A singleton containing {@link #currentIterator}. */	final private Reference2ReferenceMap<Index,IntervalIterator> currentIterators;	/** An unmodifiable wrapper around {@link #currentIterator}. */	final private Reference2ReferenceMap<Index,IntervalIterator> unmodifiableCurrentIterators;	/** The interval iterator associated to this document iterator, or <code>null</code>. */	private IntervalIterator intervalIterator;	/** The iterator returned for the current document, if any, or <code>null</code>. */	private IntervalIterator currentIterator;	/** Returns a document iterator that alignes the first iterator to the second.	 * 	 * @param aligneeIterator the iterator to be aligned.	 * @param alignerIterator the iterator used to align <code>aligneeIterator</code>.	 * 	 * @return a document iterator that computes the alignment of <code>aligneeIterator</code> on <code>alignerIterator</code>. 	 */	public static DocumentIterator getInstance( final DocumentIterator aligneeIterator, final DocumentIterator alignerIterator ) {		return new AlignDocumentIterator( aligneeIterator, alignerIterator );	}	protected AlignDocumentIterator( final DocumentIterator aligneeIterator, final DocumentIterator alignerIterator ) {		this.aligneeIterator = aligneeIterator;		this.alignerIterator = alignerIterator;		if ( aligneeIterator instanceof IndexIterator && alignerIterator instanceof IndexIterator ) {			aligneeIndexIterator = (IndexIterator)aligneeIterator; 			alignerIndexIterator = (IndexIterator)alignerIterator;		}		else aligneeIndexIterator = alignerIndexIterator = null;		if ( aligneeIterator.indices().size() != 1 || alignerIterator.indices().size() != 1 ) throw new IllegalArgumentException( "You can align single-index iterators only" );		index = aligneeIterator.indices().iterator().next();		currentIterators = new Reference2ReferenceArrayMap<Index,IntervalIterator>( 1 );		unmodifiableCurrentIterators = Reference2ReferenceMaps.unmodifiable( currentIterators );	}	public ReferenceSet<Index> indices() {		return aligneeIterator.indices();	}		public int nextDocument() throws IOException {		if ( next >= 0 ) {			last = next;			next = -1;			return last;		}				currentIterators.clear();		currentIterator = null;		int alignee, aligner;		alignee = aligneeIterator.nextDocument();		aligner = alignerIterator.nextDocument();				while( alignee != -1 && aligner != -1 ) {			currentIterator = null;			if ( alignee < aligner ) alignee = aligneeIterator.nextDocument();			else if ( aligner < alignee ) aligner = alignerIterator.nextDocument();			else {				last = alignee;				if ( intervalIterator().hasNext() ) return last;				alignee = aligneeIterator.nextDocument();				aligner = alignerIterator.nextDocument();			}		}		return last = next = -1;	}			public int skipTo( final int n ) throws IOException {		// The easy case.		if ( last >= n ) return last;		if ( next >= n ) return nextDocument();		last = next = -1;				currentIterators.clear();		currentIterator = null;		int alignee, aligner;		alignee = aligneeIterator.skipTo( n );		aligner = alignerIterator.skipTo( n );				while( alignee != Integer.MAX_VALUE && aligner != Integer.MAX_VALUE ) {			currentIterator = null;			if ( alignee < aligner ) alignee = aligneeIterator.skipTo( aligner );			else if ( aligner < alignee ) aligner = alignerIterator.skipTo( alignee );			else {				last = alignee;				if ( intervalIterator().hasNext() ) {					next = -1;					return last;				}				alignee = aligneeIterator.nextDocument();				aligner = alignerIterator.nextDocument();			}		}				last = -1;		return Integer.MAX_VALUE;	}	public IntervalIterator intervalIterator() throws IOException {		return intervalIterator( index );	}	public Reference2ReferenceMap<Index, IntervalIterator> intervalIterators() throws IOException {		currentIterators.put( index, intervalIterator() );		return unmodifiableCurrentIterators;	}	public IntervalIterator intervalIterator( final Index index ) throws IOException {		if ( DEBUG ) System.err.println( this + ".intervalIterator(" + index + ")" );		if ( last == -1 ) throw new IllegalStateException(); 		if ( index != this.index ) return IntervalIterators.TRUE;		// If the iterator has been created and it's ready, we just return it.				if ( currentIterator != null ) return currentIterator;					final IntervalIterator aligneeIntervalIterator = aligneeIterator.intervalIterator(), alignerIntervalIterator = alignerIterator.intervalIterator();				if ( aligneeIntervalIterator == IntervalIterators.TRUE || alignerIntervalIterator == IntervalIterators.TRUE )			return currentIterator = aligneeIntervalIterator == alignerIntervalIterator ? IntervalIterators.TRUE : IntervalIterators.FALSE; 		if ( intervalIterator == null ) intervalIterator = aligneeIndexIterator == null ? new AlignIntervalIterator() : new AlignIndexIntervalIterator();		intervalIterator.reset();		return currentIterator = intervalIterator;	}		public void dispose() throws IOException {		aligneeIterator.dispose();		alignerIterator.dispose();	}		public boolean accept( final DocumentIteratorVisitor visitor ) throws IOException {		return visitor.visitPre( this ) && aligneeIterator.accept( visitor ) && alignerIterator.accept( visitor ) && visitor.visitPost( this );	}	public boolean acceptOnTruePaths( final DocumentIteratorVisitor visitor ) throws IOException {		return visitor.visitPre( this ) && aligneeIterator.acceptOnTruePaths( visitor ) && visitor.visitPost( this );	}		/** An interval iterator returning the intersection of the component interval iterators. */		private class AlignIntervalIterator extends AbstractIntervalIterator implements IntervalIterator {		/** The interval iterator of the alignee iterator. */		private IntervalIterator aligneeIntervalIterator;		/** The interval iterator of the aligner iterator. */		private IntervalIterator alignerIntervalIterator;		/** Whether the scan is over. */		private boolean endOfProcess;		public void reset() throws IOException {			next = null;			endOfProcess = false;			aligneeIntervalIterator = aligneeIterator.intervalIterator();			alignerIntervalIterator = alignerIterator.intervalIterator();			if ( aligneeIntervalIterator == IntervalIterators.TRUE || alignerIntervalIterator == IntervalIterators.TRUE ) {				// If by any chance we meet a TRUE iterator we are just false				endOfProcess = true;				return;			}		}		public void intervalTerms( final IntSet terms ) {			aligneeIntervalIterator.intervalTerms( terms );		}		public Interval nextInterval() throws IOException {			if ( next != null ) {				final Interval result = next;				next = null; 				return result;			}			if ( endOfProcess ) return null;						Interval aligneeInterval = null, alignerInterval = null;						aligneeInterval = aligneeIntervalIterator.nextInterval();			alignerInterval = alignerIntervalIterator.nextInterval();			if ( aligneeInterval == null || alignerInterval == null ) {				endOfProcess = true;				return null;			}						while ( ! aligneeInterval.equals( alignerInterval ) ) {				if ( aligneeInterval.left <= alignerInterval.left ) { 					if ( ( aligneeInterval = aligneeIntervalIterator.nextInterval() ) == null ) {						endOfProcess = true;						return null;					}				}				else {					if ( ( alignerInterval = alignerIntervalIterator.nextInterval() ) == null ) {						endOfProcess = true;						return null;					}				}			}			return aligneeInterval;		}				public int extent() {			return aligneeIntervalIterator.extent();		}	}	/** An interval iterator returning the intersection of the component interval iterators. */		private class AlignIndexIntervalIterator extends AbstractIntervalIterator implements IntervalIterator {		/** Whether the scan is over. */		private boolean endOfProcess;		/** The positions of the alignee iterator. */		private int[] aligneePosition;		/** The positions of the aligner iterator. */		private int[] alignerPosition;		/** The count of the alignee iterator. */		private int aligneeCount;		/** The count of the aligner iterator. */		private int alignerCount;		/** The position of the alignee iterator. */		private int aligneeCurr;		/** The position of the aligner iterator. */		private int alignerCurr;		public void reset() throws IOException {			next = null;			endOfProcess = false;						aligneePosition = aligneeIndexIterator.positionArray();			alignerPosition = alignerIndexIterator.positionArray();			aligneeCount = aligneeIndexIterator.count();			alignerCount = alignerIndexIterator.count();			aligneeCurr = alignerCurr = -1;		}				public void intervalTerms( final IntSet terms ) {			terms.add( aligneeIndexIterator.termNumber() );		}		public Interval nextInterval() {			if ( next != null ) {				final Interval result = next;				next = null; 				return result;			}						if ( endOfProcess ) return null;			final int[] aligneePosition = this.aligneePosition, alignerPosition = this.alignerPosition;						if ( ++aligneeCurr == aligneeCount || ++alignerCurr == alignerCount ) {				endOfProcess = true;				return null;			}						while ( aligneePosition[ aligneeCurr ] != alignerPosition[ alignerCurr ] ) {				if ( aligneePosition[ aligneeCurr ] < alignerPosition[ alignerCurr ] ) { 					if ( ++aligneeCurr == aligneeCount ) {						endOfProcess = true;						return null;					}				}				else {					if ( ++alignerCurr == alignerCount ) {						endOfProcess = true;						return null;					}				}			}			return Interval.valueOf( alignerPosition[ alignerCurr ] );		}				public int extent() {			return 1;		}	}}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -