📄 aligndocumentiterator.java
字号:
package it.unimi.dsi.mg4j.search;/* * MG4J: Managing Gigabytes for Java * * Copyright (C) 2008 Sebastiano Vigna * * This library is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by the Free * Software Foundation; either version 2.1 of the License, or (at your option) * any later version. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */import it.unimi.dsi.fastutil.ints.IntSet;import it.unimi.dsi.fastutil.objects.Reference2ReferenceArrayMap;import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;import it.unimi.dsi.fastutil.objects.ReferenceSet;import it.unimi.dsi.mg4j.index.Index;import it.unimi.dsi.mg4j.index.IndexIterator;import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;import it.unimi.dsi.util.Interval;import java.io.IOException;/** A document iterator that aligns the results of a number of document iterators over * different indices. * * <p>This class is an example of cross-index computation. As in the case of an * {@link AndDocumentIterator}, we intersect the posting lists. However, once * we get to the index level, we actually return just intervals that appear in * <em>all</em> component iterators. Of course, this is meaningful only if all * indices represent different views on the same data, a typical example being * semantic tagging. * * <p>An instance of this class exposes a single interval iterator associated to * the index of the <em>first</em> component iterator, as all interval iterators * are exhausted during the computation of their intersection. * Correspondingly, a call to {@link IntervalIterator#intervalTerms(IntSet)} just * returns the terms related to the <em>first</em> component iterator. */public class AlignDocumentIterator extends AbstractDocumentIterator { private final static boolean DEBUG = false; /** The first operand, to be aligned. */ final private DocumentIterator aligneeIterator; /** The second operand, to be used to align the first operand. */ final private DocumentIterator alignerIterator; /** {@link #aligneeIterator}, if it is an {@link IndexIterator}. */ final private IndexIterator aligneeIndexIterator; /** {@link #alignerIterator}, if it is an {@link IndexIterator}. */ final private IndexIterator alignerIndexIterator; /** The sole index involved in this iterator. */ final private Index index; /** A singleton containing {@link #currentIterator}. */ final private Reference2ReferenceMap<Index,IntervalIterator> currentIterators; /** An unmodifiable wrapper around {@link #currentIterator}. */ final private Reference2ReferenceMap<Index,IntervalIterator> unmodifiableCurrentIterators; /** The interval iterator associated to this document iterator, or <code>null</code>. */ private IntervalIterator intervalIterator; /** The iterator returned for the current document, if any, or <code>null</code>. */ private IntervalIterator currentIterator; /** Returns a document iterator that alignes the first iterator to the second. * * @param aligneeIterator the iterator to be aligned. * @param alignerIterator the iterator used to align <code>aligneeIterator</code>. * * @return a document iterator that computes the alignment of <code>aligneeIterator</code> on <code>alignerIterator</code>. */ public static DocumentIterator getInstance( final DocumentIterator aligneeIterator, final DocumentIterator alignerIterator ) { return new AlignDocumentIterator( aligneeIterator, alignerIterator ); } protected AlignDocumentIterator( final DocumentIterator aligneeIterator, final DocumentIterator alignerIterator ) { this.aligneeIterator = aligneeIterator; this.alignerIterator = alignerIterator; if ( aligneeIterator instanceof IndexIterator && alignerIterator instanceof IndexIterator ) { aligneeIndexIterator = (IndexIterator)aligneeIterator; alignerIndexIterator = (IndexIterator)alignerIterator; } else aligneeIndexIterator = alignerIndexIterator = null; if ( aligneeIterator.indices().size() != 1 || alignerIterator.indices().size() != 1 ) throw new IllegalArgumentException( "You can align single-index iterators only" ); index = aligneeIterator.indices().iterator().next(); currentIterators = new Reference2ReferenceArrayMap<Index,IntervalIterator>( 1 ); unmodifiableCurrentIterators = Reference2ReferenceMaps.unmodifiable( currentIterators ); } public ReferenceSet<Index> indices() { return aligneeIterator.indices(); } public int nextDocument() throws IOException { if ( next >= 0 ) { last = next; next = -1; return last; } currentIterators.clear(); currentIterator = null; int alignee, aligner; alignee = aligneeIterator.nextDocument(); aligner = alignerIterator.nextDocument(); while( alignee != -1 && aligner != -1 ) { currentIterator = null; if ( alignee < aligner ) alignee = aligneeIterator.nextDocument(); else if ( aligner < alignee ) aligner = alignerIterator.nextDocument(); else { last = alignee; if ( intervalIterator().hasNext() ) return last; alignee = aligneeIterator.nextDocument(); aligner = alignerIterator.nextDocument(); } } return last = next = -1; } public int skipTo( final int n ) throws IOException { // The easy case. if ( last >= n ) return last; if ( next >= n ) return nextDocument(); last = next = -1; currentIterators.clear(); currentIterator = null; int alignee, aligner; alignee = aligneeIterator.skipTo( n ); aligner = alignerIterator.skipTo( n ); while( alignee != Integer.MAX_VALUE && aligner != Integer.MAX_VALUE ) { currentIterator = null; if ( alignee < aligner ) alignee = aligneeIterator.skipTo( aligner ); else if ( aligner < alignee ) aligner = alignerIterator.skipTo( alignee ); else { last = alignee; if ( intervalIterator().hasNext() ) { next = -1; return last; } alignee = aligneeIterator.nextDocument(); aligner = alignerIterator.nextDocument(); } } last = -1; return Integer.MAX_VALUE; } public IntervalIterator intervalIterator() throws IOException { return intervalIterator( index ); } public Reference2ReferenceMap<Index, IntervalIterator> intervalIterators() throws IOException { currentIterators.put( index, intervalIterator() ); return unmodifiableCurrentIterators; } public IntervalIterator intervalIterator( final Index index ) throws IOException { if ( DEBUG ) System.err.println( this + ".intervalIterator(" + index + ")" ); if ( last == -1 ) throw new IllegalStateException(); if ( index != this.index ) return IntervalIterators.TRUE; // If the iterator has been created and it's ready, we just return it. if ( currentIterator != null ) return currentIterator; final IntervalIterator aligneeIntervalIterator = aligneeIterator.intervalIterator(), alignerIntervalIterator = alignerIterator.intervalIterator(); if ( aligneeIntervalIterator == IntervalIterators.TRUE || alignerIntervalIterator == IntervalIterators.TRUE ) return currentIterator = aligneeIntervalIterator == alignerIntervalIterator ? IntervalIterators.TRUE : IntervalIterators.FALSE; if ( intervalIterator == null ) intervalIterator = aligneeIndexIterator == null ? new AlignIntervalIterator() : new AlignIndexIntervalIterator(); intervalIterator.reset(); return currentIterator = intervalIterator; } public void dispose() throws IOException { aligneeIterator.dispose(); alignerIterator.dispose(); } public boolean accept( final DocumentIteratorVisitor visitor ) throws IOException { return visitor.visitPre( this ) && aligneeIterator.accept( visitor ) && alignerIterator.accept( visitor ) && visitor.visitPost( this ); } public boolean acceptOnTruePaths( final DocumentIteratorVisitor visitor ) throws IOException { return visitor.visitPre( this ) && aligneeIterator.acceptOnTruePaths( visitor ) && visitor.visitPost( this ); } /** An interval iterator returning the intersection of the component interval iterators. */ private class AlignIntervalIterator extends AbstractIntervalIterator implements IntervalIterator { /** The interval iterator of the alignee iterator. */ private IntervalIterator aligneeIntervalIterator; /** The interval iterator of the aligner iterator. */ private IntervalIterator alignerIntervalIterator; /** Whether the scan is over. */ private boolean endOfProcess; public void reset() throws IOException { next = null; endOfProcess = false; aligneeIntervalIterator = aligneeIterator.intervalIterator(); alignerIntervalIterator = alignerIterator.intervalIterator(); if ( aligneeIntervalIterator == IntervalIterators.TRUE || alignerIntervalIterator == IntervalIterators.TRUE ) { // If by any chance we meet a TRUE iterator we are just false endOfProcess = true; return; } } public void intervalTerms( final IntSet terms ) { aligneeIntervalIterator.intervalTerms( terms ); } public Interval nextInterval() throws IOException { if ( next != null ) { final Interval result = next; next = null; return result; } if ( endOfProcess ) return null; Interval aligneeInterval = null, alignerInterval = null; aligneeInterval = aligneeIntervalIterator.nextInterval(); alignerInterval = alignerIntervalIterator.nextInterval(); if ( aligneeInterval == null || alignerInterval == null ) { endOfProcess = true; return null; } while ( ! aligneeInterval.equals( alignerInterval ) ) { if ( aligneeInterval.left <= alignerInterval.left ) { if ( ( aligneeInterval = aligneeIntervalIterator.nextInterval() ) == null ) { endOfProcess = true; return null; } } else { if ( ( alignerInterval = alignerIntervalIterator.nextInterval() ) == null ) { endOfProcess = true; return null; } } } return aligneeInterval; } public int extent() { return aligneeIntervalIterator.extent(); } } /** An interval iterator returning the intersection of the component interval iterators. */ private class AlignIndexIntervalIterator extends AbstractIntervalIterator implements IntervalIterator { /** Whether the scan is over. */ private boolean endOfProcess; /** The positions of the alignee iterator. */ private int[] aligneePosition; /** The positions of the aligner iterator. */ private int[] alignerPosition; /** The count of the alignee iterator. */ private int aligneeCount; /** The count of the aligner iterator. */ private int alignerCount; /** The position of the alignee iterator. */ private int aligneeCurr; /** The position of the aligner iterator. */ private int alignerCurr; public void reset() throws IOException { next = null; endOfProcess = false; aligneePosition = aligneeIndexIterator.positionArray(); alignerPosition = alignerIndexIterator.positionArray(); aligneeCount = aligneeIndexIterator.count(); alignerCount = alignerIndexIterator.count(); aligneeCurr = alignerCurr = -1; } public void intervalTerms( final IntSet terms ) { terms.add( aligneeIndexIterator.termNumber() ); } public Interval nextInterval() { if ( next != null ) { final Interval result = next; next = null; return result; } if ( endOfProcess ) return null; final int[] aligneePosition = this.aligneePosition, alignerPosition = this.alignerPosition; if ( ++aligneeCurr == aligneeCount || ++alignerCurr == alignerCount ) { endOfProcess = true; return null; } while ( aligneePosition[ aligneeCurr ] != alignerPosition[ alignerCurr ] ) { if ( aligneePosition[ aligneeCurr ] < alignerPosition[ alignerCurr ] ) { if ( ++aligneeCurr == aligneeCount ) { endOfProcess = true; return null; } } else { if ( ++alignerCurr == alignerCount ) { endOfProcess = true; return null; } } } return Interval.valueOf( alignerPosition[ alignerCurr ] ); } public int extent() { return 1; } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -