📄 gammadeltagammadeltabitstreamindexreader.java
字号:
package it.unimi.dsi.mg4j.index.wired;/* * MG4J: Managing Gigabytes for Java * * Copyright (C) 2003-2006 Paolo Boldi and Sebastiano Vigna * * This library is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by the Free * Software Foundation; either version 2.1 of the License, or (at your option) * any later version. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */import it.unimi.dsi.fastutil.ints.IntIterator;import it.unimi.dsi.fastutil.ints.IntIterators;import it.unimi.dsi.fastutil.ints.IntSet;import it.unimi.dsi.fastutil.objects.AbstractObjectIterator;import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;import it.unimi.dsi.fastutil.objects.ReferenceSet;import it.unimi.dsi.mg4j.index.AbstractIndexIterator;import it.unimi.dsi.mg4j.index.AbstractIndexReader;import it.unimi.dsi.mg4j.index.BitStreamIndex;import it.unimi.dsi.mg4j.index.Index;import it.unimi.dsi.mg4j.index.IndexIterator;import it.unimi.dsi.mg4j.index.CompressionFlags.Coding;import it.unimi.dsi.mg4j.index.payload.Payload;import it.unimi.dsi.io.InputBitStream;import it.unimi.dsi.util.Interval;import it.unimi.dsi.mg4j.search.IntervalIterator;import it.unimi.dsi.mg4j.search.IntervalIterators;import it.unimi.dsi.bits.Fast;import it.unimi.dsi.Util;import java.io.IOException;import java.util.NoSuchElementException;import org.apache.log4j.Logger;public class GammaDeltaGammaDeltaBitStreamIndexReader extends AbstractIndexReader { 
@SuppressWarnings("unused")
private static final Logger LOGGER = Util.getLogger( GammaDeltaGammaDeltaBitStreamIndexReader.class );

/** The reference index. */
protected final BitStreamIndex index;

/* Compile-time switches; NOTE(review): neither is referenced in the visible part of this file. */
private final static boolean ASSERTS = false;
private final static boolean DEBUG = false;

/** The {@link IndexIterator} view of this reader (returned by {@link #documents(CharSequence)}). */
protected final BitStreamIndexReaderIndexIterator indexIterator;

/** Creates a new index reader, with the specified underlying {@link Index} and input bit stream.
 *
 * <p>The {@link #indexIterator} view is created immediately and reads from the given bit stream.
 *
 * @param index the index.
 * @param ibs the underlying bit stream.
 */
public GammaDeltaGammaDeltaBitStreamIndexReader( final BitStreamIndex index, final InputBitStream ibs ) {
    this.index = index;
    this.indexIterator = new BitStreamIndexReaderIndexIterator( this, ibs );
}

/** The index iterator backing this reader.
 *
 * <p>Its constructor refuses indices that have payloads, or that lack counts or positions.
 */
protected static final class BitStreamIndexReaderIndexIterator extends AbstractIndexIterator implements IndexIterator {
    /** The enclosing instance. */
    private final GammaDeltaGammaDeltaBitStreamIndexReader parent;
    /** The reference index. */
    protected final BitStreamIndex index;
    /** The underlying input bit stream. */
    protected final InputBitStream ibs;
    /** The enclosed interval iterator. */
    private final IndexIntervalIterator intervalIterator;
    /** A singleton map associating the key index with the enclosed interval iterator. */
    private final Reference2ReferenceMap<Index,IntervalIterator> singletonIntervalIterator;
    /** The key index. */
    private final Index keyIndex;
    /** The cached copy of {@link #index index.pointerCoding}. */
    protected final Coding pointerCoding;
    /** The cached copy of {@link #index index.countCoding}. */
    protected final Coding countCoding;
    /** The cached copy of {@link #index index.positionCoding}. */
    protected final Coding positionCoding;
    /** The current term (-1 until a term has been positioned on). */
    protected int currentTerm = -1;
    /** The current frequency.
     */
    protected int frequency;
    /** Whether the current term has pointers at all (this happens when the {@link #frequency}
     * is smaller than the number of documents). */
    protected boolean hasPointers;
    /** The current count (if this index contains counts); reset to -1 each time a frequency is read. */
    protected int count;
    /** The last document pointer we read from current list, -1 if we just read the frequency,
     * {@link Integer#MAX_VALUE} if we are beyond the end of list. */
    protected int currentDocument;
    /** The number of the document record we are going to read inside the current inverted list
     * (reset to -1 each time a frequency is read). */
    protected int numberOfDocumentRecord;
    /** This variable tracks the current state of the reader; it takes one of the
     * <code>BEFORE_*</code> values below. */
    protected int state;
    /** This value of {@link #state} can be assumed only in indices that contain a payload; it
     * means that we are positioned just before the payload for the current document record. */
    private static final int BEFORE_PAYLOAD = 1;
    /** This value of {@link #state} can be assumed only in indices that contain counts; it
     * means that we are positioned just before the count for the current document record. */
    private static final int BEFORE_COUNT = 2;
    /** This value of {@link #state} can be assumed only in indices that contain document positions;
     * it means that we are positioned just before the position list of the current document record. */
    private static final int BEFORE_POSITIONS = 3;
    /** This value of {@link #state} means that we are at the start of a new document record,
     * unless we already read all documents (i.e., {@link #numberOfDocumentRecord} == {@link #frequency}),
     * in which case we are at the end of the inverted list, and {@link #endOfList()} is true. */
    private static final int BEFORE_POINTER = 4;
    /** The cached position array. 
*/ protected int[] positionCache = new int[ 16 ]; public BitStreamIndexReaderIndexIterator( final GammaDeltaGammaDeltaBitStreamIndexReader parent, final InputBitStream ibs ) { this.parent = parent; this.ibs = ibs; index = parent.index; keyIndex = index.keyIndex; pointerCoding = index.pointerCoding; if ( index.hasPayloads ) throw new IllegalStateException(); if ( ! index.hasCounts ) throw new IllegalStateException(); countCoding = index.countCoding; if ( ! index.hasPositions ) throw new IllegalStateException(); positionCoding = index.positionCoding; intervalIterator = index.hasPositions ? new IndexIntervalIterator() : null; singletonIntervalIterator = index.hasPositions ? Reference2ReferenceMaps.singleton( keyIndex, (IntervalIterator)intervalIterator ) : null; } /** Positions the index on the inverted list of a given term. * * <p>This method can be called at any time. Note that it is <em>always</em> possible * to call this method with argument 0, even if offsets have not been loaded. * * @param term a term. */ protected void position( final int term ) throws IOException { if ( term == 0 ) { ibs.position( 0 ); ibs.readBits( 0 ); } else { if ( index.offsets == null ) throw new IllegalStateException( "You cannot position an index without offsets" ); final long offset = index.offsets.getLong( term ); ibs.position( offset ); // TODO: Can't we set this to 0? 
ibs.readBits( offset ); } currentTerm = term; readFrequency(); } public int termNumber() { return currentTerm; } protected IndexIterator advance() throws IOException { if ( currentTerm == index.numberOfTerms - 1 ) return null; if ( currentTerm != -1 ) { skipTo( Integer.MAX_VALUE ); nextDocument(); // This guarantees we have no garbage before the frequency } currentTerm++; readFrequency(); return this; } private void readFrequency() throws IOException { // Read the frequency frequency = ibs.readGamma() + 1; hasPointers = frequency < index.numberOfDocuments; count = -1; currentDocument = -1; numberOfDocumentRecord = -1; state = BEFORE_POINTER; } public Index index() { return keyIndex; } public int frequency() { return frequency; } private void ensureCurrentDocument() { if ( currentDocument < 0 ) throw new IllegalStateException( "nextDocument() has never been called for (term=" + currentTerm + ")" ); if ( currentDocument == Integer.MAX_VALUE ) throw new IllegalStateException( "This reader is positioned beyond the end of list of (term=" + currentTerm + ")" ); } /** Returns whether there are no more document records in the current inverted list. * * <p>This method returns true if the last document pointer of the current inverted * list has been read. It makes no distinction as to where (inside the last document * record) this reader is currently positioned. In particular, this method will * return true independently of whether count and positions have been read or not (we * note by passing that this is the only sensible behaviour, as you can build indices * with or without counts/positions). * * <p>This method will return true also when this reader is positioned <em>beyond</em> * the last document pointer. In this case, {@link #currentDocumentPointer()} will * return {@link Integer#MAX_VALUE}. * * @return true whether there are no more document records in the current inverted list. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -