⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 skipgammadeltagammadeltabitstreamindexreader.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
package it.unimi.dsi.mg4j.index.wired;/*		  * MG4J: Managing Gigabytes for Java * * Copyright (C) 2003-2006 Paolo Boldi and Sebastiano Vigna  * *  This library is free software; you can redistribute it and/or modify it *  under the terms of the GNU Lesser General Public License as published by the Free *  Software Foundation; either version 2.1 of the License, or (at your option) *  any later version. * *  This library is distributed in the hope that it will be useful, but *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License *  for more details. * *  You should have received a copy of the GNU Lesser General Public License *  along with this program; if not, write to the Free Software *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * */import it.unimi.dsi.fastutil.ints.IntIterator;import it.unimi.dsi.fastutil.ints.IntIterators;import it.unimi.dsi.fastutil.ints.IntSet;import it.unimi.dsi.fastutil.objects.AbstractObjectIterator;import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;import it.unimi.dsi.fastutil.objects.ReferenceSet;import it.unimi.dsi.mg4j.index.AbstractIndexIterator;import it.unimi.dsi.mg4j.index.AbstractIndexReader;import it.unimi.dsi.mg4j.index.BitStreamIndex;import it.unimi.dsi.mg4j.index.Index;import it.unimi.dsi.mg4j.index.IndexIterator;import it.unimi.dsi.mg4j.index.CompressionFlags.Coding;import it.unimi.dsi.mg4j.index.payload.Payload;import it.unimi.dsi.io.InputBitStream;import it.unimi.dsi.util.Interval;import it.unimi.dsi.mg4j.search.IntervalIterator;import it.unimi.dsi.mg4j.search.IntervalIterators;import it.unimi.dsi.bits.Fast;import it.unimi.dsi.Util;import java.io.IOException;import java.util.NoSuchElementException;import org.apache.log4j.Logger;public class SkipGammaDeltaGammaDeltaBitStreamIndexReader extends AbstractIndexReader { @SuppressWarnings("unused") private static final Logger LOGGER = Util.getLogger( SkipGammaDeltaGammaDeltaBitStreamIndexReader.class ); /** The reference index. */ protected final BitStreamIndex index; private final static boolean ASSERTS = false; private final static boolean DEBUG = false; /** The {@link IndexIterator} view of this reader (returned by {@link #documents(CharSequence)}). */ protected final BitStreamIndexReaderIndexIterator indexIterator; /** Creates a new skip index reader, with the specified underlying {@link Index} and input bit stream.	 *	 * @param index the index.	 * @param ibs the underlying bit stream.	 */ public SkipGammaDeltaGammaDeltaBitStreamIndexReader( final BitStreamIndex index, final InputBitStream ibs ) {  this.index = index;  this.indexIterator = new BitStreamIndexReaderIndexIterator( this, ibs ); } protected static final class BitStreamIndexReaderIndexIterator extends AbstractIndexIterator implements IndexIterator {  /** The enclosing instance. */  private final SkipGammaDeltaGammaDeltaBitStreamIndexReader parent;  /** The reference index. */  protected final BitStreamIndex index;  /** The underlying input bit stream. */  protected final InputBitStream ibs;  /** The enclosed interval iterator. */  private final IndexIntervalIterator intervalIterator;  /** A singleton set containing the enclosed interval iterator. */  private final Reference2ReferenceMap<Index,IntervalIterator> singletonIntervalIterator;  /** The key index. */  private final Index keyIndex;  /** The cached copy of {@link #index index.pointerCoding}. */  protected final Coding pointerCoding;  /** The cached copy of {@link #index index.countCoding}. */  protected final Coding countCoding;  /** The cached copy of {@link #index index.positionCoding}. */  protected final Coding positionCoding;  /** The current term. */  protected int currentTerm = -1;  /** The current frequency. */  protected int frequency;  /** Whether the current terms has pointers at all (this happens when the {@link #frequency} is smaller than the number of documents). */  protected boolean hasPointers;  /** The current count (if this index contains counts). */  protected int count;  /** The last document pointer we read from current list, -1 if we just read the frequency,		 * {@link Integer#MAX_VALUE} if we are beyond the end of list. */  protected int currentDocument;  /** The number of the document record we are going to read inside the current inverted list. */  protected int numberOfDocumentRecord;  /** This variable tracks the current state of the reader. */  protected int state;  /** The parameter <code>h</code> (the maximum height of a skip tower). */  public final int height;  /** The quantum. */  public final int quantum;  /** The bit mask giving the remainder of the division by {@link #quantum}. */  public final int quantumModuloMask;  /** The shift giving result of the division by {@link #quantum}. */  public final int quantumDivisionShift;  /** The maximum height of a skip tower in the current block. May be less than {@link #height} if the block is defective,		 * and will be -1 on defective quanta (no tower at all). */  private int maxh;  /** The maximum valid index of the current skip tower, if any. */  private int s;  /** The minimum valid index of the current skip tower, or {@link Integer#MAX_VALUE}. If {@link #maxh} is negative, the value is undefined. */  private int lowest;  /** We have <var>w</var> = <var>Hq</var>. */  private final int w;  /** The bit mask giving the remainder of the division by {@link #w}. */  private final int wModuloMask;  /** The shift giving result of the division by {@link #w}. */  private final int wDivisionShift;  /** The Golomb modulus for a top pointer skip, for each level. */  private int[] towerTopB;  /** The most significant bit of the Golomb modulus for a top point[]er skip, for each level. */  private int[] towerTopLog2B;  /** The Golomb modulus for a lower pointer skip, for each level. */  private int[] towerLowerB;  /** The most significant bit of the Golomb modulus for a lower pointer skip, for each level. */  private int[] towerLowerLog2B;  /** The prediction for a pointer skip, for each level. */  private int[] pointerPrediction;  /** An array to decode bit skips. */  private long[] bitSkip;  /** An array to decode the pointer skips. */  private int[] pointerSkip;  /** The number of bits read just after reading the last skip tower. */  private long readBitsAtLastSkipTower;  /** The document pointer corresponding to the last skip tower. */  private int pointerAtLastSkipTower;  /** The current quantum bit length, as provided by the index. */  private int quantumBitLength;  /** The current entry bit length, as provided by the index. */  private int entryBitLength;  /** This value of {@link #state} means that we are positioned just before a tower. */  private static final int BEFORE_TOWER = 0;  /** This value of {@link #state} can be assumed only in indices that contain a payload; it		 * means that we are positioned just before the payload for the current document record. */  private static final int BEFORE_PAYLOAD = 1;  /** This value of {@link #state} can be assumed only in indices that contain counts; it		 * means that we are positioned just before the count for the current document record. */  private static final int BEFORE_COUNT = 2;  /** This value of {@link #state} can be assumed only in indices that contain document positions; 		 * it means that we are positioned just before the position list of the current document record. */  private static final int BEFORE_POSITIONS = 3;  /** This value of {@link #state} means that we are at the start of a new document record, 		 * unless we already read all documents (i.e., {@link #numberOfDocumentRecord} == {@link #frequency}),		 * in which case we are at the end of the inverted list, and {@link #endOfList()} is true. */  private static final int BEFORE_POINTER = 4;  /** The cached position array. */  protected int[] positionCache = new int[ 16 ];  public BitStreamIndexReaderIndexIterator( final SkipGammaDeltaGammaDeltaBitStreamIndexReader parent, final InputBitStream ibs ) {   this.parent = parent;   this.ibs = ibs;   index = parent.index;   keyIndex = index.keyIndex;   pointerCoding = index.pointerCoding;   if ( index.hasPayloads ) throw new IllegalStateException();   if ( ! index.hasCounts ) throw new IllegalStateException();   countCoding = index.countCoding;   if ( ! index.hasPositions ) throw new IllegalStateException();   positionCoding = index.positionCoding;   intervalIterator = index.hasPositions ? new IndexIntervalIterator() : null;   singletonIntervalIterator = index.hasPositions ? Reference2ReferenceMaps.singleton( keyIndex, (IntervalIterator)intervalIterator ) : null;   quantum = index.quantum;   height = index.height;   if ( ( quantum == -1 ) != ( height == -1 ) ) throw new IllegalArgumentException();    w = ( 1 << height ) * quantum;    quantumModuloMask = quantum - 1;    wModuloMask = w - 1;    quantumDivisionShift = Fast.mostSignificantBit( quantum );    wDivisionShift = Fast.mostSignificantBit( w );    bitSkip = new long[ height + 1 ];    pointerSkip = new int[ height + 1 ];    towerTopB = new int[ height + 1 ];    towerTopLog2B = new int[ height + 1 ];    towerLowerB = new int[ height + 1 ];    towerLowerLog2B = new int[ height + 1 ];    pointerPrediction = new int[ height + 1 ];  }  /** Positions the index on the inverted list of a given term.		 *		 * <p>This method can be called at any time. Note that it is <em>always</em> possible		 * to call this method with argument 0, even if offsets have not been loaded.		 *		 * @param term a term.		 */  protected void position( final int term ) throws IOException {   if ( term == 0 ) {    ibs.position( 0 );    ibs.readBits( 0 );   }   else {    if ( index.offsets == null ) throw new IllegalStateException( "You cannot position an index without offsets" );    final long offset = index.offsets.getLong( term );    ibs.position( offset );    // TODO: Can't we set this to 0?    ibs.readBits( offset );   }   currentTerm = term;   readFrequency();  }  public int termNumber() {   return currentTerm;  }  protected IndexIterator advance() throws IOException {   if ( currentTerm == index.numberOfTerms - 1 ) return null;   if ( currentTerm != -1 ) {    skipTo( Integer.MAX_VALUE );    nextDocument(); // This guarantees we have no garbage before the frequency   }   currentTerm++;   readFrequency();   return this;  }  private void readFrequency() throws IOException {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -