⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gammadeltagammadeltabitstreamhpindexreader.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
     ibs.position( index.offsets.getLong( 1 ) );     positionsLength = ibs.readLongDelta();    }    ibs.position( 0 );    ibs.readBits( 0 );    lastPositionsOffset = ibs.readLongDelta(); // This is 0 for sure     positions.position( 0 );   }   else {    if ( index.offsets == null ) throw new IllegalStateException( "You cannot position an index without offsets" );    if ( ASSERTS ) {     // Get end of positions     if ( term < index.numberOfTerms - 1 ) {      ibs.position( index.offsets.getLong( term + 1 ) );      positionsLength = ibs.readLongDelta();     }     else positionsLength = Long.MAX_VALUE; // Presently, no way to check    }    ibs.position( index.offsets.getLong( term ) );    ibs.readBits( index.offsets.getLong( term ) );    // Let us position the positions bitstream    lastPositionsOffset = ibs.readLongDelta();    positions.position( lastPositionsOffset );    if ( ASSERTS && positionsLength != Long.MAX_VALUE ) positionsLength -= lastPositionsOffset;    if ( DEBUG && ASSERTS ) System.err.println( this + ": positions for term " + term + " start at bit " + lastPositionsOffset + " (" + positionsLength + " bits)" );   }   positions.readBits( 0 );   this.term = term;   readFrequency();  }  public int termNumber() {   return term;  }  protected IndexIterator advance() throws IOException {   if ( term == index.numberOfTerms - 1 ) return null;   if ( term != -1 ) {    skipTo( Integer.MAX_VALUE );    // This guarantees we have no garbage before the frequency    nextDocument();    positionsUnread = false;   }   // We must skip the offset into the positions bitstream   final long nextPositionsOffset = ibs.readLongDelta();   positions.skip( nextPositionsOffset - lastPositionsOffset - positions.readBits() );   lastPositionsOffset = nextPositionsOffset;   positions.readBits( 0 );   term++;   if ( ASSERTS ) positionsLength = -1; // Invalidate   readFrequency();   return this;  }  private void readFrequency() throws IOException {   // Read the frequency    frequency = ibs.readGamma() + 1;   if ( DEBUG ) System.err.println( this + ": Frequency for term " + term + " is " + frequency );   hasPointers = frequency < index.numberOfDocuments;   quantumBitLength = positionsQuantumBitLength = entryBitLength = -1;   lowest = Integer.MAX_VALUE;   if ( ASSERTS ) for ( int i = height; i > Math.min( height, Fast.mostSignificantBit( frequency >> quantumDivisionShift ) ); i-- )    towerTopB[ i ] = towerLowerB[ i ] = pointerPrediction[ i ] = -1;   final long pointerQuantumSigma = BitStreamIndex.quantumSigma( frequency, index.numberOfDocuments, quantum );   for ( int i = Math.min( height, Fast.mostSignificantBit( frequency >> quantumDivisionShift ) ); i >= 0; i-- ) {    towerTopB[ i ] = BitStreamIndex.gaussianGolombModulus( pointerQuantumSigma, i + 1 );    towerTopLog2B[ i ] = Fast.mostSignificantBit( towerTopB[ i ] );    towerLowerB[ i ] = BitStreamIndex.gaussianGolombModulus( pointerQuantumSigma, i );    towerLowerLog2B[ i ] = Fast.mostSignificantBit( towerLowerB[ i ] );    pointerPrediction[ i ] = (int)( ( quantum * ( 1L << i ) * index.numberOfDocuments + frequency / 2 ) / frequency );   }   count = -1;   currentDocument = -1;   numberOfDocumentRecord = -1;   positionsBitsOffset = 0;   positionsBitSkip[ 0 ] = 0; // To avoid spurious tower updates on the first tower   positionsToReadToReachCurrentPosition = 0;   lastPositionsIncrement = 0;   state = BEFORE_POINTER;  }  public Index index() {   return keyIndex;  }  public int frequency() {   return frequency;  }  private void ensureCurrentDocument() {   if ( currentDocument < 0 ) throw new IllegalStateException( "nextDocument() has never been called for (term=" + term + ")" );   if ( currentDocument == Integer.MAX_VALUE ) throw new IllegalStateException( "This reader is positioned beyond the end of list of (term=" + term + ")" );  }  /**		 * Returns whether there are no more document records in the current inverted list.		 * 		 * <p>This method returns true if the last document pointer of the current inverted list		 * has been read. It makes no distinction as to where (inside the last document record) this		 * reader is currently positioned. In particular, this method will return true independently		 * of whether count and positions have been read or not (we note by passing that this is the		 * only sensible behaviour, as you can build indices with or without counts/positions).		 * 		 * <p>This method will return true also when this reader is positioned <em>beyond</em>		 * the last document pointer. In this case, {@link #currentDocumentPointer()} will return		 * {@link Integer#MAX_VALUE}.		 * 		 * @return true whether there are no more document records in the current inverted list.		 */  private boolean endOfList() {   if ( ASSERTS ) assert numberOfDocumentRecord <= frequency;   return numberOfDocumentRecord >= frequency - 1;  }  public int document() {   if ( ASSERTS ) ensureCurrentDocument();   return currentDocument;  }  public Payload payload() throws IOException {   throw new UnsupportedOperationException( "This index (" + index + ") does not contain payloads" );  }  public int count() throws IOException {   if ( DEBUG ) System.err.println( this + ".count()" );   if ( count != -1 ) return count;   if ( ASSERTS ) ensureCurrentDocument();   if ( state == BEFORE_TOWER ) readTower();   if ( ASSERTS && state != BEFORE_COUNT ) throw new IllegalStateException();   state = BEFORE_POINTER;    count = ibs.readGamma() + 1;   return count;  }  protected void updatePositionCache() throws IOException {   if ( DEBUG ) System.err.println( this + ".updatePositionCache()" );   positionsUnread = false;   count(); // This will force reading the tower and updating positionsBitsOffset, if necessary   if ( positionsBitsOffset > positions.readBits() ) {    if ( DEBUG ) System.err.println( this + ": positionsBitsOffset=" + positionsBitsOffset + ", positions.readBits()=" + positions.readBits() + ", skipping by " + ( positionsBitsOffset - positions.readBits() ) );    positions.skip( positionsBitsOffset - positions.readBits() );   }   if ( ASSERTS ) assert positionsToReadToReachCurrentPosition >= 0 : positionsToReadToReachCurrentPosition + " < 0";   if ( positionsToReadToReachCurrentPosition > 0 ) {    if ( DEBUG ) System.err.println( this + ":Skipping sequentially " + positionsToReadToReachCurrentPosition + " positions..." );    // We skip, inside the current quantum, the positions we haven't read     if ( COOKIES ) {      positionsToReadToReachCurrentPosition--;      if ( positions.readDelta() != Integer.MAX_VALUE ) throw new AssertionError();     }     positions.skipDeltas( positionsToReadToReachCurrentPosition );   }   // We must fix it so that nextDocument() will restore it to 0   positionsToReadToReachCurrentPosition = -count;   if ( COOKIES ) positionsToReadToReachCurrentPosition--;   if ( count > positionCache.length ) positionCache = new int[ Math.max( positionCache.length * 2, count ) ];   final int[] occ = positionCache;    if ( COOKIES && positions.readDelta() != Integer.MAX_VALUE ) throw new AssertionError();    positions.readDeltas( occ, count );    for ( int i = 1; i < count; i++ ) occ[ i ] += occ[ i - 1 ] + 1;  }  public IntIterator positions() throws IOException {   if ( ASSERTS ) ensureCurrentDocument();   if ( positionsUnread ) updatePositionCache();   return IntIterators.wrap( positionCache, 0, count );  }  public int[] positionArray() throws IOException {   if ( ASSERTS ) ensureCurrentDocument();   if ( positionsUnread ) updatePositionCache();   return positionCache;  }  // TODO: check who's using this (positionArray() is actually faster now)  public int positions( final int[] position ) throws IOException {   if ( ASSERTS ) ensureCurrentDocument();   if ( positionsUnread ) updatePositionCache(); // And also that positions have                 // been read   if ( position.length < count ) return -count;   for ( int i = count; i-- != 0; )    position[ i ] = this.positionCache[ i ];   return count;  }  public int nextDocument() throws IOException {   if ( DEBUG ) System.err.println( "{" + this + "} nextDocument()" );   if ( state != BEFORE_POINTER ) {    if ( state == BEFORE_TOWER ) readTower();    if ( state == BEFORE_COUNT ) {      count = ibs.readGamma() + 1;     state = BEFORE_POINTER;    }   }   if ( endOfList() ) return -1;   if ( hasPointers ) {// We do not write pointers for everywhere occurring terms.     currentDocument += ibs.readDelta() + 1;   }   else currentDocument++;   numberOfDocumentRecord++;   if ( ASSERTS && numberOfDocumentRecord > quantum ) assert positionsBitsOffset > 0;   if ( ( numberOfDocumentRecord & quantumModuloMask ) == 0 ) {    state = BEFORE_TOWER;    positionsToReadToReachCurrentPosition = 0;   }   else {    state = BEFORE_COUNT;    if ( ASSERTS ) assert count > 0 : count + " <= " + 0;    positionsToReadToReachCurrentPosition += count;    if ( COOKIES ) positionsToReadToReachCurrentPosition++;   }   count = -1;   positionsUnread = true;   return currentDocument;  }  /**		 * Reads the entire skip tower for the current position. This method assumes that the tower		 * has been passed over sequentially, and correpondingly sets {@link #lastPositionsIncrement}		 * to the number of positions bits of the last quantum.		 */  private void readTower() throws IOException {   lastPositionsIncrement = maxh >= 0 ? positionsBitSkip[ 0 ] : 0;   if ( DEBUG ) System.err.println( this + ": Setting lastPositionsIncrement to " + lastPositionsIncrement + " in readTower()" );   readTower( -1 );   if ( DEBUG ) System.err.println( this + ": Incrementing positionsBitsOffset by " + lastPositionsIncrement + " in readTower()" );   // TODO: this should be moved into readTower(int)   positionsBitsOffset += lastPositionsIncrement;  }  /**		 * Reads the skip tower for the current position, possibly skipping part of the tower.		 * 		 * <P>Note that this method will update {@link #state} only if it reads the entire tower,		 * otherwise the state remains {@link #BEFORE_TOWER}.		 * 		 * @param pointer the tower will be read up to the first entry smaller than or equal to this		 * pointer; use -1 to guarantee that the entire tower will be read.		 */  private void readTower( final int pointer ) throws IOException {   int i, j, k, cacheOffset, cache, towerLength = 0;   long bitsAtTowerStart = 0;   boolean truncated = false;   if ( ASSERTS ) assert numberOfDocumentRecord % quantum == 0;   if ( ASSERTS && state != BEFORE_TOWER ) throw new IllegalStateException( "readTower() called in state " + state );   cacheOffset = ( numberOfDocumentRecord & wModuloMask );   k = cacheOffset >> quantumDivisionShift;   if ( ASSERTS && k == 0 ) { // Invalidate current tower data    it.unimi.dsi.fastutil.ints.IntArrays.fill( pointerSkip, Integer.MAX_VALUE );    it.unimi.dsi.fastutil.longs.LongArrays.fill( bitSkip, Integer.MAX_VALUE );    it.unimi.dsi.fastutil.longs.LongArrays.fill( positionsBitSkip, Integer.MAX_VALUE );   }   // Compute the height of the current skip tower.   s = ( k == 0 ) ? height : Fast.leastSignificantBit( k );   cache = frequency - w * ( numberOfDocumentRecord >> wDivisionShift );   if ( cache < w ) {    maxh = Fast.mostSignificantBit( ( cache >> quantumDivisionShift ) - k );    if ( maxh < s ) {     s = maxh;     truncated = true;    }    else truncated = false;   }   else {    cache = w;    maxh = height;    truncated = k == 0;   }   // assert w == cache || k == 0 || lastMaxh == Fast.mostSignificantBit( k ^ (   // cache/quantum ) ) : lastMaxh +","+ (Fast.mostSignificantBit( k ^ ( cache/quantum )   // ));   i = s;   if ( s >= 0 ) {    if ( k == 0 ) {     if ( quantumBitLength < 0 ) {      quantumBitLength = ibs.readDelta();      positionsQuantumBitLength = ibs.readDelta();      entryBitLength = ibs.readDelta();     }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -