📄 bitstreamhpindexreader.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
   readFrequency();   return this;  }  private void readFrequency() throws IOException {   // Read the frequency   switch ( index.frequencyCoding ) {   case GAMMA:    frequency = ibs.readGamma() + 1;    break;   case SHIFTED_GAMMA:    frequency = ibs.readShiftedGamma() + 1;    break;   case DELTA:    frequency = ibs.readDelta() + 1;    break;   default:    throw new IllegalStateException( "The required frequency coding (" + index.frequencyCoding + ") is not supported." );   }   if ( DEBUG ) System.err.println( this + ": Frequency for term " + term + " is " + frequency );   hasPointers = frequency < index.numberOfDocuments;   // We compute the modulus used for pointer Golomb coding   if ( pointerCoding == Coding.GOLOMB ) {    if ( hasPointers ) {     b = BitStreamIndex.golombModulus( frequency, index.numberOfDocuments );     log2b = Fast.mostSignificantBit( b );    }   }   quantumBitLength = positionsQuantumBitLength = entryBitLength = -1;   lowest = Integer.MAX_VALUE;   if ( ASSERTS ) for ( int i = height; i > Math.min( height, Fast.mostSignificantBit( frequency >> quantumDivisionShift ) ); i-- )    towerTopB[ i ] = towerLowerB[ i ] = pointerPrediction[ i ] = -1;   final long pointerQuantumSigma = BitStreamIndex.quantumSigma( frequency, index.numberOfDocuments, quantum );   for ( int i = Math.min( height, Fast.mostSignificantBit( frequency >> quantumDivisionShift ) ); i >= 0; i-- ) {    towerTopB[ i ] = BitStreamIndex.gaussianGolombModulus( pointerQuantumSigma, i + 1 );    towerTopLog2B[ i ] = Fast.mostSignificantBit( towerTopB[ i ] );    towerLowerB[ i ] = BitStreamIndex.gaussianGolombModulus( pointerQuantumSigma, i );    towerLowerLog2B[ i ] = Fast.mostSignificantBit( towerLowerB[ i ] );    pointerPrediction[ i ] = (int)( ( quantum * ( 1L << i ) * index.numberOfDocuments + frequency / 2 ) / frequency );   }   count = -1;   currentDocument = -1;   numberOfDocumentRecord = -1;   positionsBitsOffset = 0;   positionsBitSkip[ 0 ] = 0; // To avoid spurious tower updates on the first tower   positionsToReadToReachCurrentPosition = 0;   lastPositionsIncrement = 0;   state = BEFORE_POINTER;  }  public Index index() {   return keyIndex;  }  public int frequency() {   return frequency;  }  private void ensureCurrentDocument() {   if ( currentDocument < 0 ) throw new IllegalStateException( "nextDocument() has never been called for (term=" + term + ")" );   if ( currentDocument == Integer.MAX_VALUE ) throw new IllegalStateException( "This reader is positioned beyond the end of list of (term=" + term + ")" );  }  /**		 * Returns whether there are no more document records in the current inverted list.		 * 		 * <p>This method returns true if the last document pointer of the current inverted list		 * has been read. It makes no distinction as to where (inside the last document record) this		 * reader is currently positioned. In particular, this method will return true independently		 * of whether count and positions have been read or not (we note by passing that this is the		 * only sensible behaviour, as you can build indices with or without counts/positions).		 * 		 * <p>This method will return true also when this reader is positioned <em>beyond</em>		 * the last document pointer. In this case, {@link #currentDocumentPointer()} will return		 * {@link Integer#MAX_VALUE}.		 * 		 * @return true whether there are no more document records in the current inverted list.		 */  private boolean endOfList() {   if ( ASSERTS ) assert numberOfDocumentRecord <= frequency;   return numberOfDocumentRecord >= frequency - 1;  }  public int document() {   if ( ASSERTS ) ensureCurrentDocument();   return currentDocument;  }  public Payload payload() throws IOException {   throw new UnsupportedOperationException( "This index (" + index + ") does not contain payloads" );  }  public int count() throws IOException {   if ( DEBUG ) System.err.println( this + ".count()" );   if ( count != -1 ) return count;   if ( ASSERTS ) ensureCurrentDocument();   if ( state == BEFORE_TOWER ) readTower();   if ( ASSERTS && state != BEFORE_COUNT ) throw new IllegalStateException();   state = BEFORE_POINTER;   switch ( countCoding ) {   case UNARY:    count = ibs.readUnary() + 1;    break;   case SHIFTED_GAMMA:    count = ibs.readShiftedGamma() + 1;    break;   case GAMMA:    count = ibs.readGamma() + 1;    break;   case DELTA:    count = ibs.readDelta() + 1;    break;   default:    throw new IllegalStateException( "The required count coding (" + countCoding + ") is not supported." );   }   return count;  }  protected void updatePositionCache() throws IOException {   if ( DEBUG ) System.err.println( this + ".updatePositionCache()" );   positionsUnread = false;   count(); // This will force reading the tower and updating positionsBitsOffset, if necessary   if ( positionsBitsOffset > positions.readBits() ) {    if ( DEBUG ) System.err.println( this + ": positionsBitsOffset=" + positionsBitsOffset + ", positions.readBits()=" + positions.readBits() + ", skipping by " + ( positionsBitsOffset - positions.readBits() ) );    positions.skip( positionsBitsOffset - positions.readBits() );   }   if ( ASSERTS ) assert positionsToReadToReachCurrentPosition >= 0 : positionsToReadToReachCurrentPosition + " < 0";   if ( positionsToReadToReachCurrentPosition > 0 ) {    if ( DEBUG ) System.err.println( this + ":Skipping sequentially " + positionsToReadToReachCurrentPosition + " positions..." );    // We skip, inside the current quantum, the positions we haven't read    switch ( positionCoding ) {    case SHIFTED_GAMMA:     if ( COOKIES ) {      positionsToReadToReachCurrentPosition--;      if ( positions.readShiftedGamma() != Integer.MAX_VALUE ) throw new AssertionError();     }     positions.skipShiftedGammas( positionsToReadToReachCurrentPosition );     break;    case GAMMA:     if ( COOKIES ) {      positionsToReadToReachCurrentPosition--;      if ( positions.readGamma() != Integer.MAX_VALUE ) throw new AssertionError();     }     positions.skipGammas( positionsToReadToReachCurrentPosition );     break;    case DELTA:     if ( COOKIES ) {      positionsToReadToReachCurrentPosition--;      if ( positions.readDelta() != Integer.MAX_VALUE ) throw new AssertionError();     }     positions.skipDeltas( positionsToReadToReachCurrentPosition );     break;    default:     throw new IllegalStateException( "The required position coding (" + positionCoding + ") is not supported." );    }   }   // We must fix it so that nextDocument() will restore it to 0   positionsToReadToReachCurrentPosition = -count;   if ( COOKIES ) positionsToReadToReachCurrentPosition--;   if ( count > positionCache.length ) positionCache = new int[ Math.max( positionCache.length * 2, count ) ];   final int[] occ = positionCache;   switch ( positionCoding ) {   case SHIFTED_GAMMA:    if ( COOKIES && positions.readShiftedGamma() != Integer.MAX_VALUE ) throw new AssertionError();    positions.readShiftedGammas( occ, count );    for ( int i = 1; i < count; i++ ) occ[ i ] += occ[ i - 1 ] + 1;    return;   case GAMMA:    if ( COOKIES && positions.readGamma() != Integer.MAX_VALUE ) throw new AssertionError();    positions.readGammas( occ, count );    for ( int i = 1; i < count; i++ ) occ[ i ] += occ[ i - 1 ] + 1;    return;   case DELTA:    if ( COOKIES && positions.readDelta() != Integer.MAX_VALUE ) throw new AssertionError();    positions.readDeltas( occ, count );    for ( int i = 1; i < count; i++ ) occ[ i ] += occ[ i - 1 ] + 1;    return;   default:    throw new IllegalStateException( "The required position coding (" + index.positionCoding + ") is not supported." );   }  }  public IntIterator positions() throws IOException {   if ( ASSERTS ) ensureCurrentDocument();   if ( positionsUnread ) updatePositionCache();   return IntIterators.wrap( positionCache, 0, count );  }  public int[] positionArray() throws IOException {   if ( ASSERTS ) ensureCurrentDocument();   if ( positionsUnread ) updatePositionCache();   return positionCache;  }  // TODO: check who's using this (positionArray() is actually faster now)  public int positions( final int[] position ) throws IOException {   if ( ASSERTS ) ensureCurrentDocument();   if ( positionsUnread ) updatePositionCache(); // And also that positions have                 // been read   if ( position.length < count ) return -count;   for ( int i = count; i-- != 0; )    position[ i ] = this.positionCache[ i ];   return count;  }  public int nextDocument() throws IOException {   if ( DEBUG ) System.err.println( "{" + this + "} nextDocument()" );   if ( state != BEFORE_POINTER ) {    if ( state == BEFORE_TOWER ) readTower();    if ( state == BEFORE_COUNT ) {     switch ( countCoding ) {     case UNARY:      count = ibs.readUnary() + 1;      break;     case SHIFTED_GAMMA:      count = ibs.readShiftedGamma() + 1;      break;     case GAMMA:      count = ibs.readGamma() + 1;      break;     case DELTA:      count = ibs.readDelta() + 1;      break;     default:      throw new IllegalStateException( "The required count coding (" + countCoding + ") is not supported." );     }     state = BEFORE_POINTER;    }   }   if ( endOfList() ) return -1;   if ( hasPointers ) {// We do not write pointers for everywhere occurring terms.    switch ( pointerCoding ) {    case SHIFTED_GAMMA:     currentDocument += ibs.readShiftedGamma() + 1;     break;    case GAMMA:     currentDocument += ibs.readGamma() + 1;     break;    case DELTA:     currentDocument += ibs.readDelta() + 1;     break;    case GOLOMB:     currentDocument += ibs.readGolomb( b, log2b ) + 1;     break;    default:     throw new IllegalStateException( "The required pointer coding (" + pointerCoding + ") is not supported." );    }   }   else currentDocument++;   numberOfDocumentRecord++;   if ( ASSERTS && numberOfDocumentRecord > quantum ) assert positionsBitsOffset > 0;   if ( ( numberOfDocumentRecord & quantumModuloMask ) == 0 ) {    state = BEFORE_TOWER;    positionsToReadToReachCurrentPosition = 0;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -