⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bitstreamindexreader.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
    // NOTE: the lines down to the first closing brace at member level are the tail of a
    // definition that starts before this excerpt (it prepares the reader for a new term);
    // they are reproduced unchanged.
    throw new IllegalStateException( "The required frequency coding (" + index.frequencyCoding + ") is not supported." );
    }
    // Pointers are considered present only when the term's frequency is smaller than the
    // number of documents (see the matching comment in nextDocument()).
    hasPointers = frequency < index.numberOfDocuments;
    // We compute the modulus used for pointer Golomb coding
    if ( pointerCoding == Coding.GOLOMB ) {
      if ( hasPointers ) {
        b = BitStreamIndex.golombModulus( frequency, index.numberOfDocuments );
        log2b = Fast.mostSignificantBit( b );
      }
    }
    if ( hasSkips ) {
      quantumBitLength = entryBitLength = -1;
      lowest = Integer.MAX_VALUE;
      // With assertions on, tower levels above the highest one used for this list are set to -1.
      if ( ASSERTS ) for( int i = height; i > Math.min( height, Fast.mostSignificantBit( frequency >> quantumDivisionShift ) ); i-- ) towerTopB[ i ] = towerLowerB[ i ] = pointerPrediction[ i ] = -1;
      final long pointerQuantumSigma = BitStreamIndex.quantumSigma( frequency, index.numberOfDocuments, quantum );
      // Per-level Golomb moduli (and their most significant bits) plus the predicted pointer
      // skip for each tower level, from the highest usable level down to level 0.
      for( int i = Math.min( height, Fast.mostSignificantBit( frequency >> quantumDivisionShift ) ); i >= 0; i-- ) {
        towerTopB[ i ] = BitStreamIndex.gaussianGolombModulus( pointerQuantumSigma, i + 1 );
        towerTopLog2B[ i ] = Fast.mostSignificantBit( towerTopB[ i ] );
        towerLowerB[ i ] = BitStreamIndex.gaussianGolombModulus( pointerQuantumSigma, i );
        towerLowerLog2B[ i ] = Fast.mostSignificantBit( towerLowerB[ i ] );
        // Rounded value of quantum * 2^i * numberOfDocuments / frequency (computed in long arithmetic).
        pointerPrediction[ i ] = (int)( ( quantum * ( 1L << i ) * index.numberOfDocuments + frequency / 2 ) / frequency );
      }
    }
    // Reset the per-list cursor: no count cached, no document read yet.
    count = -1;
    currentDocument = -1;
    numberOfDocumentRecord = -1;
    state = BEFORE_POINTER;
  }

  /** Returns the {@code keyIndex} field of this reader.
   *
   * @return the value of {@code keyIndex}.
   */
  public Index index() {
    return keyIndex;
  }

  /** Returns the frequency of the current term, as stored in the {@code frequency} field.
   *
   * @return the value of {@code frequency}.
   */
  public int frequency() {
    return frequency;
  }

  /** Checks that this reader is positioned on a document.
   *
   * @throws IllegalStateException if the current document is negative (no document has been
   * read yet) or equal to {@link Integer#MAX_VALUE} (the reader is beyond the end of the list).
   */
  private void ensureCurrentDocument() {
    if ( currentDocument < 0 ) throw new IllegalStateException( "nextDocument() has never been called for (term=" + currentTerm + ")" );
    if ( currentDocument == Integer.MAX_VALUE ) throw new IllegalStateException( "This reader is positioned beyond the end of list of (term=" + currentTerm + ")" );
  }

  /** Returns whether there are no more document records in the current inverted list.
   *
   * <p>This method returns true if the last document pointer of the current inverted
   * list has been read. It makes no distinction as to where (inside the last document
   * record) this reader is currently positioned. In particular, this method will
   * return true independently of whether count and positions have been read or not (we
   * note in passing that this is the only sensible behaviour, as you can build indices
   * with or without counts/positions).
   *
   * <p>This method will return true also when this reader is positioned <em>beyond</em>
   * the last document pointer. In this case, {@link #currentDocumentPointer()} will
   * return {@link Integer#MAX_VALUE}.
   *
   * @return true if there are no more document records in the current inverted list.
   */
  private boolean endOfList() {
    if ( ASSERTS ) assert numberOfDocumentRecord <= frequency;
    // Records are numbered from 0 (the counter starts at -1 and is incremented per pointer read).
    return numberOfDocumentRecord >= frequency - 1;
  }

  /** Returns the current document pointer.
   *
   * <p>The position check is performed only when {@code ASSERTS} is true.
   *
   * @return the value of {@code currentDocument}.
   */
  public int document() {
    if ( ASSERTS ) ensureCurrentDocument();
    return currentDocument;
  }

  /** Returns the payload of the current document record, reading it from the bit stream
   * (after the skip tower, if still pending) when it has not been read yet.
   *
   * @return the shared {@code payload} instance.
   * @throws UnsupportedOperationException if the index has no payloads.
   * @throws IOException declared for the underlying bit-stream reads.
   */
  public Payload payload() throws IOException {
    if ( DEBUG ) System.err.println( this + ".payload()" );
    if ( ASSERTS ) ensureCurrentDocument();
    if ( ! hasPayloads )
      throw new UnsupportedOperationException( "This index ("+ index + ") does not contain payloads" );
    // Presumably the state constants are ordered so that BEFORE_TOWER <= BEFORE_PAYLOAD; verify
    // against their declaration.
    if ( state <= BEFORE_PAYLOAD ) {
      if ( state == BEFORE_TOWER ) readTower();
      payload.read( ibs );
      state = hasCounts ? BEFORE_COUNT : BEFORE_POINTER;
    }
    return payload;
  }

  /** Returns the count for the current document record, decoding it on first access and
   * caching it in the {@code count} field (-1 means "not read yet").
   *
   * @return the count; the decoded value is incremented by one before being stored.
   * @throws UnsupportedOperationException if the index has no counts.
   * @throws IllegalStateException if the count coding is not one of the handled ones, or
   * (with {@code ASSERTS}) if the reader is not positioned before a count.
   * @throws IOException declared for the underlying bit-stream reads.
   */
  public int count() throws IOException {
    if ( DEBUG ) System.err.println( this + ".count()" );
    if ( count != -1 ) return count;
    if ( ASSERTS ) ensureCurrentDocument();
    if ( ! hasCounts )
      throw new UnsupportedOperationException( "This index (" + index + ") does not contain counts" );
    if ( state == BEFORE_TOWER ) readTower();
    // NOTE(review): state is not advanced here after reading the payload, so with ASSERTS on
    // the check below would throw if state was BEFORE_PAYLOAD -- confirm that payloads and
    // counts never coexist in one index.
    if ( state == BEFORE_PAYLOAD ) payload.read( ibs );
    {
      if ( ASSERTS && state != BEFORE_COUNT ) throw new IllegalStateException();
      state = hasPositions ? BEFORE_POSITIONS : BEFORE_POINTER;
      switch( countCoding ) {
      case UNARY:
        count = ibs.readUnary() + 1;
        break;
      case SHIFTED_GAMMA:
        count = ibs.readShiftedGamma() + 1;
        break;
      case GAMMA:
        count = ibs.readGamma() + 1;
        break;
      case DELTA:
        count = ibs.readDelta() + 1;
        break;
      default: throw new IllegalStateException( "The required count coding (" + countCoding + ") is not supported." );
      }
    }
    return count;
  }

  /** We read positions, assuming state <= BEFORE_POSITIONS.
   *
   * <p>Any pending tower, payload and count are consumed first; then {@code count} positions
   * are decoded into {@code positionCache} (grown if needed) and the state becomes
   * {@code BEFORE_POINTER}.
   *
   * @throws UnsupportedOperationException if the index has no positions.
   * @throws IllegalStateException if the count or position coding is not one of the handled ones.
   * @throws IOException declared for the underlying bit-stream reads.
   */
  @SuppressWarnings("unused")
  protected void updatePositionCache() throws IOException {
    if ( ASSERTS ) assert state <= BEFORE_POSITIONS;
    if ( ! hasPositions )
      throw new UnsupportedOperationException( "This index(" + index + ") does not contain positions" );
    if ( state < BEFORE_POSITIONS ) {
      if ( state == BEFORE_TOWER ) readTower();
      // NOTE(review): as in count(), state is not advanced after the payload read, so the count
      // below is skipped (and count stays -1) when state was BEFORE_PAYLOAD -- confirm that
      // payloads and positions never coexist in one index.
      if ( state == BEFORE_PAYLOAD ) payload.read( ibs );
      if ( state == BEFORE_COUNT )
      {
        // This check can never fire inside the enclosing if.
        if ( ASSERTS && state != BEFORE_COUNT ) throw new IllegalStateException();
        switch( countCoding ) {
        case UNARY:
          count = ibs.readUnary() + 1;
          break;
        case SHIFTED_GAMMA:
          count = ibs.readShiftedGamma() + 1;
          break;
        case GAMMA:
          count = ibs.readGamma() + 1;
          break;
        case DELTA:
          count = ibs.readDelta() + 1;
          break;
        default: throw new IllegalStateException( "The required count coding (" + countCoding + ") is not supported." );
        }
      }
    }
    // Grow the cache to at least double its size so that it can hold count positions.
    if ( count > positionCache.length ) positionCache = new int[ Math.max( positionCache.length * 2, count ) ];
    final int[] occ = positionCache;
    state = BEFORE_POINTER;
    switch( positionCoding ) {
    case SHIFTED_GAMMA:
      ibs.readShiftedGammas( occ, count );
      // Turn the decoded values (gap minus one) into absolute positions by prefix sums.
      for( int i = 1; i < count; i++ ) occ[ i ] += occ[ i - 1 ] + 1;
      return;
    case GAMMA:
      ibs.readGammas( occ, count );
      for( int i = 1; i < count; i++ ) occ[ i ] += occ[ i - 1 ] + 1;
      return;
    case DELTA:
      ibs.readDeltas( occ, count );
      for( int i = 1; i < count; i++ ) occ[ i ] += occ[ i - 1 ] + 1;
      return;
    case GOLOMB:
      if ( ASSERTS ) assert index.sizes != null;
      int docSize = index.sizes.getInt( currentDocument );
      // Fewer than three positions are read directly in minimal binary, bounded by the document size.
      if ( count < 3 ) for( int i = 0; i < count; i++ ) occ[ i ] = ibs.readMinimalBinary( docSize );
      else {
        final int bb = BitStreamIndex.golombModulus( count, docSize );
        int prev = -1;
        if ( bb != 0 ) {
          final int log2bb = Fast.mostSignificantBit( bb );
          for( int i = 0; i < count; i++ ) occ[ i ] = prev = ibs.readGolomb( bb, log2bb ) + prev + 1;
        }
        // A zero modulus means nothing is read from the stream: positions are 0, 1, ..., count - 1.
        else for ( int i = 0; i < count; i++ ) occ[ i ] = i;
      }
      return;
    case SKEWED_GOLOMB:
      if ( ASSERTS ) assert index.sizes != null;
      int docSize2 = index.sizes.getInt( currentDocument );
      if ( count < 3 ) for( int i = 0; i < count; i++ ) occ[ i ] = ibs.readMinimalBinary( docSize2 );
      else {
        // The skewed-Golomb parameter is itself read from the stream (minimal binary, plus one).
        final int sb = ibs.readMinimalBinary( docSize2 ) + 1;
        int prev2 = -1;
        for( int i = 0; i < count; i++ ) occ[ i ] = prev2 = ibs.readSkewedGolomb( sb ) + prev2 + 1;
      }
      return;
    case INTERPOLATIVE:
      it.unimi.dsi.mg4j.io.InterpolativeCoding.read( ibs, occ, 0, count, 0, index.sizes.getInt( currentDocument ) - 1 );
      return;
    default:
      throw new IllegalStateException( "The required position coding (" + index.positionCoding + ") is not supported." );
    }
  }

  /** Returns an iterator over the positions of the current document record, decoding them
   * first if they have not been read yet.
   *
   * @return an iterator over the first {@code count} entries of the position cache.
   * @throws IOException declared for the underlying bit-stream reads.
   */
  public IntIterator positions() throws IOException {
    if ( ASSERTS ) ensureCurrentDocument();
    if ( state <= BEFORE_POSITIONS ) updatePositionCache();
    return IntIterators.wrap( positionCache, 0, count );
  }

  /** Returns the internal position cache, decoding the positions first if they have not been
   * read yet.
   *
   * <p>The returned array is the cache itself, not a copy, and may be longer than the number
   * of positions of the current record.
   *
   * @return the internal position cache.
   * @throws IOException declared for the underlying bit-stream reads.
   */
  public int[] positionArray() throws IOException {
    if ( ASSERTS ) ensureCurrentDocument();
    if ( state <= BEFORE_POSITIONS ) updatePositionCache();
    return positionCache;
  }

  // TODO: check who's using this (positionArray() is actually faster now)
  /** Copies the positions of the current document record into the given array.
   *
   * @param position the destination array.
   * @return the number of positions copied, or the negated count (with nothing copied) if
   * {@code position} is too short.
   * @throws IOException declared for the underlying bit-stream reads.
   */
  public int positions( final int[] position ) throws IOException {
    if ( ASSERTS ) ensureCurrentDocument();
    if ( state <= BEFORE_POSITIONS ) updatePositionCache(); // And also that positions have been read
    if ( position.length < count ) return -count;
    for( int i = count; i-- != 0; ) position[ i ] = this.positionCache[ i ];
    return count;
  }

  /** Advances to the next document record of the current inverted list.
   *
   * <p>Whatever part of the current record is still unread (tower, payload, count, positions)
   * is consumed or skipped first; then the next pointer is decoded, or the current document is
   * simply incremented when the list has no pointers.
   *
   * @return the new current document, or -1 if the list has no more records.
   * @throws IllegalStateException if a count, position or pointer coding is not one of the handled ones.
   * @throws IOException declared for the underlying bit-stream reads.
   */
  public int nextDocument() throws IOException {
    if ( DEBUG ) System.err.println( "{" + this + "} nextDocument()" );
    if ( state != BEFORE_POINTER ) {
      if ( state == BEFORE_TOWER ) readTower();
      // NOTE(review): state is not advanced after the payload read, so a following count would
      // not be consumed here -- confirm that payloads and counts never coexist in one index.
      if ( state == BEFORE_PAYLOAD ) payload.read( ibs );
      if ( state == BEFORE_COUNT )
      {
        // This check can never fire inside the enclosing if.
        if ( ASSERTS && state != BEFORE_COUNT ) throw new IllegalStateException();
        state = hasPositions ? BEFORE_POSITIONS : BEFORE_POINTER;
        switch( countCoding ) {
        case UNARY:
          count = ibs.readUnary() + 1;
          break;
        case SHIFTED_GAMMA:
          count = ibs.readShiftedGamma() + 1;
          break;
        case GAMMA:
          count = ibs.readGamma() + 1;
          break;
        case DELTA:
          count = ibs.readDelta() + 1;
          break;
        default: throw new IllegalStateException( "The required count coding (" + countCoding + ") is not supported." );
        }
      }
      if ( state == BEFORE_POSITIONS ) {
        // Here we just skip; note that the state change is necessary if endOfList() is true
        state = BEFORE_POINTER;
        switch( positionCoding ) {
        case SHIFTED_GAMMA:
          ibs.skipShiftedGammas( count );
          break;
        case GAMMA:
          ibs.skipGammas( count );
          break;
        case DELTA:
          ibs.skipDeltas( count );
          break;
        case GOLOMB:
          if ( ASSERTS ) assert index.sizes != null;
          int docSize = index.sizes.getInt( currentDocument );
          // Same layout as in updatePositionCache(), but the decoded values are discarded.
          if ( count < 3 ) for( int i = 0; i < count; i++ ) ibs.readMinimalBinary( docSize );
          else {
            final int bb = BitStreamIndex.golombModulus( count, docSize );
            // With a zero modulus there is nothing to skip.
            if ( bb != 0 ) {
              final int log2bb = Fast.mostSignificantBit( bb );
              for( int i = 0; i < count; i++ ) ibs.readGolomb( bb, log2bb );
            }
          }
          break;
        case SKEWED_GOLOMB:
          if ( ASSERTS ) assert index.sizes != null;
          docSize = index.sizes.getInt( currentDocument );
          if ( count < 3 ) for( int i = 0; i < count; i++ ) ibs.readMinimalBinary( docSize );
          else {
            final int sb = ibs.readMinimalBinary( docSize ) + 1;
            for( int i = 0; i < count; i++ ) ibs.readSkewedGolomb( sb );
          }
          break;
        case INTERPOLATIVE:
          // A null destination array is passed: the values are decoded only to move past them.
          it.unimi.dsi.mg4j.io.InterpolativeCoding.read( ibs, null, 0, count, 0, index.sizes.getInt( currentDocument ) - 1 );
          break;
        default:
          throw new IllegalStateException( "The required position coding (" + positionCoding + ") is not supported." );
        }
      }
    }
    if ( endOfList() ) return -1;
    if ( hasPointers ) {// We do not write pointers for everywhere occurring terms.
      // Each decoded value is a gap minus one from the previous document pointer.
      switch( pointerCoding ) {
      case SHIFTED_GAMMA:
        currentDocument += ibs.readShiftedGamma() + 1;
        break;
      case GAMMA:
        currentDocument += ibs.readGamma() + 1;
        break;
      case DELTA:
        currentDocument += ibs.readDelta() + 1;
        break;
      case GOLOMB:
        currentDocument += ibs.readGolomb( b, log2b ) + 1;
        break;
      default:
        throw new IllegalStateException( "The required pointer coding (" + pointerCoding + ") is not supported." );
      }
    }
    else currentDocument++;
    numberOfDocumentRecord++;
    // Select what follows the pointer in this record; if neither applies, state is left as it is.
    if ( hasPayloads )
      state = BEFORE_PAYLOAD;
    else if ( hasCounts )
      state = BEFORE_COUNT;
    count = -1;
    // When the masked record number is zero, a skip tower has to be read before anything else.
    if ( hasSkips && ( numberOfDocumentRecord & quantumModuloMask ) == 0 ) state = BEFORE_TOWER;
    return currentDocument;
  }

  /** Reads the entire skip tower for the current position.
   *
   * <p>Delegates to {@code readTower( -1 )}.
   */
  private void readTower() throws IOException {
    readTower( -1 );
  }

  /** Reads the skip tower for the current position, possibly skipping part of the tower.
   *
   * <P>Note that this method will update {@link #state} only if it reads the entire tower,
   * otherwise the state remains {@link #BEFORE_TOWER}.
   *

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -