⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 skipgammadeltagammadeltabitstreamindexreader.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
   // Read the frequency    frequency = ibs.readGamma() + 1;   hasPointers = frequency < index.numberOfDocuments;    quantumBitLength = entryBitLength = -1;    lowest = Integer.MAX_VALUE;    if ( ASSERTS ) for( int i = height; i > Math.min( height, Fast.mostSignificantBit( frequency >> quantumDivisionShift ) ); i-- ) towerTopB[ i ] = towerLowerB[ i ] = pointerPrediction[ i ] = -1;    final long pointerQuantumSigma = BitStreamIndex.quantumSigma( frequency, index.numberOfDocuments, quantum );    for( int i = Math.min( height, Fast.mostSignificantBit( frequency >> quantumDivisionShift ) ); i >= 0; i-- ) {     towerTopB[ i ] = BitStreamIndex.gaussianGolombModulus( pointerQuantumSigma, i + 1 );     towerTopLog2B[ i ] = Fast.mostSignificantBit( towerTopB[ i ] );     towerLowerB[ i ] = BitStreamIndex.gaussianGolombModulus( pointerQuantumSigma, i );     towerLowerLog2B[ i ] = Fast.mostSignificantBit( towerLowerB[ i ] );     pointerPrediction[ i ] = (int)( ( quantum * ( 1L << i ) * index.numberOfDocuments + frequency / 2 ) / frequency );    }   count = -1;   currentDocument = -1;   numberOfDocumentRecord = -1;   state = BEFORE_POINTER;  }  public Index index() {   return keyIndex;  }  public int frequency() {   return frequency;  }  private void ensureCurrentDocument() {   if ( currentDocument < 0 ) throw new IllegalStateException( "nextDocument() has never been called for (term=" + currentTerm + ")" );   if ( currentDocument == Integer.MAX_VALUE ) throw new IllegalStateException( "This reader is positioned beyond the end of list of (term=" + currentTerm + ")" );  }  /** Returns whether there are no more document records in the current inverted list.		 * 		 * <p>This method returns true if the last document pointer of the current inverted		 * list has been read. It makes no distinction as to where (inside the last document		 * record) this reader is currently positioned. In particular, this method will		 * return true independently of whether count and positions have been read or not (we		 * note by passing that this is the only sensible behaviour, as you can build indices		 * with or without counts/positions).		 * 		 * <p>This method will return true also when this reader is positioned <em>beyond</em>		 * the last document pointer. In this case, {@link #currentDocumentPointer()} will		 * return {@link Integer#MAX_VALUE}.		 *		 * @return true whether there are no more document records in the current inverted list.		 */  private boolean endOfList() {   if ( ASSERTS ) assert numberOfDocumentRecord <= frequency;   return numberOfDocumentRecord >= frequency - 1;  }  public int document() {   if ( ASSERTS ) ensureCurrentDocument();   return currentDocument;  }  public Payload payload() throws IOException {   if ( DEBUG ) System.err.println( this + ".payload()" );   if ( ASSERTS ) ensureCurrentDocument();    throw new UnsupportedOperationException( "This index ("+ index + ") does not contain payloads" );  }  public int count() throws IOException {   if ( DEBUG ) System.err.println( this + ".count()" );   if ( count != -1 ) return count;   if ( ASSERTS ) ensureCurrentDocument();   if ( state == BEFORE_TOWER ) readTower();  {   state = BEFORE_POSITIONS;    count = ibs.readGamma() + 1;  }   return count;  }  /** We read positions, assuming state <= BEFORE_POSITIONS */  @SuppressWarnings("unused")  protected void updatePositionCache() throws IOException {   if ( ASSERTS ) assert state <= BEFORE_POSITIONS;   if ( state < BEFORE_POSITIONS ) {    if ( state == BEFORE_TOWER ) readTower();    if ( state == BEFORE_COUNT )  {   state = BEFORE_POSITIONS;    count = ibs.readGamma() + 1;  }   }    if ( count > positionCache.length ) positionCache = new int[ Math.max( positionCache.length * 2, count ) ];    final int[] occ = positionCache;    state = BEFORE_POINTER;     ibs.readDeltas( occ, count );     for( int i = 1; i < count; i++ ) occ[ i ] += occ[ i - 1 ] + 1;  }  public IntIterator positions() throws IOException {   if ( ASSERTS ) ensureCurrentDocument();   if ( state <= BEFORE_POSITIONS ) updatePositionCache();   return IntIterators.wrap( positionCache, 0, count );  }  public int[] positionArray() throws IOException {   if ( ASSERTS ) ensureCurrentDocument();   if ( state <= BEFORE_POSITIONS ) updatePositionCache();   return positionCache;  }  // TODO: check who's using this (positionArray() is actually faster now)  public int positions( final int[] position ) throws IOException {   if ( ASSERTS ) ensureCurrentDocument();   if ( state <= BEFORE_POSITIONS ) updatePositionCache(); // And also that positions have been read   if ( position.length < count ) return -count;   for( int i = count; i-- != 0; ) position[ i ] = this.positionCache[ i ];   return count;  }  public int nextDocument() throws IOException {   if ( DEBUG ) System.err.println( "{" + this + "} nextDocument()" );   if ( state != BEFORE_POINTER ) {    if ( state == BEFORE_TOWER ) readTower();    if ( state == BEFORE_COUNT )  {   state = BEFORE_POSITIONS;    count = ibs.readGamma() + 1;  }    if ( state == BEFORE_POSITIONS ) {     // Here we just skip; note that the state change is necessary if endOfList() is true     state = BEFORE_POINTER;      ibs.skipDeltas( count );    }   }   if ( endOfList() ) return -1;   if ( hasPointers ) {// We do not write pointers for everywhere occurring terms.     currentDocument += ibs.readDelta() + 1;   }   else currentDocument++;   numberOfDocumentRecord++;    state = BEFORE_COUNT;   count = -1;   if ( ( numberOfDocumentRecord & quantumModuloMask ) == 0 ) state = BEFORE_TOWER;   return currentDocument;  }  /** Reads the entire skip tower for the current position. 		 */  private void readTower() throws IOException {   readTower( -1 );  }  /** Reads the skip tower for the current position, possibly skipping part of the tower. 		 * 		 * <P>Note that this method will update {@link #state} only if it reads the entire tower,		 * otherwise the state remains {@link #BEFORE_TOWER}.		 * 		 * @param pointer the tower will be read up to the first entry smaller than or equal to this pointer; use		 * -1 to guarantee that the entire tower will be read.		 */  private void readTower( final int pointer ) throws IOException {   int i, j, k, cacheOffset, cache, towerLength = 0;   long bitsAtTowerStart = 0;   boolean truncated = false;   if ( ASSERTS ) assert numberOfDocumentRecord % quantum == 0;   if ( ASSERTS && state != BEFORE_TOWER ) throw new IllegalStateException( "readTower() called in state " + state );   cacheOffset = ( numberOfDocumentRecord & wModuloMask );   k = cacheOffset >> quantumDivisionShift;   if ( ASSERTS && k == 0 ) { // Invalidate current tower data    it.unimi.dsi.fastutil.ints.IntArrays.fill( pointerSkip, Integer.MAX_VALUE );    it.unimi.dsi.fastutil.longs.LongArrays.fill( bitSkip, Integer.MAX_VALUE );   }   // Compute the height of the current skip tower.   s = ( k == 0 )? height : Fast.leastSignificantBit( k );   cache = frequency - w * ( numberOfDocumentRecord >> wDivisionShift );   if ( cache < w ) {    maxh = Fast.mostSignificantBit( ( cache >> quantumDivisionShift ) - k );    if ( maxh < s ) {     s = maxh;     truncated = true;    } else truncated = false;   }   else {    cache = w;    maxh = height;    truncated = k == 0;   }   //assert w == cache || k == 0 || lastMaxh == Fast.mostSignificantBit( k ^ ( cache/quantum ) )  : lastMaxh +","+ (Fast.mostSignificantBit( k ^ ( cache/quantum ) ));   i = s;   if ( s >= 0 ) {    if ( k == 0 ) {     if ( quantumBitLength < 0 ) {      quantumBitLength = ibs.readDelta();      entryBitLength = ibs.readDelta();     }     else {      quantumBitLength += Fast.nat2int( ibs.readDelta() );      entryBitLength += Fast.nat2int( ibs.readDelta() );     }     if ( DEBUG ) System.err.println( "{" + this + "} quantum bit length=" + quantumBitLength + " entry bit length=" + entryBitLength );    }    if ( DEBUG ) System.err.println( "{" + this + "} Reading tower; pointer=" + pointer + " maxh=" + maxh + " s=" + s );    if ( s > 0 ) {     towerLength = entryBitLength * ( s + 1 ) + Fast.nat2int( ibs.readDelta() );     if ( DEBUG ) System.err.println( "{" + this + "} Tower length=" + towerLength );    }    // We store the number of bits read at the start of the tower (just after the length).    bitsAtTowerStart = ibs.readBits();    if ( truncated ) {     if ( DEBUG ) System.err.println( "{" + this + "} Truncated--reading tops" );     // We read the tower top.     pointerSkip[ s ] = Fast.nat2int( ibs.readGolomb( towerTopB[ s ], towerTopLog2B[ s ] ) ) + pointerPrediction[ s ];     bitSkip[ s ] = quantumBitLength * ( 1 << s ) + entryBitLength * ( ( 1 << s + 1 ) - s - 2 ) + Fast.nat2int( ibs.readLongDelta() );    }    else {     // We copy the tower top from the lowest inherited entry suitably updated.     pointerSkip[ s ] = pointerSkip[ s + 1 ] - ( currentDocument - pointerAtLastSkipTower );     bitSkip[ s ] = bitSkip[ s + 1 ] - ( bitsAtTowerStart - readBitsAtLastSkipTower ) - towerLength;    }    // We read the remaining part of the tower, at least until we point after pointer.    if ( currentDocument + pointerSkip[ i ] > pointer ) {     for( i = s - 1; i >= 0; i-- ) {      pointerSkip[ i ] = Fast.nat2int( ibs.readGolomb( towerLowerB[ i ], towerLowerLog2B[ i ] ) ) + pointerSkip[ i + 1 ] / 2;      bitSkip[ i ] = ( bitSkip[ i + 1 ] - entryBitLength * ( i + 1 ) ) / 2 - Fast.nat2int( ibs.readLongDelta() );      if ( DEBUG && currentDocument + pointerSkip[ i ] <= pointer ) System.err.println( "{" + this + "} stopping reading at i=" + i + " as currentDocument (" + currentDocument + ") plus pointer skip (" + pointerSkip[ i ] + ") is smaller than or equal target (" + pointer +")" );      if ( currentDocument + pointerSkip[ i ] <= pointer ) break;     }    }   }   /* If we did not read the entire tower, we need to fix the skips we read (as they			 * are offsets from the *end* of the tower) and moreover we must make unusable the			 * rest of the tower (for asserts). */   if ( i > 0 ) {    final long fix = ibs.readBits() - bitsAtTowerStart;    for( j = s; j >= i; j-- ) bitSkip[ j ] += towerLength - fix;    if ( ASSERTS ) for( ; j >= 0; j-- ) pointerSkip[ j ] = Integer.MAX_VALUE;   }   else

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -