⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gammadeltagammadeltabitstreamhpindexreader.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
     else {      quantumBitLength += Fast.nat2int( ibs.readDelta() );      positionsQuantumBitLength += Fast.nat2int( ibs.readDelta() );      entryBitLength += Fast.nat2int( ibs.readDelta() );     }     if ( DEBUG ) System.err.println( "{" + this + "} quantum bit length=" + quantumBitLength + " positions quantum bit length=" + positionsQuantumBitLength + " entry bit length=" + entryBitLength );    }    if ( DEBUG ) System.err.println( "{" + this + "} Reading tower; pointer=" + pointer + " maxh=" + maxh + " s=" + s );    if ( s > 0 ) {     towerLength = entryBitLength * ( s + 1 ) + Fast.nat2int( ibs.readDelta() );     if ( DEBUG ) System.err.println( "{" + this + "} Tower length=" + towerLength );    }    // We store the number of bits read at the start of the tower (just after the    // length).    bitsAtTowerStart = ibs.readBits();    if ( truncated ) {     if ( DEBUG ) System.err.println( "{" + this + "} Truncated--reading tops" );     // We read the tower top.     pointerSkip[ s ] = Fast.nat2int( ibs.readGolomb( towerTopB[ s ], towerTopLog2B[ s ] ) ) + pointerPrediction[ s ];     bitSkip[ s ] = quantumBitLength * ( 1 << s ) + entryBitLength * ( ( 1 << s + 1 ) - s - 2 ) + Fast.nat2int( ibs.readLongDelta() );     positionsBitSkip[ s ] = positionsQuantumBitLength * ( 1 << s ) + Fast.nat2int( ibs.readLongDelta() );    }    else {     // We copy the tower top from the lowest inherited entry suitably updated.     pointerSkip[ s ] = pointerSkip[ s + 1 ] - ( currentDocument - pointerAtLastSkipTower );     bitSkip[ s ] = bitSkip[ s + 1 ] - ( bitsAtTowerStart - readBitsAtLastSkipTower ) - towerLength;     // TODO: note that we could use the same method for pointers and bit skips!     positionsBitSkip[ s ] = positionsBitSkip[ s + 1 ] - positionsBitSkip[ s ];     if ( ASSERTS ) assert positionsBitSkip[ s ] > 0 : positionsBitSkip[ s ] + " <= " + 0;    }    // We read the remaining part of the tower, at least until we point after pointer.    if ( currentDocument + pointerSkip[ i ] > pointer ) {     for ( i = s - 1; i >= 0; i-- ) {      pointerSkip[ i ] = Fast.nat2int( ibs.readGolomb( towerLowerB[ i ], towerLowerLog2B[ i ] ) ) + pointerSkip[ i + 1 ] / 2;      bitSkip[ i ] = ( bitSkip[ i + 1 ] - entryBitLength * ( i + 1 ) ) / 2 - Fast.nat2int( ibs.readLongDelta() );      positionsBitSkip[ i ] = positionsBitSkip[ i + 1 ] / 2 - Fast.nat2int( ibs.readLongDelta() );      if ( DEBUG && currentDocument + pointerSkip[ i ] <= pointer ) System.err.println( "{" + this + "} stopping reading at i=" + i + " as currentDocument (" + currentDocument        + ") plus pointer skip (" + pointerSkip[ i ] + ") is smaller than or equal target (" + pointer + ")" );      if ( currentDocument + pointerSkip[ i ] <= pointer ) break;     }    }   }   /*			 * If we did not read the entire tower, we need to fix the skips we read (as they are			 * offsets from the *end* of the tower) and moreover we must make unusable the rest of			 * the tower (for asserts).			 */   if ( i > 0 ) {    final long fix = ibs.readBits() - bitsAtTowerStart;    for ( j = s; j >= i; j-- )     bitSkip[ j ] += towerLength - fix;    if ( ASSERTS ) for ( ; j >= 0; j-- )     pointerSkip[ j ] = Integer.MAX_VALUE;   }   else state = BEFORE_COUNT;   // We update the inherited tower.   final long deltaBits = ibs.readBits() - readBitsAtLastSkipTower;   final int deltaPointers = currentDocument - pointerAtLastSkipTower;   if ( ASSERTS ) assert lastPositionsIncrement >= 0 : lastPositionsIncrement + " < " + 0;   for ( j = Fast.mostSignificantBit( k ^ ( cache >> quantumDivisionShift ) ); j >= s + 1; j-- ) {    bitSkip[ j ] -= deltaBits;    positionsBitSkip[ j ] -= lastPositionsIncrement;    pointerSkip[ j ] -= deltaPointers;   }   readBitsAtLastSkipTower = ibs.readBits();   pointerAtLastSkipTower = currentDocument;   lowest = i < 0 ? 0 : i;   if ( DEBUG ) {    System.err.println( "{" + this + "} " + "Computed skip tower (lowest: " + lowest + ") for document record number " + numberOfDocumentRecord + " (pointer " + currentDocument + ") from " + Math.max( i, 0 ) + ": " );    System.err.print( "% " );    for ( j = 0; j <= s; j++ )     System.err.print( pointerSkip[ j ] + ":" + bitSkip[ j ] + ":" + positionsBitSkip[ j ] + " " );    System.err.print( " [" );    for ( ; j <= height; j++ )     System.err.print( pointerSkip[ j ] + ":" + bitSkip[ j ] + ":" + positionsBitSkip[ j ] + " " );    System.err.print( "]" );    System.err.println();   }   if ( ASSERTS ) {    for ( j = Fast.mostSignificantBit( k ^ ( cache >> quantumDivisionShift ) ); j >= s + 1; j-- ) {     assert positionsBitSkip[ j ] >= 0 : "positionsBitSkip[" + j + "] = " + positionsBitSkip[ j ] + " < " + 0;     assert positionsBitSkip[ j ] >= positionsBitSkip[ j - 1 ] :      "positionsBitSkip[" + j + "] = " + positionsBitSkip[ j ] + " < " + positionsBitSkip[ j - 1 ] + " = positionsBitSkip[" + ( j - 1 ) + "]";    }   }  }  public int skipTo( final int p ) throws IOException {   if ( DEBUG ) System.err.println( this + ".skipTo(" + p + ") [currentDocument=" + currentDocument + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", positionsBitsOffset=" + positionsBitsOffset + "]" );   // If we are just at the start of a list, let us read the first pointer.   if ( numberOfDocumentRecord == -1 ) nextDocument(); // TODO: shouldn't we just read the                // tower?   if ( currentDocument >= p ) {    if ( DEBUG ) System.err.println( this + ": No skip necessary, returning " + currentDocument );    return currentDocument;   }   if ( state == BEFORE_TOWER ) {    lastPositionsIncrement = maxh >= 0 ? positionsBitSkip[ 0 ] : 0;    readTower( p );    if ( DEBUG ) System.err.println( this + ": Incrementing positionsBitsOffset by " + lastPositionsIncrement + " in skipTo() initial phase" );    positionsBitsOffset += lastPositionsIncrement;   }   final int[] pointerSkip = this.pointerSkip;   for ( ;; ) {    if ( ASSERTS ) assert maxh < 0 || lowest > 0 || pointerSkip[ 0 ] != Integer.MAX_VALUE;    // If on a defective quantum (no tower) or p is inside the current quantum (no    // need to scan the tower) we bail out.    if ( maxh < 0 || lowest == 0 && pointerAtLastSkipTower + pointerSkip[ 0 ] > p ) break;    if ( DEBUG ) System.err.println( this + ": In for loop, currentDocument=" + currentDocument + ", maxh=" + maxh + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", p=" + p );    final int cacheOffset = ( numberOfDocumentRecord & wModuloMask );    final int k = cacheOffset >> quantumDivisionShift;    final int top = Fast.mostSignificantBit( k ^ ( Math.min( w, frequency - numberOfDocumentRecord + cacheOffset ) >> quantumDivisionShift ) );    int i;    for ( i = lowest; i <= top; i++ ) {     if ( ASSERTS && ( k & 1 << i ) != 0 ) assert pointerSkip[ i ] == pointerSkip[ i + 1 ];     if ( ASSERTS ) assert pointerSkip[ i ] != Integer.MAX_VALUE : "Invalid pointer skip " + i + " (lowest=" + lowest + ", top=" + top + ")";     if ( pointerAtLastSkipTower + pointerSkip[ i ] > p ) break;    }    if ( --i < 0 ) break;    if ( ASSERTS ) assert i >= lowest : i + " < " + lowest;    if ( DEBUG ) System.err.println( this + ": Safely after for with i=" + i + ", P[i]=" + pointerSkip[ i ] + ", A[i]=" + bitSkip[ i ] + ", B[i]=" + positionsBitSkip[ i ] );    if ( DEBUG ) System.err.println( this + ": [" + ibs.readBits() + "] Skipping " + ( bitSkip[ i ] - ( ibs.readBits() - readBitsAtLastSkipTower ) ) + " bits ("      + ( ( ( k & -( 1 << i ) ) + ( 1 << i ) ) * quantum - cacheOffset ) + " records) to get to document pointer " + ( currentDocument + pointerSkip[ i ] ) );    ibs.skip( bitSkip[ i ] - ( ibs.readBits() - readBitsAtLastSkipTower ) );    /* We accumulate the number of bits that must be skipped in the positions stream to reach the position				 * corresponding to the current tower. */    if ( DEBUG ) System.err.println( this + ": Incrementing positionsBitsOffset by " + positionsBitSkip[ i ] + " (height " + i + ")" );    lastPositionsIncrement = positionsBitSkip[ i ];    positionsBitsOffset += lastPositionsIncrement;    if ( ASSERTS && positionsLength != -1 ) assert positionsBitsOffset <= positionsLength : positionsBitsOffset + " > " + positionsLength;    state = BEFORE_TOWER;    currentDocument = pointerSkip[ i ] + pointerAtLastSkipTower;    numberOfDocumentRecord += ( ( k & -( 1 << i ) ) + ( 1 << i ) ) * quantum - cacheOffset;    // If we skipped beyond the end of the list, we invalidate the current document.    if ( numberOfDocumentRecord == frequency ) {     currentDocument = Integer.MAX_VALUE;     positionsUnread = false;     state = BEFORE_POINTER; // We are actually before a frequency, but we must     // avoid that calls to nextDocument() read anything    }    else {     positionsUnread = true;     readTower( p ); // Note that if we are exactly on the destination pointer, we will read the entire tower.    }    count = -1; // We must invalidate count as readDocumentPointer() would do.    positionsToReadToReachCurrentPosition = 0;    if ( endOfList() ) {     if ( DEBUG ) System.err.println( this + ".toSkip(): end-of-list, returning " + currentDocument + ", state=" + state );     // Note that if p == Integer.MAX_VALUE, we are certainly beyond end-of-list     return p == Integer.MAX_VALUE ? Integer.MAX_VALUE : currentDocument;    }   }   if ( DEBUG ) System.err.println( this + ": Completing sequentially, currentDocument=" + currentDocument + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", p=" + p );   while ( currentDocument < p ) {    if ( DEBUG ) System.err.println( this + ": Skipping sequentially (second), currentDocument=" + currentDocument + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", p=" + p );    if ( nextDocument() == -1 ) {     if ( DEBUG ) System.err.println( this + ": end-of-list, returning MAX_VALUE" );     return Integer.MAX_VALUE;    }   }   if ( DEBUG ) System.err.println( this + ".toSkip(): Returning " + currentDocument );   return currentDocument;  }  public void dispose() throws IOException {   parent.close();  }  public boolean hasNext() {   return !endOfList();  }  public int nextInt() {   if ( !hasNext() ) throw new NoSuchElementException();   try {    return nextDocument();   }   catch ( IOException e ) {    throw new RuntimeException( e );   }  }  public String toString() {   return index + " [" + term + "]";  }  /**		 * An interval iterator returning the positions of the current document as singleton		 * intervals.		 */  private final class IndexIntervalIterator extends AbstractObjectIterator<Interval> implements IntervalIterator {   int pos = -1;   public void reset() throws IOException {    pos = -1;    if ( positionsUnread ) updatePositionCache(); // This guarantees the position cache is ok   }   public void intervalTerms( final IntSet terms ) {    terms.add( BitStreamHPIndexReaderIndexIterator.this.term );   }   public boolean hasNext() {    return pos < count - 1;   }   public Interval next() {    if ( !hasNext() ) throw new NoSuchElementException();    return Interval.valueOf( positionCache[ ++pos ] );   }   public Interval nextInterval() {    return pos < count - 1 ? Interval.valueOf( positionCache[ ++pos ] ) : null;   }   public int extent() {    return 1;   }   public String toString() {    return index + ": " + term + "[doc=" + currentDocument + ", count=" + count + ", pos=" + pos + "]";   }  };  public Reference2ReferenceMap<Index, IntervalIterator> intervalIterators() throws IOException {   intervalIterator();   return singletonIntervalIterator;  }  public IntervalIterator intervalIterator() throws IOException {   return intervalIterator( keyIndex );  }  public IntervalIterator intervalIterator( final Index index ) throws IOException {   if ( ASSERTS ) ensureCurrentDocument();   // TODO: this was if ( index != keyIndex || hasPayloads )   if ( index != keyIndex ) return IntervalIterators.TRUE;   if ( ASSERTS ) assert intervalIterator != null;   intervalIterator.reset();   return intervalIterator;  }  public ReferenceSet<Index> indices() {   return index.singletonSet;  } } private IndexIterator documents( final CharSequence term, final int termNumber ) throws IOException {  indexIterator.term( term );  indexIterator.position( termNumber );  return indexIterator; } public IndexIterator documents( final int term ) throws IOException {  return documents( null, term ); } public IndexIterator documents( final CharSequence term ) throws IOException {  if ( closed ) throw new IllegalStateException( "This " + getClass().getSimpleName() + " has been closed" );  if ( index.termMap != null ) {   final int termIndex = (int)index.termMap.getLong( term );   if ( termIndex == -1 ) return index.emptyIndexIterator;   return documents( term, termIndex );  }  throw new UnsupportedOperationException( "Index " + index + " has no term map" ); } @Override public IndexIterator nextIterator() throws IOException {  return indexIterator.advance(); } public String toString() {  return getClass().getSimpleName() + "[" + index + "]"; } public void close() throws IOException {  super.close();  indexIterator.ibs.close();  indexIterator.positions.close(); }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -