📄 skipgammadeltagammadeltabitstreamindexreader.java
字号:
// Read the frequency frequency = ibs.readGamma() + 1; hasPointers = frequency < index.numberOfDocuments; quantumBitLength = entryBitLength = -1; lowest = Integer.MAX_VALUE; if ( ASSERTS ) for( int i = height; i > Math.min( height, Fast.mostSignificantBit( frequency >> quantumDivisionShift ) ); i-- ) towerTopB[ i ] = towerLowerB[ i ] = pointerPrediction[ i ] = -1; final long pointerQuantumSigma = BitStreamIndex.quantumSigma( frequency, index.numberOfDocuments, quantum ); for( int i = Math.min( height, Fast.mostSignificantBit( frequency >> quantumDivisionShift ) ); i >= 0; i-- ) { towerTopB[ i ] = BitStreamIndex.gaussianGolombModulus( pointerQuantumSigma, i + 1 ); towerTopLog2B[ i ] = Fast.mostSignificantBit( towerTopB[ i ] ); towerLowerB[ i ] = BitStreamIndex.gaussianGolombModulus( pointerQuantumSigma, i ); towerLowerLog2B[ i ] = Fast.mostSignificantBit( towerLowerB[ i ] ); pointerPrediction[ i ] = (int)( ( quantum * ( 1L << i ) * index.numberOfDocuments + frequency / 2 ) / frequency ); } count = -1; currentDocument = -1; numberOfDocumentRecord = -1; state = BEFORE_POINTER; } public Index index() { return keyIndex; } public int frequency() { return frequency; } private void ensureCurrentDocument() { if ( currentDocument < 0 ) throw new IllegalStateException( "nextDocument() has never been called for (term=" + currentTerm + ")" ); if ( currentDocument == Integer.MAX_VALUE ) throw new IllegalStateException( "This reader is positioned beyond the end of list of (term=" + currentTerm + ")" ); } /** Returns whether there are no more document records in the current inverted list. * * <p>This method returns true if the last document pointer of the current inverted * list has been read. It makes no distinction as to where (inside the last document * record) this reader is currently positioned. In particular, this method will * return true independently of whether count and positions have been read or not (we * note by passing that this is the only sensible behaviour, as you can build indices * with or without counts/positions). * * <p>This method will return true also when this reader is positioned <em>beyond</em> * the last document pointer. In this case, {@link #currentDocumentPointer()} will * return {@link Integer#MAX_VALUE}. * * @return true whether there are no more document records in the current inverted list. */ private boolean endOfList() { if ( ASSERTS ) assert numberOfDocumentRecord <= frequency; return numberOfDocumentRecord >= frequency - 1; } public int document() { if ( ASSERTS ) ensureCurrentDocument(); return currentDocument; } public Payload payload() throws IOException { if ( DEBUG ) System.err.println( this + ".payload()" ); if ( ASSERTS ) ensureCurrentDocument(); throw new UnsupportedOperationException( "This index ("+ index + ") does not contain payloads" ); } public int count() throws IOException { if ( DEBUG ) System.err.println( this + ".count()" ); if ( count != -1 ) return count; if ( ASSERTS ) ensureCurrentDocument(); if ( state == BEFORE_TOWER ) readTower(); { state = BEFORE_POSITIONS; count = ibs.readGamma() + 1; } return count; } /** We read positions, assuming state <= BEFORE_POSITIONS */ @SuppressWarnings("unused") protected void updatePositionCache() throws IOException { if ( ASSERTS ) assert state <= BEFORE_POSITIONS; if ( state < BEFORE_POSITIONS ) { if ( state == BEFORE_TOWER ) readTower(); if ( state == BEFORE_COUNT ) { state = BEFORE_POSITIONS; count = ibs.readGamma() + 1; } } if ( count > positionCache.length ) positionCache = new int[ Math.max( positionCache.length * 2, count ) ]; final int[] occ = positionCache; state = BEFORE_POINTER; ibs.readDeltas( occ, count ); for( int i = 1; i < count; i++ ) occ[ i ] += occ[ i - 1 ] + 1; } public IntIterator positions() throws IOException { if ( ASSERTS ) ensureCurrentDocument(); if ( state <= BEFORE_POSITIONS ) updatePositionCache(); return IntIterators.wrap( positionCache, 0, count ); } public int[] positionArray() throws IOException { if ( ASSERTS ) ensureCurrentDocument(); if ( state <= BEFORE_POSITIONS ) updatePositionCache(); return positionCache; } // TODO: check who's using this (positionArray() is actually faster now) public int positions( final int[] position ) throws IOException { if ( ASSERTS ) ensureCurrentDocument(); if ( state <= BEFORE_POSITIONS ) updatePositionCache(); // And also that positions have been read if ( position.length < count ) return -count; for( int i = count; i-- != 0; ) position[ i ] = this.positionCache[ i ]; return count; } public int nextDocument() throws IOException { if ( DEBUG ) System.err.println( "{" + this + "} nextDocument()" ); if ( state != BEFORE_POINTER ) { if ( state == BEFORE_TOWER ) readTower(); if ( state == BEFORE_COUNT ) { state = BEFORE_POSITIONS; count = ibs.readGamma() + 1; } if ( state == BEFORE_POSITIONS ) { // Here we just skip; note that the state change is necessary if endOfList() is true state = BEFORE_POINTER; ibs.skipDeltas( count ); } } if ( endOfList() ) return -1; if ( hasPointers ) {// We do not write pointers for everywhere occurring terms. currentDocument += ibs.readDelta() + 1; } else currentDocument++; numberOfDocumentRecord++; state = BEFORE_COUNT; count = -1; if ( ( numberOfDocumentRecord & quantumModuloMask ) == 0 ) state = BEFORE_TOWER; return currentDocument; } /** Reads the entire skip tower for the current position. */ private void readTower() throws IOException { readTower( -1 ); } /** Reads the skip tower for the current position, possibly skipping part of the tower. * * <P>Note that this method will update {@link #state} only if it reads the entire tower, * otherwise the state remains {@link #BEFORE_TOWER}. * * @param pointer the tower will be read up to the first entry smaller than or equal to this pointer; use * -1 to guarantee that the entire tower will be read. */ private void readTower( final int pointer ) throws IOException { int i, j, k, cacheOffset, cache, towerLength = 0; long bitsAtTowerStart = 0; boolean truncated = false; if ( ASSERTS ) assert numberOfDocumentRecord % quantum == 0; if ( ASSERTS && state != BEFORE_TOWER ) throw new IllegalStateException( "readTower() called in state " + state ); cacheOffset = ( numberOfDocumentRecord & wModuloMask ); k = cacheOffset >> quantumDivisionShift; if ( ASSERTS && k == 0 ) { // Invalidate current tower data it.unimi.dsi.fastutil.ints.IntArrays.fill( pointerSkip, Integer.MAX_VALUE ); it.unimi.dsi.fastutil.longs.LongArrays.fill( bitSkip, Integer.MAX_VALUE ); } // Compute the height of the current skip tower. s = ( k == 0 )? height : Fast.leastSignificantBit( k ); cache = frequency - w * ( numberOfDocumentRecord >> wDivisionShift ); if ( cache < w ) { maxh = Fast.mostSignificantBit( ( cache >> quantumDivisionShift ) - k ); if ( maxh < s ) { s = maxh; truncated = true; } else truncated = false; } else { cache = w; maxh = height; truncated = k == 0; } //assert w == cache || k == 0 || lastMaxh == Fast.mostSignificantBit( k ^ ( cache/quantum ) ) : lastMaxh +","+ (Fast.mostSignificantBit( k ^ ( cache/quantum ) )); i = s; if ( s >= 0 ) { if ( k == 0 ) { if ( quantumBitLength < 0 ) { quantumBitLength = ibs.readDelta(); entryBitLength = ibs.readDelta(); } else { quantumBitLength += Fast.nat2int( ibs.readDelta() ); entryBitLength += Fast.nat2int( ibs.readDelta() ); } if ( DEBUG ) System.err.println( "{" + this + "} quantum bit length=" + quantumBitLength + " entry bit length=" + entryBitLength ); } if ( DEBUG ) System.err.println( "{" + this + "} Reading tower; pointer=" + pointer + " maxh=" + maxh + " s=" + s ); if ( s > 0 ) { towerLength = entryBitLength * ( s + 1 ) + Fast.nat2int( ibs.readDelta() ); if ( DEBUG ) System.err.println( "{" + this + "} Tower length=" + towerLength ); } // We store the number of bits read at the start of the tower (just after the length). bitsAtTowerStart = ibs.readBits(); if ( truncated ) { if ( DEBUG ) System.err.println( "{" + this + "} Truncated--reading tops" ); // We read the tower top. pointerSkip[ s ] = Fast.nat2int( ibs.readGolomb( towerTopB[ s ], towerTopLog2B[ s ] ) ) + pointerPrediction[ s ]; bitSkip[ s ] = quantumBitLength * ( 1 << s ) + entryBitLength * ( ( 1 << s + 1 ) - s - 2 ) + Fast.nat2int( ibs.readLongDelta() ); } else { // We copy the tower top from the lowest inherited entry suitably updated. pointerSkip[ s ] = pointerSkip[ s + 1 ] - ( currentDocument - pointerAtLastSkipTower ); bitSkip[ s ] = bitSkip[ s + 1 ] - ( bitsAtTowerStart - readBitsAtLastSkipTower ) - towerLength; } // We read the remaining part of the tower, at least until we point after pointer. if ( currentDocument + pointerSkip[ i ] > pointer ) { for( i = s - 1; i >= 0; i-- ) { pointerSkip[ i ] = Fast.nat2int( ibs.readGolomb( towerLowerB[ i ], towerLowerLog2B[ i ] ) ) + pointerSkip[ i + 1 ] / 2; bitSkip[ i ] = ( bitSkip[ i + 1 ] - entryBitLength * ( i + 1 ) ) / 2 - Fast.nat2int( ibs.readLongDelta() ); if ( DEBUG && currentDocument + pointerSkip[ i ] <= pointer ) System.err.println( "{" + this + "} stopping reading at i=" + i + " as currentDocument (" + currentDocument + ") plus pointer skip (" + pointerSkip[ i ] + ") is smaller than or equal target (" + pointer +")" ); if ( currentDocument + pointerSkip[ i ] <= pointer ) break; } } } /* If we did not read the entire tower, we need to fix the skips we read (as they * are offsets from the *end* of the tower) and moreover we must make unusable the * rest of the tower (for asserts). */ if ( i > 0 ) { final long fix = ibs.readBits() - bitsAtTowerStart; for( j = s; j >= i; j-- ) bitSkip[ j ] += towerLength - fix; if ( ASSERTS ) for( ; j >= 0; j-- ) pointerSkip[ j ] = Integer.MAX_VALUE; } else
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -