📄 bitstreamhpindexreader.java
字号:
} else { state = BEFORE_COUNT; if ( ASSERTS ) assert count > 0 : count + " <= " + 0; positionsToReadToReachCurrentPosition += count; if ( COOKIES ) positionsToReadToReachCurrentPosition++; } count = -1; positionsUnread = true; return currentDocument; } /** * Reads the entire skip tower for the current position. This method assumes that the tower * has been passed over sequentially, and correpondingly sets {@link #lastPositionsIncrement} * to the number of positions bits of the last quantum. */ private void readTower() throws IOException { lastPositionsIncrement = maxh >= 0 ? positionsBitSkip[ 0 ] : 0; if ( DEBUG ) System.err.println( this + ": Setting lastPositionsIncrement to " + lastPositionsIncrement + " in readTower()" ); readTower( -1 ); if ( DEBUG ) System.err.println( this + ": Incrementing positionsBitsOffset by " + lastPositionsIncrement + " in readTower()" ); // TODO: this should be moved into readTower(int) positionsBitsOffset += lastPositionsIncrement; } /** * Reads the skip tower for the current position, possibly skipping part of the tower. * * <P>Note that this method will update {@link #state} only if it reads the entire tower, * otherwise the state remains {@link #BEFORE_TOWER}. * * @param pointer the tower will be read up to the first entry smaller than or equal to this * pointer; use -1 to guarantee that the entire tower will be read. */ private void readTower( final int pointer ) throws IOException { int i, j, k, cacheOffset, cache, towerLength = 0; long bitsAtTowerStart = 0; boolean truncated = false; if ( ASSERTS ) assert numberOfDocumentRecord % quantum == 0; if ( ASSERTS && state != BEFORE_TOWER ) throw new IllegalStateException( "readTower() called in state " + state ); cacheOffset = ( numberOfDocumentRecord & wModuloMask ); k = cacheOffset >> quantumDivisionShift; if ( ASSERTS && k == 0 ) { // Invalidate current tower data it.unimi.dsi.fastutil.ints.IntArrays.fill( pointerSkip, Integer.MAX_VALUE ); it.unimi.dsi.fastutil.longs.LongArrays.fill( bitSkip, Integer.MAX_VALUE ); it.unimi.dsi.fastutil.longs.LongArrays.fill( positionsBitSkip, Integer.MAX_VALUE ); } // Compute the height of the current skip tower. s = ( k == 0 ) ? height : Fast.leastSignificantBit( k ); cache = frequency - w * ( numberOfDocumentRecord >> wDivisionShift ); if ( cache < w ) { maxh = Fast.mostSignificantBit( ( cache >> quantumDivisionShift ) - k ); if ( maxh < s ) { s = maxh; truncated = true; } else truncated = false; } else { cache = w; maxh = height; truncated = k == 0; } // assert w == cache || k == 0 || lastMaxh == Fast.mostSignificantBit( k ^ ( // cache/quantum ) ) : lastMaxh +","+ (Fast.mostSignificantBit( k ^ ( cache/quantum ) // )); i = s; if ( s >= 0 ) { if ( k == 0 ) { if ( quantumBitLength < 0 ) { quantumBitLength = ibs.readDelta(); positionsQuantumBitLength = ibs.readDelta(); entryBitLength = ibs.readDelta(); } else { quantumBitLength += Fast.nat2int( ibs.readDelta() ); positionsQuantumBitLength += Fast.nat2int( ibs.readDelta() ); entryBitLength += Fast.nat2int( ibs.readDelta() ); } if ( DEBUG ) System.err.println( "{" + this + "} quantum bit length=" + quantumBitLength + " positions quantum bit length=" + positionsQuantumBitLength + " entry bit length=" + entryBitLength ); } if ( DEBUG ) System.err.println( "{" + this + "} Reading tower; pointer=" + pointer + " maxh=" + maxh + " s=" + s ); if ( s > 0 ) { towerLength = entryBitLength * ( s + 1 ) + Fast.nat2int( ibs.readDelta() ); if ( DEBUG ) System.err.println( "{" + this + "} Tower length=" + towerLength ); } // We store the number of bits read at the start of the tower (just after the // length). bitsAtTowerStart = ibs.readBits(); if ( truncated ) { if ( DEBUG ) System.err.println( "{" + this + "} Truncated--reading tops" ); // We read the tower top. pointerSkip[ s ] = Fast.nat2int( ibs.readGolomb( towerTopB[ s ], towerTopLog2B[ s ] ) ) + pointerPrediction[ s ]; bitSkip[ s ] = quantumBitLength * ( 1 << s ) + entryBitLength * ( ( 1 << s + 1 ) - s - 2 ) + Fast.nat2int( ibs.readLongDelta() ); positionsBitSkip[ s ] = positionsQuantumBitLength * ( 1 << s ) + Fast.nat2int( ibs.readLongDelta() ); } else { // We copy the tower top from the lowest inherited entry suitably updated. pointerSkip[ s ] = pointerSkip[ s + 1 ] - ( currentDocument - pointerAtLastSkipTower ); bitSkip[ s ] = bitSkip[ s + 1 ] - ( bitsAtTowerStart - readBitsAtLastSkipTower ) - towerLength; // TODO: note that we could use the same method for pointers and bit skips! positionsBitSkip[ s ] = positionsBitSkip[ s + 1 ] - positionsBitSkip[ s ]; if ( ASSERTS ) assert positionsBitSkip[ s ] > 0 : positionsBitSkip[ s ] + " <= " + 0; } // We read the remaining part of the tower, at least until we point after pointer. if ( currentDocument + pointerSkip[ i ] > pointer ) { for ( i = s - 1; i >= 0; i-- ) { pointerSkip[ i ] = Fast.nat2int( ibs.readGolomb( towerLowerB[ i ], towerLowerLog2B[ i ] ) ) + pointerSkip[ i + 1 ] / 2; bitSkip[ i ] = ( bitSkip[ i + 1 ] - entryBitLength * ( i + 1 ) ) / 2 - Fast.nat2int( ibs.readLongDelta() ); positionsBitSkip[ i ] = positionsBitSkip[ i + 1 ] / 2 - Fast.nat2int( ibs.readLongDelta() ); if ( DEBUG && currentDocument + pointerSkip[ i ] <= pointer ) System.err.println( "{" + this + "} stopping reading at i=" + i + " as currentDocument (" + currentDocument + ") plus pointer skip (" + pointerSkip[ i ] + ") is smaller than or equal target (" + pointer + ")" ); if ( currentDocument + pointerSkip[ i ] <= pointer ) break; } } } /* * If we did not read the entire tower, we need to fix the skips we read (as they are * offsets from the *end* of the tower) and moreover we must make unusable the rest of * the tower (for asserts). */ if ( i > 0 ) { final long fix = ibs.readBits() - bitsAtTowerStart; for ( j = s; j >= i; j-- ) bitSkip[ j ] += towerLength - fix; if ( ASSERTS ) for ( ; j >= 0; j-- ) pointerSkip[ j ] = Integer.MAX_VALUE; } else state = BEFORE_COUNT; // We update the inherited tower. final long deltaBits = ibs.readBits() - readBitsAtLastSkipTower; final int deltaPointers = currentDocument - pointerAtLastSkipTower; if ( ASSERTS ) assert lastPositionsIncrement >= 0 : lastPositionsIncrement + " < " + 0; for ( j = Fast.mostSignificantBit( k ^ ( cache >> quantumDivisionShift ) ); j >= s + 1; j-- ) { bitSkip[ j ] -= deltaBits; positionsBitSkip[ j ] -= lastPositionsIncrement; pointerSkip[ j ] -= deltaPointers; } readBitsAtLastSkipTower = ibs.readBits(); pointerAtLastSkipTower = currentDocument; lowest = i < 0 ? 0 : i; if ( DEBUG ) { System.err.println( "{" + this + "} " + "Computed skip tower (lowest: " + lowest + ") for document record number " + numberOfDocumentRecord + " (pointer " + currentDocument + ") from " + Math.max( i, 0 ) + ": " ); System.err.print( "% " ); for ( j = 0; j <= s; j++ ) System.err.print( pointerSkip[ j ] + ":" + bitSkip[ j ] + ":" + positionsBitSkip[ j ] + " " ); System.err.print( " [" ); for ( ; j <= height; j++ ) System.err.print( pointerSkip[ j ] + ":" + bitSkip[ j ] + ":" + positionsBitSkip[ j ] + " " ); System.err.print( "]" ); System.err.println(); } if ( ASSERTS ) { for ( j = Fast.mostSignificantBit( k ^ ( cache >> quantumDivisionShift ) ); j >= s + 1; j-- ) { assert positionsBitSkip[ j ] >= 0 : "positionsBitSkip[" + j + "] = " + positionsBitSkip[ j ] + " < " + 0; assert positionsBitSkip[ j ] >= positionsBitSkip[ j - 1 ] : "positionsBitSkip[" + j + "] = " + positionsBitSkip[ j ] + " < " + positionsBitSkip[ j - 1 ] + " = positionsBitSkip[" + ( j - 1 ) + "]"; } } } public int skipTo( final int p ) throws IOException { if ( DEBUG ) System.err.println( this + ".skipTo(" + p + ") [currentDocument=" + currentDocument + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", positionsBitsOffset=" + positionsBitsOffset + "]" ); // If we are just at the start of a list, let us read the first pointer. if ( numberOfDocumentRecord == -1 ) nextDocument(); // TODO: shouldn't we just read the // tower? if ( currentDocument >= p ) { if ( DEBUG ) System.err.println( this + ": No skip necessary, returning " + currentDocument ); return currentDocument; } if ( state == BEFORE_TOWER ) { lastPositionsIncrement = maxh >= 0 ? positionsBitSkip[ 0 ] : 0; readTower( p ); if ( DEBUG ) System.err.println( this + ": Incrementing positionsBitsOffset by " + lastPositionsIncrement + " in skipTo() initial phase" ); positionsBitsOffset += lastPositionsIncrement; } final int[] pointerSkip = this.pointerSkip; for ( ;; ) { if ( ASSERTS ) assert maxh < 0 || lowest > 0 || pointerSkip[ 0 ] != Integer.MAX_VALUE; // If on a defective quantum (no tower) or p is inside the current quantum (no // need to scan the tower) we bail out. if ( maxh < 0 || lowest == 0 && pointerAtLastSkipTower + pointerSkip[ 0 ] > p ) break; if ( DEBUG ) System.err.println( this + ": In for loop, currentDocument=" + currentDocument + ", maxh=" + maxh + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", p=" + p ); final int cacheOffset = ( numberOfDocumentRecord & wModuloMask ); final int k = cacheOffset >> quantumDivisionShift; final int top = Fast.mostSignificantBit( k ^ ( Math.min( w, frequency - numberOfDocumentRecord + cacheOffset ) >> quantumDivisionShift ) ); int i; for ( i = lowest; i <= top; i++ ) { if ( ASSERTS && ( k & 1 << i ) != 0 ) assert pointerSkip[ i ] == pointerSkip[ i + 1 ]; if ( ASSERTS ) assert pointerSkip[ i ] != Integer.MAX_VALUE : "Invalid pointer skip " + i + " (lowest=" + lowest + ", top=" + top + ")"; if ( pointerAtLastSkipTower + pointerSkip[ i ] > p ) break; } if ( --i < 0 ) break; if ( ASSERTS ) assert i >= lowest : i + " < " + lowest; if ( DEBUG ) System.err.println( this + ": Safely after for with i=" + i + ", P[i]=" + pointerSkip[ i ] + ", A[i]=" + bitSkip[ i ] + ", B[i]=" + positionsBitSkip[ i ] ); if ( DEBUG ) System.err.println( this + ": [" + ibs.readBits() + "] Skipping " + ( bitSkip[ i ] - ( ibs.readBits() - readBitsAtLastSkipTower ) ) + " bits (" + ( ( ( k & -( 1 << i ) ) + ( 1 << i ) ) * quantum - cacheOffset ) + " records) to get to document pointer " + ( currentDocument + pointerSkip[ i ] ) ); ibs.skip( bitSkip[ i ] - ( ibs.readBits() - readBitsAtLastSkipTower ) ); /* We accumulate the number of bits that must be skipped in the positions stream to reach the position * corresponding to the current tower. */ if ( DEBUG ) System.err.println( this + ": Incrementing positionsBitsOffset by " + positionsBitSkip[ i ] + " (height " + i + ")" ); lastPositionsIncrement = positionsBitSkip[ i ]; positionsBitsOffset += lastPositionsIncrement; if ( ASSERTS && positionsLength != -1 ) assert positionsBitsOffset <= positionsLength : positionsBitsOffset + " > " + positionsLength; state = BEFORE_TOWER; currentDocument = pointerSkip[ i ] + pointerAtLastSkipTower; numberOfDocumentRecord += ( ( k & -( 1 << i ) ) + ( 1 << i ) ) * quantum - cacheOffset; // If we skipped beyond the end of the list, we invalidate the current document. if ( numberOfDocumentRecord == frequency ) { currentDocument = Integer.MAX_VALUE; positionsUnread = false; state = BEFORE_POINTER; // We are actually before a frequency, but we must // avoid that calls to nextDocument() read anything } else { positionsUnread = true; readTower( p ); // Note that if we are exactly on the destination pointer, we will read the entire tower. } count = -1; // We must invalidate count as readDocumentPointer() would do. positionsToReadToReachCurrentPosition = 0; if ( endOfList() ) { if ( DEBUG ) System.err.println( this + ".toSkip(): end-of-list, returning " + currentDocument + ", state=" + state ); // Note that if p == Integer.MAX_VALUE, we are certainly beyond end-of-list return p == Integer.MAX_VALUE ? Integer.MAX_VALUE : currentDocument; } } if ( DEBUG ) System.err.println( this + ": Completing sequentially, currentDocument=" + currentDocument + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", p=" + p ); while ( currentDocument < p ) { if ( DEBUG ) System.err.println( this + ": Skipping sequentially (second), currentDocument=" + currentDocument + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", p=" + p ); if ( nextDocument() == -1 ) { if ( DEBUG ) System.err.println( this + ": end-of-list, returning MAX_VALUE" ); return Integer.MAX_VALUE; } } if ( DEBUG ) System.err.println( this + ".toSkip(): Returning " + currentDocument ); return currentDocument; } public void dispose() throws IOException { parent.close(); } public boolean hasNext() { return !endOfList(); } public int nextInt() { if ( !hasNext() ) throw new NoSuchElementException(); try { return nextDocument(); } catch ( IOException e ) { throw new RuntimeException( e ); } } public String toString() { return index + " [" + term + "]"; } /** * An interval iterator returning the positions of the current document as singleton * intervals. */ private final class IndexIntervalIterator extends AbstractObjectIterator<Interval> implements IntervalIterator { int pos = -1; public void reset() throws IOException { pos = -1; if ( positionsUnread ) updatePositionCache(); // This guarantees the position cache is ok } public void intervalTerms( final IntSet terms ) { terms.add( BitStreamHPIndexReaderIndexIterator.this.term ); } public boolean hasNext() { return pos < count - 1; } public Interval next() { if ( !hasNext() ) throw new NoSuchElementException(); return Interval.valueOf( positionCache[ ++pos ] ); } public Interval nextInterval() { return pos < count - 1 ? Interval.valueOf( positionCache[ ++pos ] ) : null; } public int extent() { return 1; } public String toString() { return index + ": " + term + "[doc=" + currentDocument + ", count=" + count + ", pos=" + pos + "]"; } }; public Reference2ReferenceMap<Index, IntervalIterator> intervalIterators() throws IOException { intervalIterator(); return singletonIntervalIterator; } public IntervalIterator intervalIterator() throws IOException { return intervalIterator( keyIndex ); } public IntervalIterator intervalIterator( final Index index ) throws IOException { if ( ASSERTS ) ensureCurrentDocument(); // TODO: this was if ( index != keyIndex || hasPayloads ) if ( index != keyIndex ) return IntervalIterators.TRUE; if ( ASSERTS ) assert intervalIterator != null; intervalIterator.reset(); return intervalIterator; } public ReferenceSet<Index> indices() { return index.singletonSet; } } private IndexIterator documents( final CharSequence term, final int termNumber ) throws IOException { indexIterator.term( term ); indexIterator.position( termNumber ); return indexIterator; } public IndexIterator documents( final int term ) throws IOException { return documents( null, term ); } public IndexIterator documents( final CharSequence term ) throws IOException { if ( closed ) throw new IllegalStateException( "This " + getClass().getSimpleName() + " has been closed" ); if ( index.termMap != null ) { final int termIndex = (int)index.termMap.getLong( term ); if ( termIndex == -1 ) return index.emptyIndexIterator; return documents( term, termIndex ); } throw new UnsupportedOperationException( "Index " + index + " has no term map" ); } @Override public IndexIterator nextIterator() throws IOException { return indexIterator.advance(); } public String toString() { return getClass().getSimpleName() + "[" + index + "]"; } public void close() throws IOException { super.close(); indexIterator.ibs.close(); indexIterator.positions.close(); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -