⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bitstreamhpindexreader.c

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 C
📖 第 1 页 / 共 3 页
字号:
				positionsToReadToReachCurrentPosition += count;				if ( COOKIES ) positionsToReadToReachCurrentPosition++;			}			count = -1;			positionsUnread = true;			return currentDocument;		}		/**		 * Reads the entire skip tower for the current position. This method assumes that the tower		 * has been passed over sequentially, and correpondingly sets {@link #lastPositionsIncrement}		 * to the number of positions bits of the last quantum.		 */		private void readTower() throws IOException {			lastPositionsIncrement = maxh >= 0 ? positionsBitSkip[ 0 ] : 0;			if ( DEBUG ) System.err.println( this + ": Setting lastPositionsIncrement to " + lastPositionsIncrement + " in readTower()" );			readTower( -1 );			if ( DEBUG ) System.err.println( this + ": Incrementing positionsBitsOffset by " + lastPositionsIncrement + " in readTower()" );			// TODO: this should be moved into readTower(int)			positionsBitsOffset += lastPositionsIncrement;		}		/**		 * Reads the skip tower for the current position, possibly skipping part of the tower.		 * 		 * <P>Note that this method will update {@link #state} only if it reads the entire tower,		 * otherwise the state remains {@link #BEFORE_TOWER}.		 * 		 * @param pointer the tower will be read up to the first entry smaller than or equal to this		 * pointer; use -1 to guarantee that the entire tower will be read.		 */		private void readTower( final int pointer ) throws IOException {			int i, j, k, cacheOffset, cache, towerLength = 0;			long bitsAtTowerStart = 0;			boolean truncated = false;			if ( ASSERTS ) assert numberOfDocumentRecord % quantum == 0;			if ( ASSERTS && state != BEFORE_TOWER ) throw new IllegalStateException( "readTower() called in state " + state );			cacheOffset = ( numberOfDocumentRecord & wModuloMask );			k = cacheOffset >> quantumDivisionShift;			if ( ASSERTS && k == 0 ) { // Invalidate current tower data				it.unimi.dsi.fastutil.ints.IntArrays.fill( pointerSkip, Integer.MAX_VALUE );				it.unimi.dsi.fastutil.longs.LongArrays.fill( bitSkip, Integer.MAX_VALUE );				it.unimi.dsi.fastutil.longs.LongArrays.fill( positionsBitSkip, Integer.MAX_VALUE );			}			// Compute the height of the current skip tower.			s = ( k == 0 ) ? height : Fast.leastSignificantBit( k );			cache = frequency - w * ( numberOfDocumentRecord >> wDivisionShift );			if ( cache < w ) {				maxh = Fast.mostSignificantBit( ( cache >> quantumDivisionShift ) - k );				if ( maxh < s ) {					s = maxh;					truncated = true;				}				else truncated = false;			}			else {				cache = w;				maxh = height;				truncated = k == 0;			}			// assert w == cache || k == 0 || lastMaxh == Fast.mostSignificantBit( k ^ (			// cache/quantum ) ) : lastMaxh +","+ (Fast.mostSignificantBit( k ^ ( cache/quantum )			// ));			i = s;			if ( s >= 0 ) {				if ( k == 0 ) {					if ( quantumBitLength < 0 ) {						quantumBitLength = ibs.readDelta();						positionsQuantumBitLength = ibs.readDelta();						entryBitLength = ibs.readDelta();					}					else {						quantumBitLength += Fast.nat2int( ibs.readDelta() );						positionsQuantumBitLength += Fast.nat2int( ibs.readDelta() );						entryBitLength += Fast.nat2int( ibs.readDelta() );					}					if ( DEBUG ) System.err.println( "{" + this + "} quantum bit length=" + quantumBitLength + " positions quantum bit length=" + positionsQuantumBitLength + " entry bit length=" + entryBitLength );				}				if ( DEBUG ) System.err.println( "{" + this + "} Reading tower; pointer=" + pointer + " maxh=" + maxh + " s=" + s );				if ( s > 0 ) {					towerLength = entryBitLength * ( s + 1 ) + Fast.nat2int( ibs.readDelta() );					if ( DEBUG ) System.err.println( "{" + this + "} Tower length=" + towerLength );				}				// We store the number of bits read at the start of the tower (just after the				// length).				bitsAtTowerStart = ibs.readBits();				if ( truncated ) {					if ( DEBUG ) System.err.println( "{" + this + "} Truncated--reading tops" );					// We read the tower top.					pointerSkip[ s ] = Fast.nat2int( ibs.readGolomb( towerTopB[ s ], towerTopLog2B[ s ] ) ) + pointerPrediction[ s ];					bitSkip[ s ] = quantumBitLength * ( 1 << s ) + entryBitLength * ( ( 1 << s + 1 ) - s - 2 ) + Fast.nat2int( ibs.readLongDelta() );					positionsBitSkip[ s ] = positionsQuantumBitLength * ( 1 << s ) + Fast.nat2int( ibs.readLongDelta() );				}				else {					// We copy the tower top from the lowest inherited entry suitably updated.					pointerSkip[ s ] = pointerSkip[ s + 1 ] - ( currentDocument - pointerAtLastSkipTower );					bitSkip[ s ] = bitSkip[ s + 1 ] - ( bitsAtTowerStart - readBitsAtLastSkipTower ) - towerLength;					// TODO: note that we could use the same method for pointers and bit skips!					positionsBitSkip[ s ] = positionsBitSkip[ s + 1 ] - positionsBitSkip[ s ];					if ( ASSERTS ) assert positionsBitSkip[ s ] > 0 : positionsBitSkip[ s ] + " <= " + 0;				}				// We read the remaining part of the tower, at least until we point after pointer.				if ( currentDocument + pointerSkip[ i ] > pointer ) {					for ( i = s - 1; i >= 0; i-- ) {						pointerSkip[ i ] = Fast.nat2int( ibs.readGolomb( towerLowerB[ i ], towerLowerLog2B[ i ] ) ) + pointerSkip[ i + 1 ] / 2;						bitSkip[ i ] = ( bitSkip[ i + 1 ] - entryBitLength * ( i + 1 ) ) / 2 - Fast.nat2int( ibs.readLongDelta() );						positionsBitSkip[ i ] = positionsBitSkip[ i + 1 ] / 2 - Fast.nat2int( ibs.readLongDelta() );						if ( DEBUG && currentDocument + pointerSkip[ i ] <= pointer ) System.err.println( "{" + this + "} stopping reading at i=" + i + " as currentDocument (" + currentDocument								+ ") plus pointer skip (" + pointerSkip[ i ] + ") is smaller than or equal target (" + pointer + ")" );						if ( currentDocument + pointerSkip[ i ] <= pointer ) break;					}				}			}			/*			 * If we did not read the entire tower, we need to fix the skips we read (as they are			 * offsets from the *end* of the tower) and moreover we must make unusable the rest of			 * the tower (for asserts).			 */			if ( i > 0 ) {				final long fix = ibs.readBits() - bitsAtTowerStart;				for ( j = s; j >= i; j-- )					bitSkip[ j ] += towerLength - fix;				if ( ASSERTS ) for ( ; j >= 0; j-- )					pointerSkip[ j ] = Integer.MAX_VALUE;			}			else state = BEFORE_COUNT;			// We update the inherited tower.			final long deltaBits = ibs.readBits() - readBitsAtLastSkipTower;			final int deltaPointers = currentDocument - pointerAtLastSkipTower;			if ( ASSERTS ) assert lastPositionsIncrement >= 0 : lastPositionsIncrement + " < " + 0;			for ( j = Fast.mostSignificantBit( k ^ ( cache >> quantumDivisionShift ) ); j >= s + 1; j-- ) {				bitSkip[ j ] -= deltaBits;				positionsBitSkip[ j ] -= lastPositionsIncrement;				pointerSkip[ j ] -= deltaPointers;			}			readBitsAtLastSkipTower = ibs.readBits();			pointerAtLastSkipTower = currentDocument;			lowest = i < 0 ? 0 : i;			if ( DEBUG ) {				System.err.println( "{" + this + "} " + "Computed skip tower (lowest: " + lowest + ") for document record number " + numberOfDocumentRecord + " (pointer " + currentDocument + ") from " + Math.max( i, 0 ) + ": " );				System.err.print( "% " );				for ( j = 0; j <= s; j++ )					System.err.print( pointerSkip[ j ] + ":" + bitSkip[ j ] + ":" + positionsBitSkip[ j ] + " " );				System.err.print( " [" );				for ( ; j <= height; j++ )					System.err.print( pointerSkip[ j ] + ":" + bitSkip[ j ] + ":" + positionsBitSkip[ j ] + " " );				System.err.print( "]" );				System.err.println();			}			if ( ASSERTS ) {				for ( j = Fast.mostSignificantBit( k ^ ( cache >> quantumDivisionShift ) ); j >= s + 1; j-- ) {					assert positionsBitSkip[ j ] >= 0 : "positionsBitSkip[" + j + "] = " + positionsBitSkip[ j ]  + " < " + 0;					assert positionsBitSkip[ j ] >= positionsBitSkip[ j - 1 ] : 						"positionsBitSkip[" + j + "] = " + positionsBitSkip[ j ]  + " < " + positionsBitSkip[ j - 1 ] + " = positionsBitSkip[" + ( j - 1 ) + "]";				}			}		}		public int skipTo( final int p ) throws IOException {			if ( DEBUG ) System.err.println( this + ".skipTo(" + p + ") [currentDocument=" + currentDocument + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", positionsBitsOffset=" + positionsBitsOffset + "]" );			// If we are just at the start of a list, let us read the first pointer.			if ( numberOfDocumentRecord == -1 ) nextDocument(); // TODO: shouldn't we just read the																// tower?			if ( currentDocument >= p ) {				if ( DEBUG ) System.err.println( this + ": No skip necessary, returning " + currentDocument );				return currentDocument;			}			if ( state == BEFORE_TOWER ) {				lastPositionsIncrement = maxh >= 0 ? positionsBitSkip[ 0 ] : 0;				readTower( p );				if ( DEBUG ) System.err.println( this + ": Incrementing positionsBitsOffset by " + lastPositionsIncrement + " in skipTo() initial phase" );				positionsBitsOffset += lastPositionsIncrement;			}			final int[] pointerSkip = this.pointerSkip;			for ( ;; ) {				if ( ASSERTS ) assert maxh < 0 || lowest > 0 || pointerSkip[ 0 ] != Integer.MAX_VALUE;				// If on a defective quantum (no tower) or p is inside the current quantum (no				// need to scan the tower) we bail out.				if ( maxh < 0 || lowest == 0 && pointerAtLastSkipTower + pointerSkip[ 0 ] > p ) break;				if ( DEBUG ) System.err.println( this + ": In for loop, currentDocument=" + currentDocument + ", maxh=" + maxh + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", p=" + p );				final int cacheOffset = ( numberOfDocumentRecord & wModuloMask );				final int k = cacheOffset >> quantumDivisionShift;				final int top = Fast.mostSignificantBit( k ^ ( Math.min( w, frequency - numberOfDocumentRecord + cacheOffset ) >> quantumDivisionShift ) );				int i;				for ( i = lowest; i <= top; i++ ) {					if ( ASSERTS && ( k & 1 << i ) != 0 ) assert pointerSkip[ i ] == pointerSkip[ i + 1 ];					if ( ASSERTS ) assert pointerSkip[ i ] != Integer.MAX_VALUE : "Invalid pointer skip " + i + " (lowest=" + lowest + ", top=" + top + ")";					if ( pointerAtLastSkipTower + pointerSkip[ i ] > p ) break;				}				if ( --i < 0 ) break;				if ( ASSERTS ) assert i >= lowest : i + " < " + lowest;				if ( DEBUG ) System.err.println( this + ": Safely after for with i=" + i + ", P[i]=" + pointerSkip[ i ] + ", A[i]=" + bitSkip[ i ] + ", B[i]=" + positionsBitSkip[ i ] );				if ( DEBUG ) System.err.println( this + ": [" + ibs.readBits() + "] Skipping " + ( bitSkip[ i ] - ( ibs.readBits() - readBitsAtLastSkipTower ) ) + " bits ("						+ ( ( ( k & -( 1 << i ) ) + ( 1 << i ) ) * quantum - cacheOffset ) + " records) to get to document pointer " + ( currentDocument + pointerSkip[ i ] ) );				ibs.skip( bitSkip[ i ] - ( ibs.readBits() - readBitsAtLastSkipTower ) );				/* We accumulate the number of bits that must be skipped in the positions stream to reach the position				 * corresponding to the current tower. */				if ( DEBUG ) System.err.println( this + ": Incrementing positionsBitsOffset by " + positionsBitSkip[ i ] + " (height " + i + ")" );				lastPositionsIncrement = positionsBitSkip[ i ];				positionsBitsOffset += lastPositionsIncrement;				if ( ASSERTS && positionsLength != -1 ) assert positionsBitsOffset <= positionsLength : positionsBitsOffset + " > " + positionsLength;				state = BEFORE_TOWER;				currentDocument = pointerSkip[ i ] + pointerAtLastSkipTower;				numberOfDocumentRecord += ( ( k & -( 1 << i ) ) + ( 1 << i ) ) * quantum - cacheOffset;				// If we skipped beyond the end of the list, we invalidate the current document.				if ( numberOfDocumentRecord == frequency ) {					currentDocument = Integer.MAX_VALUE;					positionsUnread = false;					state = BEFORE_POINTER; // We are actually before a frequency, but we must					// avoid that calls to nextDocument() read anything				}				else {					positionsUnread = true;					readTower( p ); // Note that if we are exactly on the destination pointer, we will read the entire tower.				}				count = -1; // We must invalidate count as readDocumentPointer() would do.				positionsToReadToReachCurrentPosition = 0;				if ( endOfList() ) {					if ( DEBUG ) System.err.println( this + ".toSkip(): end-of-list, returning " + currentDocument + ", state=" + state );					// Note that if p == Integer.MAX_VALUE, we are certainly beyond end-of-list					return p == Integer.MAX_VALUE ? Integer.MAX_VALUE : currentDocument;				}			}			if ( DEBUG ) System.err.println( this + ": Completing sequentially, currentDocument=" + currentDocument + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", p=" + p );			while ( currentDocument < p ) {				if ( DEBUG ) System.err.println( this + ": Skipping sequentially (second), currentDocument=" + currentDocument + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", p=" + p );				if ( nextDocument() == -1 ) {					if ( DEBUG ) System.err.println( this + ": end-of-list, returning MAX_VALUE" );					return Integer.MAX_VALUE;				}			}			if ( DEBUG ) System.err.println( this + ".toSkip(): Returning " + currentDocument );			return currentDocument;		}		public void dispose() throws IOException {			parent.close();		}		public boolean hasNext() {			return !endOfList();		}		public int nextInt() {			if ( !hasNext() ) throw new NoSuchElementException();			try {				return nextDocument();			}			catch ( IOException e ) {				throw new RuntimeException( e );			}		}		public String toString() {			return index + " [" + term + "]";		}		/**		 * An interval iterator returning the positions of the current document as singleton		 * intervals.		 */		private final class IndexIntervalIterator extends AbstractObjectIterator<Interval> implements IntervalIterator {			int pos = -1;			public void reset() throws IOException {				pos = -1;				if ( positionsUnread ) updatePositionCache(); // This guarantees the position cache is ok			}			public void intervalTerms( final IntSet terms ) {				terms.add( BitStreamHPIndexReaderIndexIterator.this.term );			}			public boolean hasNext() {				return pos < count - 1;			}			public Interval next() {				if ( !hasNext() ) throw new NoSuchElementException();				return Interval.valueOf( positionCache[ ++pos ] );			}			public Interval nextInterval() {				return pos < count - 1 ? Interval.valueOf( positionCache[ ++pos ] ) : null;			}			public int extent() {				return 1;			}			public String toString() {				return index + ": " + term + "[doc=" + currentDocument + ", count=" + count + ", pos=" + pos + "]";			}		};		public Reference2ReferenceMap<Index, IntervalIterator> intervalIterators() throws IOException {			intervalIterator();			return singletonIntervalIterator;		}		public IntervalIterator intervalIterator() throws IOException {			return intervalIterator( keyIndex );		}		public IntervalIterator intervalIterator( final Index index ) throws IOException {			if ( ASSERTS ) ensureCurrentDocument();			// TODO: this was if ( index != keyIndex || hasPayloads )			if ( index != keyIndex ) return IntervalIterators.TRUE;			if ( ASSERTS ) assert intervalIterator != null;			intervalIterator.reset();			return intervalIterator;		}		public ReferenceSet<Index> indices() {			return index.singletonSet;		}	}	private IndexIterator documents( final CharSequence term, final int termNumber ) throws IOException {		indexIterator.term( term );		indexIterator.position( termNumber );		return indexIterator;	}	public IndexIterator documents( final int term ) throws IOException {		return documents( null, term );	}	public IndexIterator documents( final CharSequence term ) throws IOException {		if ( closed ) throw new IllegalStateException( "This " + getClass().getSimpleName() + " has been closed" );		if ( index.termMap != null ) {			final int termIndex = (int)index.termMap.getLong( term );			if ( termIndex == -1 ) return index.emptyIndexIterator;			return documents( term, termIndex );		}		throw new UnsupportedOperationException( "Index " + index + " has no term map" );	}	@Override	public IndexIterator nextIterator() throws IOException {		return indexIterator.advance();	}	public String toString() {		return getClass().getSimpleName() + "[" + index + "]";	}		public void close() throws IOException {		super.close();		indexIterator.ibs.close();		indexIterator.positions.close();	}}#else"Mismatch in specified directives. First of all, you must define CLASSNAME.""Then, you must either just define GENERIC, or the full range of assertions""(#frequencies, #pointers, #counts, #positions)."#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -