⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bitstreamhpindexreader.c

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 C
📖 第 1 页 / 共 3 页
字号:
		private void readFrequency() throws IOException {			// Read the frequency#if GENERIC			switch ( index.frequencyCoding ) {			case GAMMA:#endif#if GENERIC || #frequencies(GAMMA)				frequency = ibs.readGamma() + 1;#endif#if GENERIC				break;			case SHIFTED_GAMMA:#endif#if GENERIC || #frequencies(SHIFTED_GAMMA)				frequency = ibs.readShiftedGamma() + 1;#endif#if GENERIC				break;			case DELTA:#endif#if GENERIC || #frequencies(DELTA)				frequency = ibs.readDelta() + 1;#endif#if GENERIC				break;			default:				throw new IllegalStateException( "The required frequency coding (" + index.frequencyCoding + ") is not supported." );			}#endif							if ( DEBUG ) System.err.println( this + ": Frequency for term " + term + " is " + frequency );						hasPointers = frequency < index.numberOfDocuments;#if GENERIC				// We compute the modulus used for pointer Golomb coding			if ( pointerCoding == Coding.GOLOMB ) {#endif#if GENERIC || #pointers(GOLOMB)				if ( hasPointers ) {					b = BitStreamIndex.golombModulus( frequency, index.numberOfDocuments );					log2b = Fast.mostSignificantBit( b );				}#endif#if GENERIC				}#endif			quantumBitLength = positionsQuantumBitLength = entryBitLength = -1;			lowest = Integer.MAX_VALUE;			if ( ASSERTS ) for ( int i = height; i > Math.min( height, Fast.mostSignificantBit( frequency >> quantumDivisionShift ) ); i-- )				towerTopB[ i ] = towerLowerB[ i ] = pointerPrediction[ i ] = -1;			final long pointerQuantumSigma = BitStreamIndex.quantumSigma( frequency, index.numberOfDocuments, quantum );			for ( int i = Math.min( height, Fast.mostSignificantBit( frequency >> quantumDivisionShift ) ); i >= 0; i-- ) {				towerTopB[ i ] = BitStreamIndex.gaussianGolombModulus( pointerQuantumSigma, i + 1 );				towerTopLog2B[ i ] = Fast.mostSignificantBit( towerTopB[ i ] );				towerLowerB[ i ] = BitStreamIndex.gaussianGolombModulus( pointerQuantumSigma, i );				towerLowerLog2B[ i ] = Fast.mostSignificantBit( towerLowerB[ i ] );				pointerPrediction[ i ] = (int)( ( quantum * ( 1L << i ) * index.numberOfDocuments + frequency / 2 ) / frequency );			}			count = -1;			currentDocument = -1;			numberOfDocumentRecord = -1;			positionsBitsOffset = 0;			positionsBitSkip[ 0 ] = 0; // To avoid spurious tower updates on the first tower			positionsToReadToReachCurrentPosition = 0;			lastPositionsIncrement = 0;			state = BEFORE_POINTER;		}		public Index index() {			return keyIndex;		}		public int frequency() {			return frequency;		}		private void ensureCurrentDocument() {			if ( currentDocument < 0 ) throw new IllegalStateException( "nextDocument() has never been called for (term=" + term + ")" );			if ( currentDocument == Integer.MAX_VALUE ) throw new IllegalStateException( "This reader is positioned beyond the end of list of (term=" + term + ")" );		}		/**		 * Returns whether there are no more document records in the current inverted list.		 * 		 * <p>This method returns true if the last document pointer of the current inverted list		 * has been read. It makes no distinction as to where (inside the last document record) this		 * reader is currently positioned. In particular, this method will return true independently		 * of whether count and positions have been read or not (we note by passing that this is the		 * only sensible behaviour, as you can build indices with or without counts/positions).		 * 		 * <p>This method will return true also when this reader is positioned <em>beyond</em>		 * the last document pointer. In this case, {@link #currentDocumentPointer()} will return		 * {@link Integer#MAX_VALUE}.		 * 		 * @return true whether there are no more document records in the current inverted list.		 */		private boolean endOfList() {			if ( ASSERTS ) assert numberOfDocumentRecord <= frequency;			return numberOfDocumentRecord >= frequency - 1;		}		public int document() {			if ( ASSERTS ) ensureCurrentDocument();			return currentDocument;		}		public Payload payload() throws IOException {			throw new UnsupportedOperationException( "This index (" + index + ") does not contain payloads" );		}		public int count() throws IOException {			if ( DEBUG ) System.err.println( this + ".count()" );			if ( count != -1 ) return count;			if ( ASSERTS ) ensureCurrentDocument();			if ( state == BEFORE_TOWER ) readTower();			if ( ASSERTS && state != BEFORE_COUNT ) throw new IllegalStateException();			state = BEFORE_POINTER;#if GENERIC				switch ( countCoding ) {			case UNARY:#endif#if GENERIC || #counts(UNARY)				count = ibs.readUnary() + 1;#endif#if GENERIC					break;			case SHIFTED_GAMMA:#endif#if GENERIC || #counts(SHIFTED_GAMMA)				count = ibs.readShiftedGamma() + 1;#endif#if GENERIC					break;			case GAMMA:#endif#if GENERIC || #counts(GAMMA)				count = ibs.readGamma() + 1;#endif#if GENERIC					break;			case DELTA:#endif#if GENERIC || #counts(DELTA)				count = ibs.readDelta() + 1;#endif#if GENERIC					break;			default:				throw new IllegalStateException( "The required count coding (" + countCoding + ") is not supported." );			}#endif			return count;		}		protected void updatePositionCache() throws IOException {			if ( DEBUG ) System.err.println( this + ".updatePositionCache()" );			positionsUnread = false;			count(); // This will force reading the tower and updating positionsBitsOffset, if necessary			if ( positionsBitsOffset > positions.readBits() ) {				if ( DEBUG ) System.err.println( this + ": positionsBitsOffset=" + positionsBitsOffset +  ", positions.readBits()=" + positions.readBits() + ", skipping by " + ( positionsBitsOffset - positions.readBits() ) );				positions.skip( positionsBitsOffset - positions.readBits() );			}						if ( ASSERTS ) assert positionsToReadToReachCurrentPosition >= 0 : positionsToReadToReachCurrentPosition + " < 0";			if ( positionsToReadToReachCurrentPosition > 0 ) {				if ( DEBUG ) System.err.println( this + ":Skipping sequentially " + positionsToReadToReachCurrentPosition + " positions..." );				// We skip, inside the current quantum, the positions we haven't read#if GENERIC				switch ( positionCoding ) {				case SHIFTED_GAMMA:#endif#if GENERIC	|| #positions(SHIFTED_GAMMA)					if ( COOKIES ) {						positionsToReadToReachCurrentPosition--;						if ( positions.readShiftedGamma() != Integer.MAX_VALUE ) throw new AssertionError();					}					positions.skipShiftedGammas( positionsToReadToReachCurrentPosition );#endif#if GENERIC						break;				case GAMMA:#endif#if GENERIC	|| #positions(GAMMA)					if ( COOKIES ) {						positionsToReadToReachCurrentPosition--;						if ( positions.readGamma() != Integer.MAX_VALUE ) throw new AssertionError();					}					positions.skipGammas( positionsToReadToReachCurrentPosition );#endif#if GENERIC						break;				case DELTA:#endif#if GENERIC	|| #positions(DELTA)					if ( COOKIES ) {						positionsToReadToReachCurrentPosition--;						if ( positions.readDelta() != Integer.MAX_VALUE ) throw new AssertionError();					}					positions.skipDeltas( positionsToReadToReachCurrentPosition );#endif#if GENERIC						break;				default:					throw new IllegalStateException( "The required position coding (" + positionCoding + ") is not supported." );				}#endif			}									// We must fix it so that nextDocument() will restore it to 0			positionsToReadToReachCurrentPosition = -count;			if ( COOKIES ) positionsToReadToReachCurrentPosition--;			if ( count > positionCache.length ) positionCache = new int[ Math.max( positionCache.length * 2, count ) ];			final int[] occ = positionCache;						#if GENERIC			switch ( positionCoding ) {			case SHIFTED_GAMMA:#endif#if GENERIC	|| #positions(SHIFTED_GAMMA)				if ( COOKIES && positions.readShiftedGamma() != Integer.MAX_VALUE ) throw new AssertionError();				positions.readShiftedGammas( occ, count );				for ( int i = 1; i < count; i++ ) occ[ i ] += occ[ i - 1 ] + 1;#endif#if GENERIC					return;			case GAMMA:#endif#if GENERIC	|| #positions(GAMMA)				if ( COOKIES && positions.readGamma() != Integer.MAX_VALUE ) throw new AssertionError();				positions.readGammas( occ, count );				for ( int i = 1; i < count; i++ ) occ[ i ] += occ[ i - 1 ] + 1;#endif#if GENERIC					return;			case DELTA:#endif#if GENERIC	|| #positions(DELTA)				if ( COOKIES && positions.readDelta() != Integer.MAX_VALUE ) throw new AssertionError();				positions.readDeltas( occ, count );				for ( int i = 1; i < count; i++ ) occ[ i ] += occ[ i - 1 ] + 1;#endif#if GENERIC					return;			default:				throw new IllegalStateException( "The required position coding (" + index.positionCoding + ") is not supported." );			}#endif		}		public IntIterator positions() throws IOException {			if ( ASSERTS ) ensureCurrentDocument();			if ( positionsUnread ) updatePositionCache();			return IntIterators.wrap( positionCache, 0, count );		}		public int[] positionArray() throws IOException {			if ( ASSERTS ) ensureCurrentDocument();			if ( positionsUnread ) updatePositionCache();			return positionCache;		}		// TODO: check who's using this (positionArray() is actually faster now)		public int positions( final int[] position ) throws IOException {			if ( ASSERTS ) ensureCurrentDocument();			if ( positionsUnread ) updatePositionCache(); // And also that positions have																	// been read			if ( position.length < count ) return -count;			for ( int i = count; i-- != 0; )				position[ i ] = this.positionCache[ i ];			return count;		}		public int nextDocument() throws IOException {			if ( DEBUG ) System.err.println( "{" + this + "} nextDocument()" );			if ( state != BEFORE_POINTER ) {				if ( state == BEFORE_TOWER ) readTower();				if ( state == BEFORE_COUNT ) {#if GENERIC						switch ( countCoding ) {					case UNARY:#endif#if GENERIC || #counts(UNARY)						count = ibs.readUnary() + 1;#endif#if GENERIC							break;					case SHIFTED_GAMMA:#endif#if GENERIC || #counts(SHIFTED_GAMMA)						count = ibs.readShiftedGamma() + 1;#endif#if GENERIC							break;					case GAMMA:#endif#if GENERIC || #counts(GAMMA)						count = ibs.readGamma() + 1;#endif#if GENERIC							break;					case DELTA:#endif#if GENERIC || #counts(DELTA)						count = ibs.readDelta() + 1;#endif#if GENERIC							break;					default:						throw new IllegalStateException( "The required count coding (" + countCoding + ") is not supported." );					}#endif										state = BEFORE_POINTER;				}			}			if ( endOfList() ) return -1;			if ( hasPointers ) {// We do not write pointers for everywhere occurring terms.#if GENERIC					switch ( pointerCoding ) {				case SHIFTED_GAMMA:#endif#if GENERIC || #pointers(SHIFTED_GAMMA)					currentDocument += ibs.readShiftedGamma() + 1;#endif#if GENERIC						break;				case GAMMA:#endif#if GENERIC || #pointers(GAMMA)					currentDocument += ibs.readGamma() + 1;#endif#if GENERIC						break;				case DELTA:#endif#if GENERIC || #pointers(DELTA)					currentDocument += ibs.readDelta() + 1;#endif#if GENERIC						break;				case GOLOMB:#endif#if GENERIC || #pointers(GOLOMB)					currentDocument += ibs.readGolomb( b, log2b ) + 1;#endif#if GENERIC						break;				default:					throw new IllegalStateException( "The required pointer coding (" + pointerCoding + ") is not supported." );				}#endif			}			else currentDocument++;			numberOfDocumentRecord++;			if ( ASSERTS && numberOfDocumentRecord > quantum ) assert positionsBitsOffset > 0;			if ( ( numberOfDocumentRecord & quantumModuloMask ) == 0 ) {				state = BEFORE_TOWER;				positionsToReadToReachCurrentPosition = 0;			}			else {				state = BEFORE_COUNT;				if ( ASSERTS ) assert count > 0 : count + " <= " + 0;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -