⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 skipbitstreamindexwriter.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
				// Produce a (single) tower of height s				for ( i = s - 1; i >= 0; i-- ) {					towerData.bitsForLowerSkipPointers += skip[k].writeGolomb( 						Fast.int2nat( ( skipPointer[k + ( 1 << i )] - basePointer ) - ( ( skipPointer[k + ( 1 << i + 1 )] - basePointer ) / 2 ) ),						towerLowerB[ i ], towerLowerLog2B[ i ] 					);					if ( STATS && doinIt && writeStats ) pointerSkipLine = ( ( skipPointer[k + ( 1 << i )] - basePointer ) - ( ( skipPointer[k + ( 1 << i + 1 )] - basePointer ) / 2 ) ) + "\t" + pointerSkipLine;										towerData.bitsForLowerBitSkips += skip[k].writeLongDelta( 						Fast.int2nat( (int) ( ( ( toTheEnd - distance[k + ( 1 << ( i + 1 ) )] - entryBitLength * ( i + 1 ) ) / 2 ) -								( toTheEnd - distance[k + ( 1 << i )] ) ) ) 					);					if ( STATS && doinIt && writeStats ) bitSkipLine = (int) ( ( ( toTheEnd - distance[k + ( 1 << ( i + 1 ) )] - entryBitLength * ( i + 1 ) ) / 2 ) -							( toTheEnd - distance[k + ( 1 << i )] ) ) + "\t" + bitSkipLine;										if ( DEBUG && doinIt ) System.err.print( ( skipPointer[k + ( 1 << i )] - basePointer ) + ":" + ( toTheEnd - distance[k + ( 1 << i )] ) + " " );				}				if ( s > 0 ) { // No length for single-entry towers.					d = bitCount.writeDelta( Fast.int2nat( (int) skip[k].writtenBits() - ( s + 1 ) * entryBitLength ) );					towerData.bitsForTowerLengths += d;					toTheEnd += d;				}				if ( STATS && doinIt && writeStats ) {					if ( pointerSkipLine.length() > 0 ) pointerSkipStats.println( pointerSkipLine );					if ( bitSkipLine.length() > 0 ) bitSkipStats.println( bitSkipLine );				}				toTheEnd += skip[k].writtenBits();				if ( DEBUG && doinIt ) System.err.print( " (" + (int) skip[k].writtenBits() + " bits)" );				towerData.numberOfLowerEntries += s;				towerData.numberOfSkipTowers++;				if ( DEBUG && doinIt ) System.err.println();			}			distance[ k ] = toTheEnd;			// Where are we? Just before the beginning of the k-th skip tower			toTheEnd += cachePointer[ k ].writtenBits();			// Where are we? Just before the beginning of the k-th document record		}	}	/** Write out the cache content.	 * 	 * @param nextPointer the first pointer of the next block, or -1 if this is the last block.	 */	private void writeOutCache( final int nextPointer ) throws IOException {		if ( DEBUG ) System.err.println( "Entered writeOutCache() with cache=" + cache + " (H is " + ( 1 << h ) + ", B is " + w + ")" );		cacheDataLength[ ( cache + q - 1 ) / q - 1 ] = (int)cacheDataOut.writtenBits();		/* Number of bits to go after the first pointer of the first record of the next block (or, if there		   is no other block in the current list, to go to the end of the list). */		long toTheEnd;		// Record the new document pointer for the highest tower		int nextAfter = ( ( cache + q ) - 1 ) / q; // This is ceil( cache / q )		if ( nextPointer >= 0 ) {			skipPointer[nextAfter] = nextPointer;			toTheEnd = writeOutPointer( bitCount, nextPointer );		} else {			skipPointer[nextAfter] = currentDocument + 1; // Fake: just for the last block			toTheEnd = 0;		}		distance[nextAfter] = 0;		int k, s;		long d;		// Compute quantum length in bits (without towers)		int quantumBitLength = 0, entryBitLength = 0;		for ( d = k = 0; k <= ( ( cache - 1 ) / q ); k++ ) d += ( cachePointer[k].writtenBits() + cacheDataLength[ k ] );		quantumBitLength = (int)( ( ( d * q ) + ( cache - 1 ) ) / cache );		final TowerData td = new TowerData();		final Int2IntRBTreeMap candidates = new Int2IntRBTreeMap(); 		/* As a first try, we compute the tower costs using 0 as average entry bit length. */		tryTower( quantumBitLength, 0, toTheEnd, cacheSkipBitCount, td, false );				if ( td.numberOfSkipTowers > 0 ) { // There actually is at least a tower.			/* Now we repeat this operation, trying to obtain the best value for the			 * average entry bit length. 			 */			while( candidates.size() < MAX_TRY && ! candidates.containsValue( entryBitLength = (int)( td.bitsForTowers() / td.numberOfEntries() ) ) ) {				td.clear();				tryTower( quantumBitLength, entryBitLength, toTheEnd, cacheSkipBitCount, td, false );				candidates.put( (int)( td.bitsForTowers() / td.numberOfEntries() ), entryBitLength );			}			if ( ASSERTS ) assert candidates.size() < MAX_TRY;			entryBitLength = candidates.get( candidates.firstIntKey() );			if ( STATS && System.getProperty( "freq" ) != null ) {				final double freq = Double.parseDouble( System.getProperty( "freq" ) );				final double error = Integer.getInteger( "error" ).intValue() / 100.0;				final double relativeFrequency = (double)frequency / numberOfDocuments;				if ( ( writeStats = ( relativeFrequency >= freq * ( 1 - error ) && relativeFrequency <= freq * ( 1 + error ) ) ) ) {					pointerSkipStats.println( "# " + currentTerm );					pointerTopSkipStats.println( "# " + currentTerm );					bitSkipStats.println( "# " + currentTerm + " " + quantumBitLength + " " + entryBitLength );					bitTopSkipStats.println( "# " + currentTerm + " " + quantumBitLength + " " + entryBitLength );				}			}			if ( DEBUG ) System.err.println( "Going to write tower at position " + obs.writtenBits() );			tryTower( quantumBitLength, entryBitLength, toTheEnd, cacheSkip, towerData, true );		}		// Ready to write out cache		int maxCacheDataLength = 0;		for ( k = 0; k <= ( ( cache - 1 ) / q ); k++ ) if ( cacheDataLength[ k ] > maxCacheDataLength ) maxCacheDataLength = cacheDataLength[ k ];  				/* We have two ways of writing out cached data. If all the data is still in the output bit		 * stream buffer, we just read it directly. Otherwise, we have to pour it into a temporary buffer. */				final byte[] buffer;		final boolean direct;		int pos = 0;				cacheDataOut.align();		if ( cacheDataOut.buffer() != null ) {			buffer = cacheDataOut.buffer();			direct = true;		}		else {			cacheDataOut.flush();			buffer = new byte[ ( maxCacheDataLength + 7 ) / 8 ];			direct = false;			cacheDataIn.flush();			cacheDataIn.position( 0 );		}				for ( k = 0; k <= ( ( cache - 1 ) / q ); k++ ) {			/* See comments above. */			s = ( k == 0 ) ? h : Fast.leastSignificantBit( k );			if ( cache < w ) s = Math.min( s, Fast.mostSignificantBit( ( cache / q ) - k ) );			d = cachePointer[k].writtenBits();			cachePointer[k].flush();			obs.write( cachePointerByte[k].array, d );			d = cacheSkip[k].writtenBits();			cacheSkip[k].flush();			if ( s >= 0 ) {				if ( k == 0 ) {					if ( prevQuantumBitLength < 0 ) {						bitsForQuantumBitLengths += obs.writeLongDelta( quantumBitLength );						bitsForEntryBitLengths += obs.writeLongDelta( entryBitLength );					}					else {						bitsForQuantumBitLengths += obs.writeLongDelta( Fast.int2nat( quantumBitLength - prevQuantumBitLength ) );						bitsForEntryBitLengths += obs.writeLongDelta( Fast.int2nat( entryBitLength - prevEntryBitLength ) );					}					prevQuantumBitLength = quantumBitLength;					prevEntryBitLength = entryBitLength;					numberOfBlocks++;				}				if ( s > 0 ) obs.writeDelta( Fast.int2nat( (int)d - entryBitLength * ( s + 1 ) ) ); // No length for single-entry towers.			} else if ( ASSERTS ) assert d == 0;			obs.write( cacheSkipByte[k].array, d );						if ( direct ) {				obs.write( buffer, pos * 8, cacheDataLength[ k ] );				pos += ( cacheDataLength[ k ] + 7 ) / 8;			}			else {				cacheDataIn.read( buffer, 0, ( cacheDataLength[ k ] + 7 ) / 8 );				obs.write( buffer, cacheDataLength[ k ] );			}		}		// Clean used caches		for ( k = 0; k <= ( ( cache - 1 ) / q ); k++ ) {			cachePointerByte[k].reset();			cachePointer[k].writtenBits( 0 );			cacheSkipByte[k].reset();			cacheSkip[k].writtenBits( 0 );			cacheDataOut.position( 0 );			cacheDataOut.writtenBits( 0 );		}		cache = 0;		if ( ASSERTS ) assert obs.writtenBits() == writtenBits();	}	public long writtenBits() {		return super.writtenBits() + towerData.bitsForTopSkipPointers +		towerData.bitsForTopBitSkips + towerData.bitsForLowerSkipPointers +		towerData.bitsForLowerBitSkips + towerData.bitsForTowerLengths +		bitsForQuantumBitLengths + bitsForEntryBitLengths;	}		public Properties properties() {		Properties result = super.properties();		result.setProperty( Index.PropertyKeys.INDEXCLASS, FileIndex.class.getName() );		result.setProperty( BitStreamIndex.PropertyKeys.SKIPQUANTUM, q );		result.setProperty( BitStreamIndex.PropertyKeys.SKIPHEIGHT, h );		return result;		}	public void printStats( final PrintStream stats ) {		super.printStats( stats );		stats.println( "Skip towers: " + Util.format( towerData.numberOfSkipTowers ) + " (" + 				Util.format( towerData.bitsForTowers() ) + " bits [" + 				Util.format( towerData.bitsForTowers()*100.0/writtenBits() ) + "%], " + 				Util.format( towerData.bitsForTowers()/ (double)towerData.numberOfSkipTowers ) + " bits/tower)" );		stats.println( "Skip entries: " + Util.format( towerData.numberOfEntries() ) + " (" + 				Util.format( towerData.bitsForEntries() / (double)towerData.numberOfEntries()) + " bits/entry)" );		// Note that lengths are written approximately every other tower.		stats.println( "Skip tower lengths: " + Util.format( towerData.bitsForTowerLengths ) + " bits (" + Util.format( 2.0 * towerData.bitsForTowerLengths/ towerData.numberOfSkipTowers ) + " bits/tower)" );		stats.println( "Quantum bit lengths: " + Util.format( bitsForQuantumBitLengths ) + " bits (" + Util.format( bitsForQuantumBitLengths/ (double)numberOfBlocks ) + " bits/block)" );		stats.println( "Entry bit lengths: " + Util.format(bitsForEntryBitLengths ) + " bits (" + Util.format( bitsForEntryBitLengths/ (double)numberOfBlocks ) + " bits/block)" );				stats.println( "Top bit skips: " + Util.format(towerData.bitsForTopBitSkips ) + " bits (" + Util.format( towerData.bitsForTopBitSkips / (double)towerData.numberOfTopEntries ) + " bits/skip)" );		stats.println( "Top pointer skips: " + Util.format( towerData.bitsForTopSkipPointers ) + " bits (" + Util.format( towerData.bitsForTopSkipPointers/ (double)towerData.numberOfTopEntries ) + " bits/skip)" );		stats.println( "Lower bit skips: " + Util.format( towerData.bitsForLowerBitSkips ) + " bits (" + Util.format( towerData.bitsForLowerBitSkips/ (double)towerData.numberOfLowerEntries ) + " bits/skip)" );		stats.println( "Lower pointer skips: " + Util.format( towerData.bitsForLowerSkipPointers ) + " bits (" + Util.format( towerData.bitsForLowerSkipPointers/ (double)towerData.numberOfLowerEntries ) + " bits/skip)" );		stats.println( "Bit skips: " + Util.format( towerData.bitsForBitSkips() ) + " bits (" + Util.format( towerData.bitsForBitSkips()/ (double)towerData.numberOfEntries() ) + " bits/skip)" );		stats.println( "Pointer skips: " + Util.format( towerData.bitsForSkipPointers() ) + " bits (" + Util.format( towerData.bitsForSkipPointers()/ (double)towerData.numberOfEntries() ) + " bits/skip)" );	}}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -