📄 skipbitstreamindexwriter.java
字号:
// Produce a (single) tower of height s for ( i = s - 1; i >= 0; i-- ) { towerData.bitsForLowerSkipPointers += skip[k].writeGolomb( Fast.int2nat( ( skipPointer[k + ( 1 << i )] - basePointer ) - ( ( skipPointer[k + ( 1 << i + 1 )] - basePointer ) / 2 ) ), towerLowerB[ i ], towerLowerLog2B[ i ] ); if ( STATS && doinIt && writeStats ) pointerSkipLine = ( ( skipPointer[k + ( 1 << i )] - basePointer ) - ( ( skipPointer[k + ( 1 << i + 1 )] - basePointer ) / 2 ) ) + "\t" + pointerSkipLine; towerData.bitsForLowerBitSkips += skip[k].writeLongDelta( Fast.int2nat( (int) ( ( ( toTheEnd - distance[k + ( 1 << ( i + 1 ) )] - entryBitLength * ( i + 1 ) ) / 2 ) - ( toTheEnd - distance[k + ( 1 << i )] ) ) ) ); if ( STATS && doinIt && writeStats ) bitSkipLine = (int) ( ( ( toTheEnd - distance[k + ( 1 << ( i + 1 ) )] - entryBitLength * ( i + 1 ) ) / 2 ) - ( toTheEnd - distance[k + ( 1 << i )] ) ) + "\t" + bitSkipLine; if ( DEBUG && doinIt ) System.err.print( ( skipPointer[k + ( 1 << i )] - basePointer ) + ":" + ( toTheEnd - distance[k + ( 1 << i )] ) + " " ); } if ( s > 0 ) { // No length for single-entry towers. d = bitCount.writeDelta( Fast.int2nat( (int) skip[k].writtenBits() - ( s + 1 ) * entryBitLength ) ); towerData.bitsForTowerLengths += d; toTheEnd += d; } if ( STATS && doinIt && writeStats ) { if ( pointerSkipLine.length() > 0 ) pointerSkipStats.println( pointerSkipLine ); if ( bitSkipLine.length() > 0 ) bitSkipStats.println( bitSkipLine ); } toTheEnd += skip[k].writtenBits(); if ( DEBUG && doinIt ) System.err.print( " (" + (int) skip[k].writtenBits() + " bits)" ); towerData.numberOfLowerEntries += s; towerData.numberOfSkipTowers++; if ( DEBUG && doinIt ) System.err.println(); } distance[ k ] = toTheEnd; // Where are we? Just before the beginning of the k-th skip tower toTheEnd += cachePointer[ k ].writtenBits(); // Where are we? Just before the beginning of the k-th document record } } /** Write out the cache content. * * @param nextPointer the first pointer of the next block, or -1 if this is the last block. */ private void writeOutCache( final int nextPointer ) throws IOException { if ( DEBUG ) System.err.println( "Entered writeOutCache() with cache=" + cache + " (H is " + ( 1 << h ) + ", B is " + w + ")" ); cacheDataLength[ ( cache + q - 1 ) / q - 1 ] = (int)cacheDataOut.writtenBits(); /* Number of bits to go after the first pointer of the first record of the next block (or, if there is no other block in the current list, to go to the end of the list). */ long toTheEnd; // Record the new document pointer for the highest tower int nextAfter = ( ( cache + q ) - 1 ) / q; // This is ceil( cache / q ) if ( nextPointer >= 0 ) { skipPointer[nextAfter] = nextPointer; toTheEnd = writeOutPointer( bitCount, nextPointer ); } else { skipPointer[nextAfter] = currentDocument + 1; // Fake: just for the last block toTheEnd = 0; } distance[nextAfter] = 0; int k, s; long d; // Compute quantum length in bits (without towers) int quantumBitLength = 0, entryBitLength = 0; for ( d = k = 0; k <= ( ( cache - 1 ) / q ); k++ ) d += ( cachePointer[k].writtenBits() + cacheDataLength[ k ] ); quantumBitLength = (int)( ( ( d * q ) + ( cache - 1 ) ) / cache ); final TowerData td = new TowerData(); final Int2IntRBTreeMap candidates = new Int2IntRBTreeMap(); /* As a first try, we compute the tower costs using 0 as average entry bit length. */ tryTower( quantumBitLength, 0, toTheEnd, cacheSkipBitCount, td, false ); if ( td.numberOfSkipTowers > 0 ) { // There actually is at least a tower. /* Now we repeat this operation, trying to obtain the best value for the * average entry bit length. */ while( candidates.size() < MAX_TRY && ! candidates.containsValue( entryBitLength = (int)( td.bitsForTowers() / td.numberOfEntries() ) ) ) { td.clear(); tryTower( quantumBitLength, entryBitLength, toTheEnd, cacheSkipBitCount, td, false ); candidates.put( (int)( td.bitsForTowers() / td.numberOfEntries() ), entryBitLength ); } if ( ASSERTS ) assert candidates.size() < MAX_TRY; entryBitLength = candidates.get( candidates.firstIntKey() ); if ( STATS && System.getProperty( "freq" ) != null ) { final double freq = Double.parseDouble( System.getProperty( "freq" ) ); final double error = Integer.getInteger( "error" ).intValue() / 100.0; final double relativeFrequency = (double)frequency / numberOfDocuments; if ( ( writeStats = ( relativeFrequency >= freq * ( 1 - error ) && relativeFrequency <= freq * ( 1 + error ) ) ) ) { pointerSkipStats.println( "# " + currentTerm ); pointerTopSkipStats.println( "# " + currentTerm ); bitSkipStats.println( "# " + currentTerm + " " + quantumBitLength + " " + entryBitLength ); bitTopSkipStats.println( "# " + currentTerm + " " + quantumBitLength + " " + entryBitLength ); } } if ( DEBUG ) System.err.println( "Going to write tower at position " + obs.writtenBits() ); tryTower( quantumBitLength, entryBitLength, toTheEnd, cacheSkip, towerData, true ); } // Ready to write out cache int maxCacheDataLength = 0; for ( k = 0; k <= ( ( cache - 1 ) / q ); k++ ) if ( cacheDataLength[ k ] > maxCacheDataLength ) maxCacheDataLength = cacheDataLength[ k ]; /* We have two ways of writing out cached data. If all the data is still in the output bit * stream buffer, we just read it directly. Otherwise, we have to pour it into a temporary buffer. */ final byte[] buffer; final boolean direct; int pos = 0; cacheDataOut.align(); if ( cacheDataOut.buffer() != null ) { buffer = cacheDataOut.buffer(); direct = true; } else { cacheDataOut.flush(); buffer = new byte[ ( maxCacheDataLength + 7 ) / 8 ]; direct = false; cacheDataIn.flush(); cacheDataIn.position( 0 ); } for ( k = 0; k <= ( ( cache - 1 ) / q ); k++ ) { /* See comments above. */ s = ( k == 0 ) ? h : Fast.leastSignificantBit( k ); if ( cache < w ) s = Math.min( s, Fast.mostSignificantBit( ( cache / q ) - k ) ); d = cachePointer[k].writtenBits(); cachePointer[k].flush(); obs.write( cachePointerByte[k].array, d ); d = cacheSkip[k].writtenBits(); cacheSkip[k].flush(); if ( s >= 0 ) { if ( k == 0 ) { if ( prevQuantumBitLength < 0 ) { bitsForQuantumBitLengths += obs.writeLongDelta( quantumBitLength ); bitsForEntryBitLengths += obs.writeLongDelta( entryBitLength ); } else { bitsForQuantumBitLengths += obs.writeLongDelta( Fast.int2nat( quantumBitLength - prevQuantumBitLength ) ); bitsForEntryBitLengths += obs.writeLongDelta( Fast.int2nat( entryBitLength - prevEntryBitLength ) ); } prevQuantumBitLength = quantumBitLength; prevEntryBitLength = entryBitLength; numberOfBlocks++; } if ( s > 0 ) obs.writeDelta( Fast.int2nat( (int)d - entryBitLength * ( s + 1 ) ) ); // No length for single-entry towers. } else if ( ASSERTS ) assert d == 0; obs.write( cacheSkipByte[k].array, d ); if ( direct ) { obs.write( buffer, pos * 8, cacheDataLength[ k ] ); pos += ( cacheDataLength[ k ] + 7 ) / 8; } else { cacheDataIn.read( buffer, 0, ( cacheDataLength[ k ] + 7 ) / 8 ); obs.write( buffer, cacheDataLength[ k ] ); } } // Clean used caches for ( k = 0; k <= ( ( cache - 1 ) / q ); k++ ) { cachePointerByte[k].reset(); cachePointer[k].writtenBits( 0 ); cacheSkipByte[k].reset(); cacheSkip[k].writtenBits( 0 ); cacheDataOut.position( 0 ); cacheDataOut.writtenBits( 0 ); } cache = 0; if ( ASSERTS ) assert obs.writtenBits() == writtenBits(); } public long writtenBits() { return super.writtenBits() + towerData.bitsForTopSkipPointers + towerData.bitsForTopBitSkips + towerData.bitsForLowerSkipPointers + towerData.bitsForLowerBitSkips + towerData.bitsForTowerLengths + bitsForQuantumBitLengths + bitsForEntryBitLengths; } public Properties properties() { Properties result = super.properties(); result.setProperty( Index.PropertyKeys.INDEXCLASS, FileIndex.class.getName() ); result.setProperty( BitStreamIndex.PropertyKeys.SKIPQUANTUM, q ); result.setProperty( BitStreamIndex.PropertyKeys.SKIPHEIGHT, h ); return result; } public void printStats( final PrintStream stats ) { super.printStats( stats ); stats.println( "Skip towers: " + Util.format( towerData.numberOfSkipTowers ) + " (" + Util.format( towerData.bitsForTowers() ) + " bits [" + Util.format( towerData.bitsForTowers()*100.0/writtenBits() ) + "%], " + Util.format( towerData.bitsForTowers()/ (double)towerData.numberOfSkipTowers ) + " bits/tower)" ); stats.println( "Skip entries: " + Util.format( towerData.numberOfEntries() ) + " (" + Util.format( towerData.bitsForEntries() / (double)towerData.numberOfEntries()) + " bits/entry)" ); // Note that lengths are written approximately every other tower. stats.println( "Skip tower lengths: " + Util.format( towerData.bitsForTowerLengths ) + " bits (" + Util.format( 2.0 * towerData.bitsForTowerLengths/ towerData.numberOfSkipTowers ) + " bits/tower)" ); stats.println( "Quantum bit lengths: " + Util.format( bitsForQuantumBitLengths ) + " bits (" + Util.format( bitsForQuantumBitLengths/ (double)numberOfBlocks ) + " bits/block)" ); stats.println( "Entry bit lengths: " + Util.format(bitsForEntryBitLengths ) + " bits (" + Util.format( bitsForEntryBitLengths/ (double)numberOfBlocks ) + " bits/block)" ); stats.println( "Top bit skips: " + Util.format(towerData.bitsForTopBitSkips ) + " bits (" + Util.format( towerData.bitsForTopBitSkips / (double)towerData.numberOfTopEntries ) + " bits/skip)" ); stats.println( "Top pointer skips: " + Util.format( towerData.bitsForTopSkipPointers ) + " bits (" + Util.format( towerData.bitsForTopSkipPointers/ (double)towerData.numberOfTopEntries ) + " bits/skip)" ); stats.println( "Lower bit skips: " + Util.format( towerData.bitsForLowerBitSkips ) + " bits (" + Util.format( towerData.bitsForLowerBitSkips/ (double)towerData.numberOfLowerEntries ) + " bits/skip)" ); stats.println( "Lower pointer skips: " + Util.format( towerData.bitsForLowerSkipPointers ) + " bits (" + Util.format( towerData.bitsForLowerSkipPointers/ (double)towerData.numberOfLowerEntries ) + " bits/skip)" ); stats.println( "Bit skips: " + Util.format( towerData.bitsForBitSkips() ) + " bits (" + Util.format( towerData.bitsForBitSkips()/ (double)towerData.numberOfEntries() ) + " bits/skip)" ); stats.println( "Pointer skips: " + Util.format( towerData.bitsForSkipPointers() ) + " bits (" + Util.format( towerData.bitsForSkipPointers()/ (double)towerData.numberOfEntries() ) + " bits/skip)" ); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -