📄 skipbitstreamindexwriter.java
字号:
try { pointerSkipStats = new PrintWriter( new java.io.FileWriter( "pointerSkip.stats" ) ); pointerTopSkipStats = new PrintWriter( new java.io.FileWriter( "pointerTopSkip.stats" ) ); bitSkipStats = new PrintWriter( new java.io.FileWriter( "bitSkip.stats" ) ); bitTopSkipStats = new PrintWriter( new java.io.FileWriter( "bitTopSkip.stats" ) ); String comment = "# " + System.getProperty( "freq" ) + "\u00b1" + Integer.getInteger( "error" ) + "%"; pointerSkipStats.println( comment ); pointerTopSkipStats.println( comment ); bitSkipStats.println( comment ); bitTopSkipStats.println( comment ); } catch ( IOException e ) { System.err.println( "Can't open stat files: " + e ); } } } public long newInvertedList() throws IOException { if ( cache != 0 ) writeOutCache( -1 ); return super.newInvertedList(); } public int writeFrequency( final int frequency ) throws IOException { int res = super.writeFrequency( frequency ); prevQuantumBitLength = prevEntryBitLength = -1; if ( DEBUG ) System.err.println( "----------- " + currentTerm + " (" + frequency + ")" ); final long pointerQuantumSigma = BitStreamIndex.quantumSigma( frequency, numberOfDocuments, q ); for( int i = Math.min( h, Fast.mostSignificantBit( frequency / q ) ); i >= 0; i-- ) { towerTopB[ i ] = BitStreamIndex.gaussianGolombModulus( pointerQuantumSigma, i + 1 ); towerTopLog2B[ i ] = Fast.mostSignificantBit( towerTopB[ i ] ); towerLowerB[ i ] = BitStreamIndex.gaussianGolombModulus( pointerQuantumSigma, i ); towerLowerLog2B[ i ] = Fast.mostSignificantBit( towerLowerB[ i ] ); pointerPrediction[ i ] = (int)( ( q * ( 1L << i ) * numberOfDocuments + frequency / 2 ) / frequency ); } return res; } public OutputBitStream newDocumentRecord() throws IOException { super.newDocumentRecord(); return cacheDataOut; } public int writeDocumentPointer( final OutputBitStream out, final int pointer ) throws IOException { // If the previous block is over, write it out! if ( cache == w ) writeOutCache( pointer ); // Record data pointer if we are on a skip; otherwise, write it to the cache. if ( cache % q == 0 ) { if ( cache / q > 0 ) cacheDataLength[ cache / q - 1 ] = (int)cacheDataOut.writtenBits(); cacheDataOut.align(); cacheDataOut.writtenBits( 0 ); skipPointer[ cache / q ] = pointer; return super.writeDocumentPointer( cachePointer[ cache++ / q ], pointer ); } else { cache++; return super.writeDocumentPointer( cacheDataOut, pointer ); } } private int writeOutPointer( final OutputBitStream out, final int pointer ) throws IOException { if ( frequency == numberOfDocuments ) return 0; // We do not write pointers for everywhere occurring terms. switch ( pointerCoding ) { case GAMMA: return out.writeGamma( pointer - lastDocument - 1 ); case DELTA: return out.writeDelta( pointer - lastDocument - 1 ); case GOLOMB: return out.writeGolomb( pointer - lastDocument - 1, b, log2b ); default: throw new IllegalStateException( "The required pointer coding (" + pointerCoding + ") is not supported." ); } } public void close() throws IOException { if ( cache != 0 ) writeOutCache( -1 ); super.close(); cacheDataIn.close(); cacheDataOut.close(); tempFile.delete(); if ( STATS ) { pointerSkipStats.close(); pointerTopSkipStats.close(); bitSkipStats.close(); bitTopSkipStats.close(); } } /** A structure maintaining statistical data about tower construction. */ public static class TowerData { /** The number of bits written for bit skips at the top of a tower. */ public long bitsForTopBitSkips; /** The number of bits written for skip pointers at the top of a tower. */ public long bitsForTopSkipPointers; /** The number of bits written for bit skips in the lower part of a tower. */ public long bitsForLowerBitSkips; /** The number of bits written for skip pointers in the lower part of a tower. */ public long bitsForLowerSkipPointers; /** The number of bits written for tower lengths. */ public long bitsForTowerLengths; /** The number of written skip towers. */ public long numberOfSkipTowers; /** The number of written top skip entries. */ public long numberOfTopEntries; /** The number of written lower skip entries. */ public long numberOfLowerEntries; /** Clear all fields of this tower data. */ void clear() { bitsForTopBitSkips = 0; bitsForTopSkipPointers = 0; bitsForLowerBitSkips = 0; bitsForLowerSkipPointers = 0; bitsForTowerLengths = 0; numberOfSkipTowers = 0; numberOfTopEntries = 0; numberOfLowerEntries = 0; } /** Returns the overall number of bits used for skip pointers. * @return the overall number of bits used for skip pointers. */ public long bitsForSkipPointers() { return bitsForTopSkipPointers + bitsForLowerSkipPointers; } /** Returns the overall number of bits used for bit skips. * @return the overall number of bits used for bit skips. */ public long bitsForBitSkips() { return bitsForTopBitSkips + bitsForLowerBitSkips; } /** Returns the overall number of bits used for tower entries (bits for tower lengths are not included). * @return the overall number of bits used for tower entries. */ public long bitsForEntries() { return bitsForSkipPointers() + bitsForBitSkips(); } /** Returns the overall number of bits used for towers. * @return the overall number of bits used for towers. */ public long bitsForTowers() { return bitsForTowerLengths + bitsForEntries(); } /** Returns the overall number of entries. * @return the overall number of entries. */ public long numberOfEntries() { return numberOfTopEntries + numberOfLowerEntries; } } /** The <var>k</var>-th entry of this array contains the number of bits from the start of * the <var>k</var>-th skip tower up to the end of the current block (more precisely, * to the point that should be reached via skipping, which is just after the document pointer). * Indices are skip indices. It is used just by {@link #tryTower(int, int, long, OutputBitStream[], TowerData, boolean)}, * but it is declared here for efficiency. */ final private long[] distance; /** The temporary file dumping the index data contained in a block. */ final private File tempFile; /** Whether we should write stats. */ private boolean writeStats; /** Computes the towers. * * @param quantumBitLength the length in bits of a quantum. * @param entryBitLength the estimated length in bits of a tower entry. * @param toTheEnd the number of bits that must be skipped to reach the next tower (usually, * the length of the first pointer of the next block or 0 if this is to be the last block). * @param skip an array of output bit stream where the data related to each tower will be written. * @param towerData will be filled with statistical date about the towers. * @param doinIt if true, we are actually writing a tower, not just trying. */ private void tryTower( final int quantumBitLength, final int entryBitLength, long toTheEnd, final OutputBitStream[] skip, final TowerData towerData, final boolean doinIt ) throws IOException { int i, k, s; long d; int basePointer; // truncated is true only for those towers (in defective blocks) whose height is strictly smaller than the height they should have boolean truncated = false; if ( DEBUG && doinIt ) System.err.println( "Writing out tower for term " + currentTerm + "; quantumBitLength=" + quantumBitLength + " entryBitLength=" + entryBitLength ); for ( k = ( cache - 1 ) / q; k >= 0; k-- ) { // Where are we? At the end of the k-th quantum. So toTheEnd must be increased by // the length of the data contained in the same quantum, moving us... toTheEnd += cacheDataLength[ k ]; // ...just after the k-th skip tower. // We compute the maximum valid index of the skip tower (*MUST* be kept in sync with the subsequent loop). s = ( k == 0 ) ? h : Fast.leastSignificantBit( k ); // This test handles defective blocks. In particular, for defective quanta s=-1, // yielding no skipping data at all for such quanta. truncated is true if the // current tower is truncated w.r.t. the infinite skip list. if ( cache < w ) { final int upperBound = Fast.mostSignificantBit( ( cache / q ) - k ); if ( s > upperBound ) { s = upperBound; truncated = true; } else truncated = false; } else truncated = k == 0; skip[ k ].writtenBits( 0 ); if ( s >= 0 ) { if ( DEBUG && doinIt ) System.err.print( "% (" + k + ") [" + skipPointer[ k ] + "] " ); basePointer = skipPointer[ k ]; /* If the current tower is truncated, we must actually write the top of the tower. * The top must be forecast in a Bernoullian way: we write it as a difference from the average pointer skip, * which is q 2^s / relativeFrequency. */ if ( truncated ) { towerData.numberOfTopEntries++; towerData.bitsForTopSkipPointers += skip[k].writeGolomb( Fast.int2nat( skipPointer[ k + ( 1 << s ) ] - basePointer - pointerPrediction[ s ] ), towerTopB[ s ], towerTopLog2B[ s ] ); towerData.bitsForTopBitSkips += skip[k].writeLongDelta( Fast.int2nat( (int) ( ( toTheEnd - distance[k + ( 1 << s )] ) - ( quantumBitLength * ( 1 << s ) + entryBitLength * ( ( 1 << s + 1 ) - s - 2 ) ) ) ) ); } if ( DEBUG && doinIt ) System.err.print( ( truncated ? "" : "(" ) + ( skipPointer[ k + ( 1 << s ) ] - basePointer ) + ":" + ( toTheEnd - distance[k + ( 1 << s )] ) + ( truncated ? " " : ") " ) ); if ( STATS && doinIt && writeStats ) { // These *MUST* be kept in sync with the writes above. if ( truncated ) { pointerTopSkipStats.println( s + "\t" + ( skipPointer[ k + ( 1 << s ) ] - basePointer - pointerPrediction[ s ] ) ); bitTopSkipStats.println( s + "\t" + (int) ( ( toTheEnd - distance[k + ( 1 << s )] ) - ( quantumBitLength * ( 1 << s ) + entryBitLength * ( ( 1 << s + 1 ) - s - 2 ) ) ) ); } pointerSkipLine = ""; bitSkipLine = ""; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -