⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 skipbitstreamindexwriter.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
			try {				pointerSkipStats = new PrintWriter( new java.io.FileWriter( "pointerSkip.stats" ) );				pointerTopSkipStats = new PrintWriter( new java.io.FileWriter( "pointerTopSkip.stats" ) );				bitSkipStats = new PrintWriter( new java.io.FileWriter( "bitSkip.stats" ) );				bitTopSkipStats = new PrintWriter( new java.io.FileWriter( "bitTopSkip.stats" ) );								String comment = "# " + System.getProperty( "freq" ) + "\u00b1" + Integer.getInteger( "error" ) + "%";				pointerSkipStats.println( comment );				pointerTopSkipStats.println( comment );				bitSkipStats.println( comment );				bitTopSkipStats.println( comment );			} catch ( IOException e ) {				System.err.println( "Can't open stat files: " + e );			}		}	}	public long newInvertedList() throws IOException {		if ( cache != 0 ) writeOutCache( -1 );		return super.newInvertedList();	}	public int writeFrequency( final int frequency ) throws IOException {		int res = super.writeFrequency( frequency );		prevQuantumBitLength = prevEntryBitLength = -1;			if ( DEBUG ) System.err.println( "----------- " + currentTerm + " (" + frequency + ")" );		final long pointerQuantumSigma = BitStreamIndex.quantumSigma( frequency, numberOfDocuments, q );		for( int i = Math.min( h, Fast.mostSignificantBit( frequency / q ) ); i >= 0; i-- ) {			towerTopB[ i ] = BitStreamIndex.gaussianGolombModulus( pointerQuantumSigma, i + 1 );			towerTopLog2B[ i ] = Fast.mostSignificantBit( towerTopB[ i ] );			towerLowerB[ i ] = BitStreamIndex.gaussianGolombModulus( pointerQuantumSigma, i );			towerLowerLog2B[ i ] = Fast.mostSignificantBit( towerLowerB[ i ] );			pointerPrediction[ i ] = (int)( ( q * ( 1L << i ) * numberOfDocuments + frequency / 2 ) / frequency );		}		return res;	}	public OutputBitStream newDocumentRecord() throws IOException {		super.newDocumentRecord();		return cacheDataOut;	}	public int writeDocumentPointer( final OutputBitStream out, final int pointer ) throws IOException {		// If the previous block is over, write it out!		if ( cache == w ) writeOutCache( pointer );		// Record data pointer if we are on a skip; otherwise, write it to the cache.		if ( cache % q == 0 ) {			if ( cache / q > 0 ) cacheDataLength[ cache / q - 1 ] = (int)cacheDataOut.writtenBits();			cacheDataOut.align();			cacheDataOut.writtenBits( 0 );			skipPointer[ cache / q ] = pointer;			return super.writeDocumentPointer( cachePointer[ cache++ / q ], pointer );		} 		else {			cache++;			return super.writeDocumentPointer( cacheDataOut, pointer );		}	}	private int writeOutPointer( final OutputBitStream out, final int pointer ) throws IOException {		if ( frequency == numberOfDocuments ) return 0; // We do not write pointers for everywhere occurring terms.		switch ( pointerCoding ) {			case GAMMA:				return out.writeGamma( pointer - lastDocument - 1 );			case DELTA:				return out.writeDelta( pointer - lastDocument - 1 );			case GOLOMB:				return out.writeGolomb( pointer - lastDocument - 1, b, log2b );			default:				throw new IllegalStateException( "The required pointer coding (" + pointerCoding + ") is not supported." );		}	}	public void close() throws IOException {		if ( cache != 0 ) writeOutCache( -1 );		super.close();				cacheDataIn.close();		cacheDataOut.close();		tempFile.delete();				if ( STATS ) {			pointerSkipStats.close();			pointerTopSkipStats.close();			bitSkipStats.close();			bitTopSkipStats.close();		}	}	/** A structure maintaining statistical data about tower construction. */	public static class TowerData {		/** The number of bits written for bit skips at the top of a tower. */		public long bitsForTopBitSkips;		/** The number of bits written for skip pointers at the top of a tower. */		public long bitsForTopSkipPointers;		/** The number of bits written for bit skips in the lower part of a tower. */		public long bitsForLowerBitSkips;		/** The number of bits written for skip pointers in the lower part of a tower. */		public long bitsForLowerSkipPointers;		/** The number of bits written for tower lengths. */		public long bitsForTowerLengths;		/** The number of written skip towers. */		public long numberOfSkipTowers;		/** The number of written top skip entries. */		public long numberOfTopEntries;		/** The number of written lower skip entries. */		public long numberOfLowerEntries;		/** Clear all fields of this tower data. */		void clear() {			bitsForTopBitSkips = 0;			bitsForTopSkipPointers = 0;			bitsForLowerBitSkips = 0;			bitsForLowerSkipPointers = 0;			bitsForTowerLengths = 0;			numberOfSkipTowers = 0;			numberOfTopEntries = 0;			numberOfLowerEntries = 0;		}		/** Returns the overall number of bits used for skip pointers.		 * @return the overall number of bits used for skip pointers.		 */		public long bitsForSkipPointers() { return bitsForTopSkipPointers + bitsForLowerSkipPointers; }		/** Returns the overall number of bits used for bit skips. 		 * @return the overall number of bits used for bit skips.		 */		public long bitsForBitSkips() { return bitsForTopBitSkips + bitsForLowerBitSkips; }		/** Returns the overall number of bits used for tower entries (bits for tower lengths are not included).		 * @return the overall number of bits used for tower entries.		 */		public long bitsForEntries() { return bitsForSkipPointers() + bitsForBitSkips(); }		/** Returns the overall number of bits used for towers.		 * @return the overall number of bits used for towers.		 */		public long bitsForTowers() { return bitsForTowerLengths + bitsForEntries(); }		/** Returns the overall number of entries.		 * @return the overall number of entries.		 */		public long numberOfEntries() { return numberOfTopEntries + numberOfLowerEntries; }	}		/** The <var>k</var>-th entry of this array contains the number of bits from the start of	 * the <var>k</var>-th skip tower up to the end of the current block (more precisely,	 * to the point that should be reached via skipping, which is just after the document pointer).	 * Indices are skip indices. It is used just by {@link #tryTower(int, int, long, OutputBitStream[], TowerData, boolean)}, 	 * but it is declared here for efficiency.	 */	final private long[] distance;	/** The temporary file dumping the index data contained in a block. */	final private File tempFile;	/** Whether we should write stats. */	private boolean writeStats;		/** Computes the towers.	 * 	 * @param quantumBitLength the length in bits of a quantum.	 * @param entryBitLength the estimated length in bits of a tower entry.	 * @param toTheEnd the number of bits that must be skipped to reach the next tower (usually,	 * the length of the first pointer of the next block or 0 if this is to be the last block).	 * @param skip an array of output bit stream where the data related to each tower will be written.	 * @param towerData will be filled with statistical date about the towers.	 * @param doinIt if true, we are actually writing a tower, not just trying.	 */	private void tryTower( final int quantumBitLength, final int entryBitLength, long toTheEnd, final OutputBitStream[] skip, final TowerData towerData, final boolean doinIt ) throws IOException {		int i, k, s;		long d;		int basePointer;		// truncated is true only for those towers (in defective blocks) whose height is strictly smaller than the height they should have		boolean truncated = false;		if ( DEBUG && doinIt ) System.err.println( "Writing out tower for term " + currentTerm + "; quantumBitLength=" + quantumBitLength + " entryBitLength=" + entryBitLength );				for ( k = ( cache - 1 ) / q; k >= 0; k-- ) {			// Where are we? At the end of the k-th quantum. So toTheEnd must be increased by			// the length of the data contained in the same quantum, moving us...			toTheEnd += cacheDataLength[ k ];			// ...just after the k-th skip tower.			// We compute the maximum valid index of the skip tower (*MUST* be kept in sync with the subsequent loop).			s = ( k == 0 ) ? h : Fast.leastSignificantBit( k );			// This test handles defective blocks. In particular, for defective quanta s=-1,			// yielding no skipping data at all for such quanta. truncated is true if the			// current tower is truncated w.r.t. the infinite skip list.			if ( cache < w ) {				final int upperBound = Fast.mostSignificantBit( ( cache / q ) - k );				if ( s > upperBound ) {					s = upperBound;					truncated = true;				} else truncated = false;			}			else truncated = k == 0;						skip[ k ].writtenBits( 0 );			if ( s >= 0 ) {				if ( DEBUG && doinIt ) System.err.print( "% (" + k + ") [" + skipPointer[ k ] + "] " );				basePointer = skipPointer[ k ];				/* If the current tower is truncated, we must actually write the top of the tower.				 * The top must be forecast in a Bernoullian way: we write it as a difference from the average pointer skip, 				 * which is q 2^s / relativeFrequency. */				if ( truncated ) {					towerData.numberOfTopEntries++;					towerData.bitsForTopSkipPointers += skip[k].writeGolomb( Fast.int2nat( skipPointer[ k + ( 1 << s ) ] - basePointer - pointerPrediction[ s ] ), towerTopB[ s ], towerTopLog2B[ s ] );					towerData.bitsForTopBitSkips += skip[k].writeLongDelta( Fast.int2nat( 						(int) ( ( toTheEnd - distance[k + ( 1 << s )] ) -								( quantumBitLength * ( 1 << s ) + entryBitLength * ( ( 1 << s + 1 ) - s - 2 ) ) ) )					);				}								if ( DEBUG && doinIt ) System.err.print( ( truncated ? "" : "(" ) + ( skipPointer[ k + ( 1 << s ) ] - basePointer ) + ":" + ( toTheEnd - distance[k + ( 1 << s )] ) + ( truncated ? " " : ") " ) );				if ( STATS && doinIt && writeStats ) {					// These *MUST* be kept in sync with the writes above.					if ( truncated ) {						pointerTopSkipStats.println( s + "\t" + ( skipPointer[ k + ( 1 << s ) ] - basePointer - pointerPrediction[ s ] ) );						bitTopSkipStats.println( s + "\t" + (int) ( ( toTheEnd - distance[k + ( 1 << s )] ) - ( quantumBitLength * ( 1 << s ) + entryBitLength * ( ( 1 << s + 1 ) - s - 2 ) ) ) );					}					pointerSkipLine = "";					bitSkipLine = "";				}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -