📄 indexwriter.java
字号:
package it.unimi.dsi.mg4j.index;

import it.unimi.dsi.mg4j.index.payload.Payload;
import it.unimi.dsi.io.OutputBitStream;
import it.unimi.dsi.util.Properties;
import java.io.IOException;
import java.io.PrintStream;

/**
 * Contract for classes that generate indices.
 *
 * <p>An implementation writes inverted lists one after the other. The expected
 * calling sequence is:
 * <ol>
 * <li>call {@link #newInvertedList()} to begin a new inverted list;
 * <li>declare its frequency with {@link #writeFrequency(int)};
 * <li>write the document records, calling {@link #newDocumentRecord()} before each
 * of them; overall, the number of calls to {@link #newDocumentRecord()} must be
 * equal to the frequency;
 * <li>within each document record, supply the information required by the index
 * being built, in this order:
 * {@linkplain #writeDocumentPointer(OutputBitStream, int) pointer},
 * {@linkplain #writePayload(OutputBitStream, Payload) payload},
 * {@linkplain #writePositionCount(OutputBitStream, int) count}, and
 * {@linkplain #writeDocumentPositions(OutputBitStream, int[], int, int, int) positions}.
 * </ol>
 *
 * <p>The document-record data must be written to the {@link OutputBitStream}
 * returned by {@link #newDocumentRecord()}. That stream is not guaranteed to
 * coincide with the underlying bit stream, and there is no guarantee as to when
 * the bits actually reach the underlying stream; the only exception is that
 * starting a new inverted list causes the previous inverted list, if any, to be
 * written onto the underlying stream.
 *
 * @author Paolo Boldi
 * @author Sebastiano Vigna
 * @since 1.2
 */
public interface IndexWriter {

	/**
	 * Begins a new inverted list, actually writing the previous one (if any)
	 * to the underlying bit stream.
	 *
	 * @return the position (in bytes) of the underlying bit stream at which the
	 * new inverted list starts.
	 * @throws IllegalStateException if the previous inverted list received too
	 * few records.
	 * @throws IOException if an I/O error occurs.
	 */
	long newInvertedList() throws IOException;

	/**
	 * Writes the frequency of the current inverted list.
	 *
	 * @param frequency the (positive) number of document records that this
	 * inverted list will contain.
	 * @return the number of bits written.
	 * @throws IOException if an I/O error occurs.
	 */
	int writeFrequency( final int frequency ) throws IOException;

	/**
	 * Begins a new document record.
	 *
	 * <p>This method must be called exactly <var>f</var> times, where
	 * <var>f</var> is the frequency declared with {@link #writeFrequency(int)}.
	 *
	 * @return the output bit stream to which the data of the next document
	 * record should be written.
	 * @throws IllegalStateException if too many records were written for the
	 * current inverted list, or if there is no current inverted list.
	 * @throws IOException if an I/O error occurs.
	 */
	OutputBitStream newDocumentRecord() throws IOException;

	/**
	 * Writes a document pointer.
	 *
	 * <p>This method must be called immediately after
	 * {@link #newDocumentRecord()}.
	 *
	 * @param out the output bit stream to which the pointer will be written.
	 * @param pointer the document pointer.
	 * @return the number of bits written.
	 * @throws IOException if an I/O error occurs.
	 */
	int writeDocumentPointer( final OutputBitStream out, final int pointer ) throws IOException;

	/**
	 * Writes the payload of the current document.
	 *
	 * <p>This method must be called immediately after
	 * {@link #writeDocumentPointer(OutputBitStream, int)}.
	 *
	 * @param out the output bit stream to which the payload will be written.
	 * @param payload the payload.
	 * @return the number of bits written.
	 * @throws IOException if an I/O error occurs.
	 */
	int writePayload( final OutputBitStream out, final Payload payload ) throws IOException;

	/**
	 * Writes to the given {@link OutputBitStream} the number of occurrences of
	 * the current term in the current document.
	 *
	 * @param out the output stream to which the count should be written.
	 * @param count the count.
	 * @return the number of bits written.
	 * @throws IOException if an I/O error occurs.
	 */
	int writePositionCount( final OutputBitStream out, final int count ) throws IOException;

	/**
	 * Writes to the given {@link OutputBitStream} the positions at which the
	 * current term occurs in the current document.
	 *
	 * @param out the output stream to which the occurrences should be written.
	 * @param occ the position vector (a sequence of strictly increasing
	 * natural numbers).
	 * @param offset the first valid entry in <code>occ</code>.
	 * @param len the number of valid entries in <code>occ</code>.
	 * @param docSize the size of the current document (only needed for Golomb
	 * and interpolative coding; otherwise -1 can safely be passed).
	 * @return the number of bits written.
	 * @throws IllegalStateException if there is no current inverted list.
	 * @throws IOException if an I/O error occurs.
	 */
	int writeDocumentPositions( final OutputBitStream out, final int[] occ, final int offset, final int len, final int docSize ) throws IOException;

	/**
	 * Returns the overall number of bits written onto the underlying stream(s).
	 *
	 * @return the number of bits written, according to the variables keeping
	 * statistical records.
	 */
	long writtenBits();

	/**
	 * Returns the properties of the index generated by this index writer.
	 *
	 * <p>This method should only be called after {@link #close()}. The result
	 * is a new {@linkplain Properties property object} holding values for
	 * (whenever appropriate)
	 * {@link Index.PropertyKeys#DOCUMENTS}, {@link Index.PropertyKeys#TERMS},
	 * {@link Index.PropertyKeys#POSTINGS}, {@link Index.PropertyKeys#MAXCOUNT},
	 * {@link Index.PropertyKeys#INDEXCLASS}, {@link Index.PropertyKeys#CODING},
	 * {@link Index.PropertyKeys#PAYLOADCLASS},
	 * {@link BitStreamIndex.PropertyKeys#SKIPQUANTUM}, and
	 * {@link BitStreamIndex.PropertyKeys#SKIPHEIGHT}.
	 *
	 * @return a new set of properties for the just created index.
	 */
	Properties properties();

	/**
	 * Closes this index writer, completing the index creation process and
	 * releasing all resources.
	 *
	 * @throws IllegalStateException if the last inverted list received too few
	 * records.
	 * @throws IOException if an I/O error occurs.
	 */
	void close() throws IOException;

	/**
	 * Writes statistical information about the index just built to the given
	 * print stream.
	 *
	 * <p>This method must be called after {@link #close()}.
	 *
	 * @param stats the print stream to which statistical information will be
	 * written.
	 */
	void printStats( final PrintStream stats );
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -