📄 bitstreamindexwriter.java
字号:
if ( pointerCoding == Coding.GOLOMB ) { b = BitStreamIndex.golombModulus( frequency, numberOfDocuments ); log2b = Fast.mostSignificantBit( b ); } state = BEFORE_DOCUMENT_RECORD; bitsForFrequencies += bitCount; return bitCount; } public OutputBitStream newDocumentRecord() throws IOException { if ( frequency == writtenDocuments ) throw new IllegalStateException( "Document record overflow (written " + this.frequency + " already)" ); if ( state != BEFORE_DOCUMENT_RECORD ) throw new IllegalStateException( "Trying to start new document record in state " + state ); writtenDocuments++; numberOfPostings++; lastDocument = currentDocument; state = BEFORE_POINTER; return obs; } public int writeDocumentPointer( final OutputBitStream out, final int pointer ) throws IOException { if ( state != BEFORE_POINTER ) throw new IllegalStateException( "Trying to write pointer in state " + state ); currentDocument = pointer; int bitCount = 0; if ( frequency != numberOfDocuments ) { // We do not write pointers for everywhere occurring documents. switch( pointerCoding ) { case SHIFTED_GAMMA: bitCount = out.writeShiftedGamma( pointer - lastDocument - 1 ); break; case GAMMA: bitCount = out.writeGamma( pointer - lastDocument - 1 ); break; case DELTA: bitCount = out.writeDelta( pointer - lastDocument - 1 ); break; case GOLOMB: bitCount = out.writeGolomb( pointer - lastDocument - 1, b, log2b ); break; default: throw new IllegalStateException( "The required pointer coding (" + pointerCoding + ") is not supported." ); } } else if ( pointer - lastDocument != 1 ) throw new IllegalStateException( "Term " + currentTerm + " has frequency equal to the number of documents, but pointers are not consecutive integers" ); state = hasPayloads ? BEFORE_PAYLOAD : hasCounts ? BEFORE_COUNT : BEFORE_DOCUMENT_RECORD; bitsForPointers += bitCount; return bitCount; } public int writePayload( final OutputBitStream out, final Payload payload ) throws IOException { if ( frequency < 0 ) throw new IllegalStateException( "Trying to write payload without calling newInvertedList" ); if ( state != BEFORE_PAYLOAD ) throw new IllegalStateException( "Trying to write payload in state " + state ); final int count = payload.write( out ); bitsForPayloads += count; state = hasCounts ? BEFORE_COUNT : BEFORE_DOCUMENT_RECORD; return count; } public void close() throws IOException { if ( state != BEFORE_DOCUMENT_RECORD && state != BEFORE_INVERTED_LIST ) throw new IllegalStateException( "Trying to close index in state " + state ); if ( frequency >= 0 && frequency != writtenDocuments ) throw new IllegalStateException( "The number of document records (" + this.writtenDocuments + ") does not match the frequency (" + this.frequency + ")" ); if ( writtenBits() != obs.writtenBits() ) throw new IllegalStateException( "Written bits count mismatch: we say " + writtenBits() + ", the stream says " + obs.writtenBits() ); if ( offset != null ) { offset.writeLongGamma( obs.writtenBits() - lastInvertedListPos ); offset.close(); } obs.close(); } public int writePositionCount( final OutputBitStream out, final int count ) throws IOException { if ( frequency < 0 ) throw new IllegalStateException( "Trying to write count without calling newInvertedList" ); if ( state != BEFORE_COUNT ) throw new IllegalStateException( "Trying to write count in state " + state ); final int bitCount; numberOfOccurrences += count; switch( countCoding ) { case SHIFTED_GAMMA: bitCount = out.writeShiftedGamma( count - 1 ); break; case GAMMA: bitCount = out.writeGamma( count - 1 ); break; case UNARY: bitCount = out.writeUnary( count - 1 ); break; case DELTA: bitCount = out.writeDelta( count - 1 ); break; default: throw new IllegalStateException( "The required count coding (" + countCoding + ") is not supported." ); } state = hasPositions ? BEFORE_POSITIONS : BEFORE_DOCUMENT_RECORD; bitsForCounts += bitCount; return bitCount; } public int writeDocumentPositions( final OutputBitStream out, final int[] occ, final int offset, final int len, final int docSize ) throws IOException { if ( frequency < 0 ) throw new IllegalStateException( "Trying to write occurrences without calling newInvertedList" ); if ( state != BEFORE_POSITIONS ) throw new IllegalStateException( "Trying to write positions in state " + state ); if ( ASSERTS && docSize > 0 ) for( int i = 0; i< len; i++ ) assert occ[ offset + i ] < docSize : "Position " + occ[ offset + i ] + " for document " + currentDocument + " is too large; size is " + docSize; int i; int prev = -1; int bitCount = 0; final int end = offset + len; switch( positionCoding ) { case GAMMA: for( i = offset; i < end; i++ ) { bitCount += out.writeGamma( occ[ i ] - prev - 1 ); prev = occ[ i ]; } break; case DELTA: for( i = offset; i < end; i++ ) { bitCount += out.writeDelta( occ[ i ] - prev - 1 ); prev = occ[ i ]; } break; case SHIFTED_GAMMA: for( i = offset; i < end; i++ ) { bitCount += out.writeShiftedGamma( occ[ i ] - prev - 1 ); prev = occ[ i ]; } break; case GOLOMB: if ( len < 3 ) { for( i = 0; i < len; i++ ) bitCount += out.writeMinimalBinary( occ[ i ], docSize ); break; } // We compute b and log2b for positions final int positionB = BitStreamIndex.golombModulus( len, docSize ); final int positionLog2b = Fast.mostSignificantBit( positionB ); for( i = offset; i < end; i++ ) { bitCount += out.writeGolomb( occ[ i ] - prev - 1, positionB, positionLog2b ); prev = occ[ i ]; } break; case SKEWED_GOLOMB: if ( len < 3 ) { for( i = 0; i < len; i++ ) bitCount += out.writeMinimalBinary( occ[ i ], docSize ); break; } if ( sortedDeltas.length < len ) sortedDeltas = new int[ len ]; System.arraycopy( occ, offset, sortedDeltas, 0, len ); i = len - 1; while( i-- != 0 ) sortedDeltas[ i + 1 ] -= sortedDeltas[ i ] + 1; Sorting.nth_element( sortedDeltas, 0, len / 2, len ); final int sb = sortedDeltas[ len / 2 ] + 1; // The median of deltas bitCount = out.writeMinimalBinary( sb - 1, docSize ); for( i = offset; i < end; i++ ) { bitCount += out.writeSkewedGolomb( occ[ i ] - prev - 1, sb ); prev = occ[ i ]; } break; case INTERPOLATIVE: bitCount = InterpolativeCoding.write( out, occ, 0, len, 0, docSize - 1 ); break; default: throw new IllegalStateException( "The required position coding (" + positionCoding + ") is not supported." ); } state = BEFORE_DOCUMENT_RECORD; bitsForPositions += bitCount; if ( len > maxCount ) maxCount = len; return bitCount; } public long writtenBits() { return bitsForFrequencies + bitsForPointers + bitsForPayloads + bitsForCounts + bitsForPositions; } public Properties properties() { Properties result = new Properties(); result.setProperty( Index.PropertyKeys.DOCUMENTS, numberOfDocuments ); result.setProperty( Index.PropertyKeys.TERMS, currentTerm + 1 ); result.setProperty( Index.PropertyKeys.POSTINGS, numberOfPostings ); result.setProperty( Index.PropertyKeys.MAXCOUNT, maxCount ); result.setProperty( Index.PropertyKeys.INDEXCLASS, FileIndex.class.getName() ); // We save all flags, except for the PAYLOAD component, which is just used internally. for( Map.Entry<Component,Coding> e: flags.entrySet() ) if ( e.getKey() != Component.PAYLOADS ) result.addProperty( Index.PropertyKeys.CODING, new MutableString().append( e.getKey() ).append( ':' ).append( e.getValue() ) ); return result; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -