📄 bitstreamindexreader.java
字号:
throw new IllegalStateException( "The required frequency coding (" + index.frequencyCoding + ") is not supported." ); } hasPointers = frequency < index.numberOfDocuments; // We compute the modulus used for pointer Golomb coding if ( pointerCoding == Coding.GOLOMB ) { if ( hasPointers ) { b = BitStreamIndex.golombModulus( frequency, index.numberOfDocuments ); log2b = Fast.mostSignificantBit( b ); } } if ( hasSkips ) { quantumBitLength = entryBitLength = -1; lowest = Integer.MAX_VALUE; if ( ASSERTS ) for( int i = height; i > Math.min( height, Fast.mostSignificantBit( frequency >> quantumDivisionShift ) ); i-- ) towerTopB[ i ] = towerLowerB[ i ] = pointerPrediction[ i ] = -1; final long pointerQuantumSigma = BitStreamIndex.quantumSigma( frequency, index.numberOfDocuments, quantum ); for( int i = Math.min( height, Fast.mostSignificantBit( frequency >> quantumDivisionShift ) ); i >= 0; i-- ) { towerTopB[ i ] = BitStreamIndex.gaussianGolombModulus( pointerQuantumSigma, i + 1 ); towerTopLog2B[ i ] = Fast.mostSignificantBit( towerTopB[ i ] ); towerLowerB[ i ] = BitStreamIndex.gaussianGolombModulus( pointerQuantumSigma, i ); towerLowerLog2B[ i ] = Fast.mostSignificantBit( towerLowerB[ i ] ); pointerPrediction[ i ] = (int)( ( quantum * ( 1L << i ) * index.numberOfDocuments + frequency / 2 ) / frequency ); } } count = -1; currentDocument = -1; numberOfDocumentRecord = -1; state = BEFORE_POINTER; } public Index index() { return keyIndex; } public int frequency() { return frequency; } private void ensureCurrentDocument() { if ( currentDocument < 0 ) throw new IllegalStateException( "nextDocument() has never been called for (term=" + currentTerm + ")" ); if ( currentDocument == Integer.MAX_VALUE ) throw new IllegalStateException( "This reader is positioned beyond the end of list of (term=" + currentTerm + ")" ); } /** Returns whether there are no more document records in the current inverted list. * * <p>This method returns true if the last document pointer of the current inverted * list has been read. It makes no distinction as to where (inside the last document * record) this reader is currently positioned. In particular, this method will * return true independently of whether count and positions have been read or not (we * note by passing that this is the only sensible behaviour, as you can build indices * with or without counts/positions). * * <p>This method will return true also when this reader is positioned <em>beyond</em> * the last document pointer. In this case, {@link #currentDocumentPointer()} will * return {@link Integer#MAX_VALUE}. * * @return true whether there are no more document records in the current inverted list. */ private boolean endOfList() { if ( ASSERTS ) assert numberOfDocumentRecord <= frequency; return numberOfDocumentRecord >= frequency - 1; } public int document() { if ( ASSERTS ) ensureCurrentDocument(); return currentDocument; } public Payload payload() throws IOException { if ( DEBUG ) System.err.println( this + ".payload()" ); if ( ASSERTS ) ensureCurrentDocument(); if ( ! hasPayloads ) throw new UnsupportedOperationException( "This index ("+ index + ") does not contain payloads" ); if ( state <= BEFORE_PAYLOAD ) { if ( state == BEFORE_TOWER ) readTower(); payload.read( ibs ); state = hasCounts ? BEFORE_COUNT : BEFORE_POINTER; } return payload; } public int count() throws IOException { if ( DEBUG ) System.err.println( this + ".count()" ); if ( count != -1 ) return count; if ( ASSERTS ) ensureCurrentDocument(); if ( ! hasCounts ) throw new UnsupportedOperationException( "This index (" + index + ") does not contain counts" ); if ( state == BEFORE_TOWER ) readTower(); if ( state == BEFORE_PAYLOAD ) payload.read( ibs ); { if ( ASSERTS && state != BEFORE_COUNT ) throw new IllegalStateException(); state = hasPositions ? BEFORE_POSITIONS : BEFORE_POINTER; switch( countCoding ) { case UNARY: count = ibs.readUnary() + 1; break; case SHIFTED_GAMMA: count = ibs.readShiftedGamma() + 1; break; case GAMMA: count = ibs.readGamma() + 1; break; case DELTA: count = ibs.readDelta() + 1; break; default: throw new IllegalStateException( "The required count coding (" + countCoding + ") is not supported." ); } } return count; } /** We read positions, assuming state <= BEFORE_POSITIONS */ @SuppressWarnings("unused") protected void updatePositionCache() throws IOException { if ( ASSERTS ) assert state <= BEFORE_POSITIONS; if ( ! hasPositions ) throw new UnsupportedOperationException( "This index(" + index + ") does not contain positions" ); if ( state < BEFORE_POSITIONS ) { if ( state == BEFORE_TOWER ) readTower(); if ( state == BEFORE_PAYLOAD ) payload.read( ibs ); if ( state == BEFORE_COUNT ) { if ( ASSERTS && state != BEFORE_COUNT ) throw new IllegalStateException(); switch( countCoding ) { case UNARY: count = ibs.readUnary() + 1; break; case SHIFTED_GAMMA: count = ibs.readShiftedGamma() + 1; break; case GAMMA: count = ibs.readGamma() + 1; break; case DELTA: count = ibs.readDelta() + 1; break; default: throw new IllegalStateException( "The required count coding (" + countCoding + ") is not supported." ); } } } if ( count > positionCache.length ) positionCache = new int[ Math.max( positionCache.length * 2, count ) ]; final int[] occ = positionCache; state = BEFORE_POINTER; switch( positionCoding ) { case SHIFTED_GAMMA: ibs.readShiftedGammas( occ, count ); for( int i = 1; i < count; i++ ) occ[ i ] += occ[ i - 1 ] + 1; return; case GAMMA: ibs.readGammas( occ, count ); for( int i = 1; i < count; i++ ) occ[ i ] += occ[ i - 1 ] + 1; return; case DELTA: ibs.readDeltas( occ, count ); for( int i = 1; i < count; i++ ) occ[ i ] += occ[ i - 1 ] + 1; return; case GOLOMB: if ( ASSERTS ) assert index.sizes != null; int docSize = index.sizes.getInt( currentDocument ); if ( count < 3 ) for( int i = 0; i < count; i++ ) occ[ i ] = ibs.readMinimalBinary( docSize ); else { final int bb = BitStreamIndex.golombModulus( count, docSize ); int prev = -1; if ( bb != 0 ) { final int log2bb = Fast.mostSignificantBit( bb ); for( int i = 0; i < count; i++ ) occ[ i ] = prev = ibs.readGolomb( bb, log2bb ) + prev + 1; } else for ( int i = 0; i < count; i++ ) occ[ i ] = i; } return; case SKEWED_GOLOMB: if ( ASSERTS ) assert index.sizes != null; int docSize2 = index.sizes.getInt( currentDocument ); if ( count < 3 ) for( int i = 0; i < count; i++ ) occ[ i ] = ibs.readMinimalBinary( docSize2 ); else { final int sb = ibs.readMinimalBinary( docSize2 ) + 1; int prev2 = -1; for( int i = 0; i < count; i++ ) occ[ i ] = prev2 = ibs.readSkewedGolomb( sb ) + prev2 + 1; } return; case INTERPOLATIVE: it.unimi.dsi.mg4j.io.InterpolativeCoding.read( ibs, occ, 0, count, 0, index.sizes.getInt( currentDocument ) - 1 ); return; default: throw new IllegalStateException( "The required position coding (" + index.positionCoding + ") is not supported." ); } } public IntIterator positions() throws IOException { if ( ASSERTS ) ensureCurrentDocument(); if ( state <= BEFORE_POSITIONS ) updatePositionCache(); return IntIterators.wrap( positionCache, 0, count ); } public int[] positionArray() throws IOException { if ( ASSERTS ) ensureCurrentDocument(); if ( state <= BEFORE_POSITIONS ) updatePositionCache(); return positionCache; } // TODO: check who's using this (positionArray() is actually faster now) public int positions( final int[] position ) throws IOException { if ( ASSERTS ) ensureCurrentDocument(); if ( state <= BEFORE_POSITIONS ) updatePositionCache(); // And also that positions have been read if ( position.length < count ) return -count; for( int i = count; i-- != 0; ) position[ i ] = this.positionCache[ i ]; return count; } public int nextDocument() throws IOException { if ( DEBUG ) System.err.println( "{" + this + "} nextDocument()" ); if ( state != BEFORE_POINTER ) { if ( state == BEFORE_TOWER ) readTower(); if ( state == BEFORE_PAYLOAD ) payload.read( ibs ); if ( state == BEFORE_COUNT ) { if ( ASSERTS && state != BEFORE_COUNT ) throw new IllegalStateException(); state = hasPositions ? BEFORE_POSITIONS : BEFORE_POINTER; switch( countCoding ) { case UNARY: count = ibs.readUnary() + 1; break; case SHIFTED_GAMMA: count = ibs.readShiftedGamma() + 1; break; case GAMMA: count = ibs.readGamma() + 1; break; case DELTA: count = ibs.readDelta() + 1; break; default: throw new IllegalStateException( "The required count coding (" + countCoding + ") is not supported." ); } } if ( state == BEFORE_POSITIONS ) { // Here we just skip; note that the state change is necessary if endOfList() is true state = BEFORE_POINTER; switch( positionCoding ) { case SHIFTED_GAMMA: ibs.skipShiftedGammas( count ); break; case GAMMA: ibs.skipGammas( count ); break; case DELTA: ibs.skipDeltas( count ); break; case GOLOMB: if ( ASSERTS ) assert index.sizes != null; int docSize = index.sizes.getInt( currentDocument ); if ( count < 3 ) for( int i = 0; i < count; i++ ) ibs.readMinimalBinary( docSize ); else { final int bb = BitStreamIndex.golombModulus( count, docSize ); if ( bb != 0 ) { final int log2bb = Fast.mostSignificantBit( bb ); for( int i = 0; i < count; i++ ) ibs.readGolomb( bb, log2bb ); } } break; case SKEWED_GOLOMB: if ( ASSERTS ) assert index.sizes != null; docSize = index.sizes.getInt( currentDocument ); if ( count < 3 ) for( int i = 0; i < count; i++ ) ibs.readMinimalBinary( docSize ); else { final int sb = ibs.readMinimalBinary( docSize ) + 1; for( int i = 0; i < count; i++ ) ibs.readSkewedGolomb( sb ); } break; case INTERPOLATIVE: it.unimi.dsi.mg4j.io.InterpolativeCoding.read( ibs, null, 0, count, 0, index.sizes.getInt( currentDocument ) - 1 ); break; default: throw new IllegalStateException( "The required position coding (" + positionCoding + ") is not supported." ); } } } if ( endOfList() ) return -1; if ( hasPointers ) {// We do not write pointers for everywhere occurring terms. switch( pointerCoding ) { case SHIFTED_GAMMA: currentDocument += ibs.readShiftedGamma() + 1; break; case GAMMA: currentDocument += ibs.readGamma() + 1; break; case DELTA: currentDocument += ibs.readDelta() + 1; break; case GOLOMB: currentDocument += ibs.readGolomb( b, log2b ) + 1; break; default: throw new IllegalStateException( "The required pointer coding (" + pointerCoding + ") is not supported." ); } } else currentDocument++; numberOfDocumentRecord++; if ( hasPayloads ) state = BEFORE_PAYLOAD; else if ( hasCounts ) state = BEFORE_COUNT; count = -1; if ( hasSkips && ( numberOfDocumentRecord & quantumModuloMask ) == 0 ) state = BEFORE_TOWER; return currentDocument; } /** Reads the entire skip tower for the current position. */ private void readTower() throws IOException { readTower( -1 ); } /** Reads the skip tower for the current position, possibly skipping part of the tower. * * <P>Note that this method will update {@link #state} only if it reads the entire tower, * otherwise the state remains {@link #BEFORE_TOWER}. *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -