📄 gammadeltagammadeltabitstreamindexreader.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
  /**
   * Tells whether the last document record of the current list has been reached.
   *
   * @return true if {@code numberOfDocumentRecord >= frequency - 1}.
   */
  private boolean endOfList() {
      if ( ASSERTS ) assert numberOfDocumentRecord <= frequency;
      return numberOfDocumentRecord >= frequency - 1;
  }

  /**
   * Returns the current document pointer.
   *
   * @return the value of {@code currentDocument}.
   */
  public int document() {
      if ( ASSERTS ) ensureCurrentDocument();
      return currentDocument;
  }

  /**
   * Payloads are not available from this iterator.
   *
   * @throws UnsupportedOperationException always.
   */
  public Payload payload() throws IOException {
      if ( DEBUG ) System.err.println( this + ".payload()" );
      if ( ASSERTS ) ensureCurrentDocument();
      throw new UnsupportedOperationException( "This index ("+ index + ") does not contain payloads" );
  }

  /**
   * Returns the count of the current document, reading it from the bit stream on first request.
   *
   * <p>A cached value (anything other than -1) is returned directly; otherwise the count is
   * obtained as {@code ibs.readGamma() + 1} and the state moves to {@code BEFORE_POSITIONS}.
   *
   * @return the count for the current document.
   */
  public int count() throws IOException {
      if ( DEBUG ) System.err.println( this + ".count()" );
      if ( count != -1 ) return count;
      if ( ASSERTS ) ensureCurrentDocument();
      state = BEFORE_POSITIONS;
      count = ibs.readGamma() + 1;
      return count;
  }

  /**
   * Reads the positions of the current document into {@code positionCache}, assuming
   * {@code state <= BEFORE_POSITIONS}.
   *
   * <p>The count is read first if it has not been read yet. The cache is enlarged when it is
   * too small (at least doubling its length), and the state is left at {@code BEFORE_POINTER}.
   */
  @SuppressWarnings("unused")
  protected void updatePositionCache() throws IOException {
      if ( ASSERTS ) assert state <= BEFORE_POSITIONS;
      if ( state < BEFORE_POSITIONS ) {
          if ( state == BEFORE_COUNT ) {
              state = BEFORE_POSITIONS;
              count = ibs.readGamma() + 1;
          }
      }
      if ( count > positionCache.length ) positionCache = new int[ Math.max( positionCache.length * 2, count ) ];
      final int[] occ = positionCache;
      state = BEFORE_POINTER;
      ibs.readDeltas( occ, count );
      // Turn the values just read into absolute positions: each entry is a gap (minus one) from the previous one.
      for( int i = 1; i < count; i++ ) occ[ i ] += occ[ i - 1 ] + 1;
  }

  /**
   * Returns an iterator over the first {@code count} entries of the position cache,
   * filling the cache first if necessary.
   */
  public IntIterator positions() throws IOException {
      if ( ASSERTS ) ensureCurrentDocument();
      if ( state <= BEFORE_POSITIONS ) updatePositionCache();
      return IntIterators.wrap( positionCache, 0, count );
  }

  /**
   * Returns the position cache itself (not a copy), filling it first if necessary.
   *
   * <p>The array may be longer than {@code count}; only the first {@code count} entries
   * are filled for the current document.
   */
  public int[] positionArray() throws IOException {
      if ( ASSERTS ) ensureCurrentDocument();
      if ( state <= BEFORE_POSITIONS ) updatePositionCache();
      return positionCache;
  }

  // TODO: check who's using this (positionArray() is actually faster now)
  /**
   * Copies the positions of the current document into the given array.
   *
   * @param position the destination array.
   * @return the count, or minus the count (with nothing copied) if {@code position} is too short.
   */
  public int positions( final int[] position ) throws IOException {
      if ( ASSERTS ) ensureCurrentDocument();
      if ( state <= BEFORE_POSITIONS ) updatePositionCache();
      // And also that positions have been read
      if ( position.length < count ) return -count;
      System.arraycopy( positionCache, 0, position, 0, count );
      return count;
  }

  /**
   * Moves to the next document record.
   *
   * <p>Any unread count and positions of the current record are consumed first, so that
   * the stream is placed before the next pointer.
   *
   * @return the new current document, or -1 if {@link #endOfList()} is true.
   */
  public int nextDocument() throws IOException {
      if ( DEBUG ) System.err.println( "{" + this + "} nextDocument()" );
      if ( state != BEFORE_POINTER ) {
          if ( state == BEFORE_COUNT ) {
              state = BEFORE_POSITIONS;
              count = ibs.readGamma() + 1;
          }
          if ( state == BEFORE_POSITIONS ) {
              // Here we just skip; note that the state change is necessary if endOfList() is true
              state = BEFORE_POINTER;
              ibs.skipDeltas( count );
          }
      }
      if ( endOfList() ) return -1;
      if ( hasPointers ) {
          // We do not write pointers for everywhere occurring terms.
          currentDocument += ibs.readDelta() + 1;
      }
      else currentDocument++;
      numberOfDocumentRecord++;
      state = BEFORE_COUNT;
      count = -1;
      return currentDocument;
  }

  /**
   * Advances sequentially until the current document is at least {@code p}.
   *
   * @param p the lower bound for the document to reach.
   * @return the current document if it is at least {@code p} (possibly without moving),
   * or {@link Integer#MAX_VALUE} if the list ends first.
   */
  public int skipTo( final int p ) throws IOException {
      if ( DEBUG ) System.err.println( this + ".skipTo(" + p + ") [currentDocument=" + currentDocument + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", endOfList()=" + endOfList() + "]" );
      // If we are just at the start of a list, let us read the first pointer.
      if ( numberOfDocumentRecord == -1 ) nextDocument(); // TODO: shouldn't we just read the tower?
      if ( currentDocument >= p ) {
          if ( DEBUG ) System.err.println( this + ": No skip necessary, returning " + currentDocument );
          return currentDocument;
      }
      while( currentDocument < p ) {
          if ( DEBUG ) System.err.println( this + ": Skipping sequentially (second), currentDocument=" + currentDocument + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", p=" + p );
          if ( nextDocument() == -1 ) {
              if ( DEBUG ) System.err.println( this + ": end-of-list, returning MAX_VALUE" );
              return Integer.MAX_VALUE;
          }
      }
      if ( DEBUG ) System.err.println( this + ".skipTo(): Returning " + currentDocument );
      return currentDocument;
  }

  /** Closes the parent reader. */
  public void dispose() throws IOException {
      parent.close();
  }

  /** Returns true if the end of the list has not been reached yet. */
  public boolean hasNext() {
      return ! endOfList();
  }

  /**
   * Returns the next document, as {@link #nextDocument()} does.
   *
   * @throws NoSuchElementException if {@link #hasNext()} is false.
   * @throws RuntimeException wrapping any {@link IOException} thrown while reading.
   */
  public int nextInt() {
      if ( ! hasNext() ) throw new NoSuchElementException();
      try {
          return nextDocument();
      }
      catch ( IOException e ) {
          throw new RuntimeException( e );
      }
  }

  public String toString() {
      return index + " [" + currentTerm + "]";
  }

  /** An interval iterator returning the positions of the current document as singleton intervals. */
  private final class IndexIntervalIterator extends AbstractObjectIterator<Interval> implements IntervalIterator {
      /** Index into the position cache of the last position returned, or -1 if none has been returned. */
      int pos = -1;

      /** Restarts the iteration, filling the position cache if positions have not been read yet. */
      public void reset() throws IOException {
          pos = -1;
          if ( state <= BEFORE_POSITIONS ) updatePositionCache(); // This guarantees the position cache is ok
      }

      /** Adds the current term of the enclosing iterator to the given set. */
      public void intervalTerms( final IntSet terms ) {
          terms.add( BitStreamIndexReaderIndexIterator.this.currentTerm );
      }

      public boolean hasNext() {
          return pos < count - 1;
      }

      public Interval next() {
          if ( ! hasNext() ) throw new NoSuchElementException();
          return Interval.valueOf( positionCache[ ++pos ] );
      }

      /** Returns the next singleton interval, or {@code null} when the positions are exhausted. */
      public Interval nextInterval() {
          return pos < count - 1 ? Interval.valueOf( positionCache[ ++pos ] ) : null;
      }

      /** Every interval returned is a singleton, so the extent is always 1. */
      public int extent() {
          return 1;
      }

      public String toString() {
          return index + ": " + term + "[doc=" + currentDocument + ", count=" + count + ", pos=" + pos + "]";
      }
  }

  /** Resets the interval iterator for the key index and returns the singleton map {@code singletonIntervalIterator}. */
  public Reference2ReferenceMap<Index,IntervalIterator> intervalIterators() throws IOException {
      intervalIterator();
      return singletonIntervalIterator;
  }

  /** Returns the interval iterator for {@code keyIndex}. */
  public IntervalIterator intervalIterator() throws IOException {
      return intervalIterator( keyIndex );
  }

  /**
   * Returns the interval iterator for the given index.
   *
   * @param index an index.
   * @return {@code IntervalIterators.TRUE} if {@code index} is not {@code keyIndex};
   * otherwise the (reset) interval iterator of this index iterator.
   */
  public IntervalIterator intervalIterator( final Index index ) throws IOException {
      if ( ASSERTS ) ensureCurrentDocument();
      // TODO: this was if ( index != keyIndex || hasPayloads )
      if ( index != keyIndex ) return IntervalIterators.TRUE;
      if ( ASSERTS ) assert intervalIterator != null;
      intervalIterator.reset();
      return intervalIterator;
  }

  public ReferenceSet<Index> indices() {
      return index.singletonSet;
  }
 }

 /**
  * Positions the shared index iterator on the given term.
  *
  * @param term the term as a character sequence (the callers in this class may pass {@code null}).
  * @param termNumber the number of the term.
  * @return the shared {@code indexIterator}, positioned on the term.
  */
 private IndexIterator documents( final CharSequence term, final int termNumber ) throws IOException {
     indexIterator.term( term );
     indexIterator.position( termNumber );
     return indexIterator;
 }

 /** Returns the index iterator positioned on the term with the given number. */
 public IndexIterator documents( final int term ) throws IOException {
     return documents( null, term );
 }

 /**
  * Returns the index iterator positioned on the given term, resolved through the term map of the index.
  *
  * @param term a term.
  * @return the index iterator for the term, or {@code index.emptyIndexIterator} if the term map returns -1.
  * @throws IllegalStateException if this reader has been closed.
  * @throws UnsupportedOperationException if the index has no term map.
  */
 public IndexIterator documents( final CharSequence term ) throws IOException {
     if ( closed ) throw new IllegalStateException( "This " + getClass().getSimpleName() + " has been closed" );
     if ( index.termMap != null ) {
         final int termIndex = (int)index.termMap.getLong( term );
         if ( termIndex == -1 ) return index.emptyIndexIterator;
         return documents( term, termIndex );
     }
     throw new UnsupportedOperationException( "Index " + index + " has no term map" );
 }

 @Override
 public IndexIterator nextIterator() throws IOException {
     return indexIterator.advance();
 }

 public String toString() {
     return getClass().getSimpleName() + "[" + index + "]";
 }

 /**
  * Closes this reader and the bit stream of its index iterator.
  *
  * <p>The bit stream is closed even if {@code super.close()} throws.
  */
 public void close() throws IOException {
     try {
         super.close();
     }
     finally {
         indexIterator.ibs.close();
     }
 }
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -