TermsHashPerField.java
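// NOTE: this excerpt begins partway through the secondary
// add(Token token, int textStart), inside its collision-probe loop;
// the class header, fields, and the first half of that method are
// not shown.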
      do {
        code += inc;
        hashPos = code & postingsHashMask;
        p = postingsHash[hashPos];
      } while (p != null && p.textStart != textStart);
    }

    if (p == null) {

      // First time we are seeing this token since we last
      // flushed the hash.

      // Refill?
      if (0 == perThread.freePostingsCount)
        perThread.morePostings();

      // Pull next free RawPostingList from free list
      p = perThread.freePostings[--perThread.freePostingsCount];
      assert p != null;

      p.textStart = textStart;

      assert postingsHash[hashPos] == null;
      postingsHash[hashPos] = p;
      numPostings++;

      if (numPostings == postingsHashHalfSize)
        rehashPostings(2*postingsHashSize);

      // Init stream slices
      if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE)
        intPool.nextBuffer();

      if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE)
        bytePool.nextBuffer();

      intUptos = intPool.buffer;
      intUptoStart = intPool.intUpto;
      intPool.intUpto += streamCount;

      p.intStart = intUptoStart + intPool.intOffset;

      for(int i=0;i<streamCount;i++) {
        final int upto = bytePool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
        intUptos[intUptoStart+i] = upto + bytePool.byteOffset;
      }
      p.byteStart = intUptos[intUptoStart];

      consumer.newTerm(token, p);

    } else {
      intUptos = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
      intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
      consumer.addTerm(token, p);
    }
  }

  // Primary entry point (for first TermsHash)
  void add(Token token) throws IOException {

    assert !postingsCompacted;

    // We are first in the chain so we must "intern" the
    // term text into textStart address

    // Get the text of this term.
    final char[] tokenText = token.termBuffer();
    final int tokenTextLen = token.termLength();

    // Compute hashcode & replace any invalid UTF16 sequences
    int downto = tokenTextLen;
    int code = 0;
    while (downto > 0) {
      char ch = tokenText[--downto];

      if (ch >= UnicodeUtil.UNI_SUR_LOW_START && ch <= UnicodeUtil.UNI_SUR_LOW_END) {
        if (0 == downto) {
          // Unpaired
          ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR;
        } else {
          final char ch2 = tokenText[downto-1];
          if (ch2 >= UnicodeUtil.UNI_SUR_HIGH_START && ch2 <= UnicodeUtil.UNI_SUR_HIGH_END) {
            // OK: high followed by low.  This is a valid
            // surrogate pair.
            code = ((code*31) + ch)*31+ch2;
            downto--;
            continue;
          } else {
            // Unpaired
            ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR;
          }
        }
      } else if (ch >= UnicodeUtil.UNI_SUR_HIGH_START && ch <= UnicodeUtil.UNI_SUR_HIGH_END)
        // Unpaired
        ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR;

      code = (code*31) + ch;
    }

    int hashPos = code & postingsHashMask;

    // Locate RawPostingList in hash
    p = postingsHash[hashPos];

    if (p != null && !postingEquals(tokenText, tokenTextLen)) {
      // Conflict: keep searching different locations in
      // the hash table.
      final int inc = ((code>>8)+code)|1;
      do {
        code += inc;
        hashPos = code & postingsHashMask;
        p = postingsHash[hashPos];
      } while (p != null && !postingEquals(tokenText, tokenTextLen));
    }

    if (p == null) {

      // First time we are seeing this token since we last
      // flushed the hash.
      final int textLen1 = 1+tokenTextLen;
      if (textLen1 + charPool.charUpto > DocumentsWriter.CHAR_BLOCK_SIZE) {
        if (textLen1 > DocumentsWriter.CHAR_BLOCK_SIZE) {
          // Just skip this term, to remain as robust as
          // possible during indexing.  A TokenFilter
          // can be inserted into the analyzer chain if
          // other behavior is wanted (pruning the term
          // to a prefix, throwing an exception, etc).
          if (docState.maxTermPrefix == null)
            docState.maxTermPrefix = new String(tokenText, 0, 30);

          consumer.skippingLongTerm(token);
          return;
        }
        charPool.nextBuffer();
      }

      // Refill?
      if (0 == perThread.freePostingsCount)
        perThread.morePostings();

      // Pull next free RawPostingList from free list
      p = perThread.freePostings[--perThread.freePostingsCount];
      assert p != null;

      final char[] text = charPool.buffer;
      final int textUpto = charPool.charUpto;
      p.textStart = textUpto + charPool.charOffset;
      charPool.charUpto += textLen1;
      System.arraycopy(tokenText, 0, text, textUpto, tokenTextLen);
      text[textUpto+tokenTextLen] = 0xffff;

      assert postingsHash[hashPos] == null;
      postingsHash[hashPos] = p;
      numPostings++;

      if (numPostings == postingsHashHalfSize)
        rehashPostings(2*postingsHashSize);

      // Init stream slices
      if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE)
        intPool.nextBuffer();

      if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE)
        bytePool.nextBuffer();

      intUptos = intPool.buffer;
      intUptoStart = intPool.intUpto;
      intPool.intUpto += streamCount;

      p.intStart = intUptoStart + intPool.intOffset;

      for(int i=0;i<streamCount;i++) {
        final int upto = bytePool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
        intUptos[intUptoStart+i] = upto + bytePool.byteOffset;
      }
      p.byteStart = intUptos[intUptoStart];

      consumer.newTerm(token, p);

    } else {
      intUptos = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
      intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
      consumer.addTerm(token, p);
    }

    if (doNextCall)
      nextPerField.add(token, p.textStart);
  }

  int[] intUptos;
  int intUptoStart;

  void writeByte(int stream, byte b) {
    int upto = intUptos[intUptoStart+stream];
    byte[] bytes = bytePool.buffers[upto >> DocumentsWriter.BYTE_BLOCK_SHIFT];
    assert bytes != null;
    int offset = upto & DocumentsWriter.BYTE_BLOCK_MASK;
    if (bytes[offset] != 0) {
      // End of slice; allocate a new one
      offset = bytePool.allocSlice(bytes, offset);
      bytes = bytePool.buffer;
      intUptos[intUptoStart+stream] = offset + bytePool.byteOffset;
    }
    bytes[offset] = b;
    (intUptos[intUptoStart+stream])++;
  }

  public void writeBytes(int stream, byte[] b, int offset, int len) {
    // TODO: optimize
    final int end = offset + len;
    for(int i=offset;i<end;i++)
      writeByte(stream, b[i]);
  }

  void writeVInt(int stream, int i) {
    assert stream < streamCount;
    while ((i & ~0x7F) != 0) {
      writeByte(stream, (byte)((i & 0x7f) | 0x80));
      i >>>= 7;
    }
    writeByte(stream, (byte) i);
  }

  void finish() throws IOException {
    consumer.finish();
    if (nextPerField != null)
      nextPerField.finish();
  }

  /** Called when postings hash is too small (> 50%
   *  occupied) or too large (< 20% occupied). */
  void rehashPostings(final int newSize) {

    final int newMask = newSize-1;

    RawPostingList[] newHash = new RawPostingList[newSize];
    for(int i=0;i<postingsHashSize;i++) {
      RawPostingList p0 = postingsHash[i];
      if (p0 != null) {
        int code;
        if (perThread.primary) {
          final int start = p0.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
          final char[] text = charPool.buffers[p0.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
          int pos = start;
          while(text[pos] != 0xffff)
            pos++;
          code = 0;
          while (pos > start)
            code = (code*31) + text[--pos];
        } else
          code = p0.textStart;

        int hashPos = code & newMask;
        assert hashPos >= 0;
        if (newHash[hashPos] != null) {
          final int inc = ((code>>8)+code)|1;
          do {
            code += inc;
            hashPos = code & newMask;
          } while (newHash[hashPos] != null);
        }
        newHash[hashPos] = p0;
      }
    }

    postingsHashMask = newMask;
    postingsHash = newHash;
    postingsHashSize = newSize;
    postingsHashHalfSize = newSize >> 1;
  }
}
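The collision handling that both add() methods and rehashPostings() share is worth seeing in isolation: open addressing over a power-of-two table, re-probing with a per-key step of ((code>>8)+code)|1. Forcing the step odd makes it coprime with the table size, so the probe sequence is guaranteed to visit every slot. Below is a toy sketch of just that probe; the plain ints stand in for RawPostingList entries, and the class name and 8-slot table are mine, not Lucene's:

class ProbeSketch {
  public static void main(String[] args) {
    final int size = 8;            // must be a power of two
    final int mask = size - 1;
    int[] table = new int[size];   // 0 means "empty slot" in this toy

    for (int key : new int[]{3, 11, 19}) {  // all three hash to slot 3
      int code = key;
      int slot = code & mask;
      if (table[slot] != 0) {
        // Same secondary-hash step as add() and rehashPostings();
        // the |1 keeps it odd.
        final int inc = ((code >> 8) + code) | 1;
        do {
          code += inc;
          slot = code & mask;
        } while (table[slot] != 0);
      }
      table[slot] = key;
      System.out.println("key " + key + " -> slot " + slot);
    }
  }
}

The |1 is load-bearing: an even step on an even-sized table can only ever reach half the slots, so a sufficiently full table would make the probe loop forever.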
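The primary add() "interns" each new term: the text is copied once into a shared char pool, terminated with the sentinel 0xffff (a Unicode noncharacter, so it can never collide with real term text), and from then on the term is identified purely by its textStart offset. rehashPostings() recovers the hash code from that offset by scanning forward to the sentinel. A minimal sketch, assuming a single fixed buffer rather than Lucene's chain of CHAR_BLOCK_SIZE buffers (all names here are mine):

class CharPoolSketch {
  static final char END = 0xffff;      // same sentinel the real code writes
  static char[] pool = new char[256];  // one fixed buffer for the sketch
  static int poolUpto = 0;

  // Copy the term into the pool, write the sentinel, and return the
  // textStart address that identifies the term from now on.
  static int intern(char[] term, int len) {
    int textStart = poolUpto;
    System.arraycopy(term, 0, pool, poolUpto, len);
    pool[poolUpto + len] = END;
    poolUpto += len + 1;
    return textStart;
  }

  // Recompute the hash from a textStart address: scan to the sentinel,
  // then fold chars back-to-front with multiplier 31, the same order
  // add() and rehashPostings() use.
  static int hash(int textStart) {
    int pos = textStart;
    while (pool[pos] != END)
      pos++;
    int code = 0;
    while (pos > textStart)
      code = (code * 31) + pool[--pos];
    return code;
  }

  public static void main(String[] args) {
    char[] term = "lucene".toCharArray();
    int textStart = intern(term, term.length);
    System.out.println("textStart=" + textStart + ", hash=" + hash(textStart));
  }
}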
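Finally, the per-stream encoding: writeVInt() stores ints in variable-byte form, seven payload bits per byte from least significant up, with the high bit flagging that another byte follows, so small values (the common case for frequency and position deltas) cost a single byte. A self-contained round-trip sketch, with a ByteArrayOutputStream standing in for the slice-backed streams; the decoder is my addition, since this excerpt only shows the writer:

import java.io.ByteArrayOutputStream;

class VIntSketch {
  // Encode exactly as writeVInt above does.
  static void writeVInt(ByteArrayOutputStream out, int i) {
    while ((i & ~0x7F) != 0) {
      out.write((i & 0x7F) | 0x80);  // low seven bits, continuation bit set
      i >>>= 7;
    }
    out.write(i);                    // final byte, high bit clear
  }

  // Decode: accumulate seven bits at a time until a byte arrives with
  // the high bit clear.
  static int readVInt(byte[] bytes, int pos) {
    int value = 0, shift = 0;
    byte b;
    do {
      b = bytes[pos++];
      value |= (b & 0x7F) << shift;
      shift += 7;
    } while ((b & 0x80) != 0);
    return value;
  }

  public static void main(String[] args) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    writeVInt(out, 127);   // largest one-byte value
    writeVInt(out, 128);   // first two-byte value: 0x80 0x01
    byte[] bytes = out.toByteArray();
    System.out.println(readVInt(bytes, 0));  // prints 127
    System.out.println(readVInt(bytes, 1));  // prints 128
  }
}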