TermsHashPerField.java
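// NOTE: this excerpt begins partway through the secondary
// add(Token token, int textStart), inside its collision-probe loop;
// the class header, fields, and the first half of that method are
// not shown.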
      do {
        code += inc;
        hashPos = code & postingsHashMask;
        p = postingsHash[hashPos];
      } while (p != null && p.textStart != textStart);
    }

    if (p == null) {

      // First time we are seeing this token since we last
      // flushed the hash.

      // Refill?
      if (0 == perThread.freePostingsCount)
        perThread.morePostings();

      // Pull next free RawPostingList from free list
      p = perThread.freePostings[--perThread.freePostingsCount];
      assert p != null;

      p.textStart = textStart;

      assert postingsHash[hashPos] == null;
      postingsHash[hashPos] = p;
      numPostings++;

      if (numPostings == postingsHashHalfSize)
        rehashPostings(2*postingsHashSize);

      // Init stream slices
      if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE)
        intPool.nextBuffer();

      if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE)
        bytePool.nextBuffer();

      intUptos = intPool.buffer;
      intUptoStart = intPool.intUpto;
      intPool.intUpto += streamCount;

      p.intStart = intUptoStart + intPool.intOffset;

      for(int i=0;i<streamCount;i++) {
        final int upto = bytePool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
        intUptos[intUptoStart+i] = upto + bytePool.byteOffset;
      }
      p.byteStart = intUptos[intUptoStart];

      consumer.newTerm(token, p);

    } else {
      intUptos = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
      intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
      consumer.addTerm(token, p);
    }
  }

  // Primary entry point (for first TermsHash)
  void add(Token token) throws IOException {

    assert !postingsCompacted;

    // We are first in the chain so we must "intern" the
    // term text into textStart address

    // Get the text of this term.
    final char[] tokenText = token.termBuffer();
    final int tokenTextLen = token.termLength();

    // Compute hashcode & replace any invalid UTF16 sequences
    int downto = tokenTextLen;
    int code = 0;
    while (downto > 0) {
      char ch = tokenText[--downto];

      if (ch >= UnicodeUtil.UNI_SUR_LOW_START && ch <= UnicodeUtil.UNI_SUR_LOW_END) {
        if (0 == downto) {
          // Unpaired
          ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR;
        } else {
          final char ch2 = tokenText[downto-1];
          if (ch2 >= UnicodeUtil.UNI_SUR_HIGH_START && ch2 <= UnicodeUtil.UNI_SUR_HIGH_END) {
            // OK: high followed by low.  This is a valid
            // surrogate pair.
            code = ((code*31) + ch)*31+ch2;
            downto--;
            continue;
          } else {
            // Unpaired
            ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR;
          }
        }
      } else if (ch >= UnicodeUtil.UNI_SUR_HIGH_START && ch <= UnicodeUtil.UNI_SUR_HIGH_END)
        // Unpaired
        ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR;

      code = (code*31) + ch;
    }

    int hashPos = code & postingsHashMask;

    // Locate RawPostingList in hash
    p = postingsHash[hashPos];

    if (p != null && !postingEquals(tokenText, tokenTextLen)) {
      // Conflict: keep searching different locations in
      // the hash table.
      final int inc = ((code>>8)+code)|1;
      do {
        code += inc;
        hashPos = code & postingsHashMask;
        p = postingsHash[hashPos];
      } while (p != null && !postingEquals(tokenText, tokenTextLen));
    }

    if (p == null) {

      // First time we are seeing this token since we last
      // flushed the hash.
      final int textLen1 = 1+tokenTextLen;
      if (textLen1 + charPool.charUpto > DocumentsWriter.CHAR_BLOCK_SIZE) {
        if (textLen1 > DocumentsWriter.CHAR_BLOCK_SIZE) {
          // Just skip this term, to remain as robust as
          // possible during indexing.  A TokenFilter
          // can be inserted into the analyzer chain if
          // other behavior is wanted (pruning the term
          // to a prefix, throwing an exception, etc).
          if (docState.maxTermPrefix == null)
            docState.maxTermPrefix = new String(tokenText, 0, 30);

          consumer.skippingLongTerm(token);
          return;
        }
        charPool.nextBuffer();
      }

      // Refill?
      if (0 == perThread.freePostingsCount)
        perThread.morePostings();

      // Pull next free RawPostingList from free list
      p = perThread.freePostings[--perThread.freePostingsCount];
      assert p != null;

      final char[] text = charPool.buffer;
      final int textUpto = charPool.charUpto;
      p.textStart = textUpto + charPool.charOffset;
      charPool.charUpto += textLen1;
      System.arraycopy(tokenText, 0, text, textUpto, tokenTextLen);
      text[textUpto+tokenTextLen] = 0xffff;

      assert postingsHash[hashPos] == null;
      postingsHash[hashPos] = p;
      numPostings++;

      if (numPostings == postingsHashHalfSize)
        rehashPostings(2*postingsHashSize);

      // Init stream slices
      if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE)
        intPool.nextBuffer();

      if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE)
        bytePool.nextBuffer();

      intUptos = intPool.buffer;
      intUptoStart = intPool.intUpto;
      intPool.intUpto += streamCount;

      p.intStart = intUptoStart + intPool.intOffset;

      for(int i=0;i<streamCount;i++) {
        final int upto = bytePool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
        intUptos[intUptoStart+i] = upto + bytePool.byteOffset;
      }
      p.byteStart = intUptos[intUptoStart];

      consumer.newTerm(token, p);

    } else {
      intUptos = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
      intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
      consumer.addTerm(token, p);
    }

    if (doNextCall)
      nextPerField.add(token, p.textStart);
  }

  int[] intUptos;
  int intUptoStart;

  void writeByte(int stream, byte b) {
    int upto = intUptos[intUptoStart+stream];
    byte[] bytes = bytePool.buffers[upto >> DocumentsWriter.BYTE_BLOCK_SHIFT];
    assert bytes != null;
    int offset = upto & DocumentsWriter.BYTE_BLOCK_MASK;
    if (bytes[offset] != 0) {
      // End of slice; allocate a new one
      offset = bytePool.allocSlice(bytes, offset);
      bytes = bytePool.buffer;
      intUptos[intUptoStart+stream] = offset + bytePool.byteOffset;
    }
    bytes[offset] = b;
    (intUptos[intUptoStart+stream])++;
  }

  public void writeBytes(int stream, byte[] b, int offset, int len) {
    // TODO: optimize
    final int end = offset + len;
    for(int i=offset;i<end;i++)
      writeByte(stream, b[i]);
  }

  void writeVInt(int stream, int i) {
    assert stream < streamCount;
    while ((i & ~0x7F) != 0) {
      writeByte(stream, (byte)((i & 0x7f) | 0x80));
      i >>>= 7;
    }
    writeByte(stream, (byte) i);
  }

  void finish() throws IOException {
    consumer.finish();
    if (nextPerField != null)
      nextPerField.finish();
  }

  /** Called when postings hash is too small (> 50%
   *  occupied) or too large (< 20% occupied). */
  void rehashPostings(final int newSize) {

    final int newMask = newSize-1;

    RawPostingList[] newHash = new RawPostingList[newSize];
    for(int i=0;i<postingsHashSize;i++) {
      RawPostingList p0 = postingsHash[i];
      if (p0 != null) {
        int code;
        if (perThread.primary) {
          final int start = p0.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
          final char[] text = charPool.buffers[p0.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
          int pos = start;
          while(text[pos] != 0xffff)
            pos++;
          code = 0;
          while (pos > start)
            code = (code*31) + text[--pos];
        } else
          code = p0.textStart;

        int hashPos = code & newMask;
        assert hashPos >= 0;
        if (newHash[hashPos] != null) {
          final int inc = ((code>>8)+code)|1;
          do {
            code += inc;
            hashPos = code & newMask;
          } while (newHash[hashPos] != null);
        }
        newHash[hashPos] = p0;
      }
    }

    postingsHashMask = newMask;
    postingsHash = newHash;
    postingsHashSize = newSize;
    postingsHashHalfSize = newSize >> 1;
  }
}
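The collision handling that both add() methods and rehashPostings() share is worth seeing in isolation: open addressing over a power-of-two table, re-probing with a per-key step of ((code>>8)+code)|1. Forcing the step odd makes it coprime with the table size, so the probe sequence is guaranteed to visit every slot. Below is a toy sketch of just that probe; the plain ints stand in for RawPostingList entries, and the class name and 8-slot table are mine, not Lucene's:

class ProbeSketch {
  public static void main(String[] args) {
    final int size = 8;            // must be a power of two
    final int mask = size - 1;
    int[] table = new int[size];   // 0 means "empty slot" in this toy

    for (int key : new int[]{3, 11, 19}) {  // all three hash to slot 3
      int code = key;
      int slot = code & mask;
      if (table[slot] != 0) {
        // Same secondary-hash step as add() and rehashPostings();
        // the |1 keeps it odd.
        final int inc = ((code >> 8) + code) | 1;
        do {
          code += inc;
          slot = code & mask;
        } while (table[slot] != 0);
      }
      table[slot] = key;
      System.out.println("key " + key + " -> slot " + slot);
    }
  }
}

The |1 is load-bearing: an even step on an even-sized table can only ever reach half the slots, so a sufficiently full table would make the probe loop forever.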
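The primary add() "interns" each new term: the text is copied once into a shared char pool, terminated with the sentinel 0xffff (a Unicode noncharacter, so it can never collide with real term text), and from then on the term is identified purely by its textStart offset. rehashPostings() recovers the hash code from that offset by scanning forward to the sentinel. A minimal sketch, assuming a single fixed buffer rather than Lucene's chain of CHAR_BLOCK_SIZE buffers (all names here are mine):

class CharPoolSketch {
  static final char END = 0xffff;      // same sentinel the real code writes
  static char[] pool = new char[256];  // one fixed buffer for the sketch
  static int poolUpto = 0;

  // Copy the term into the pool, write the sentinel, and return the
  // textStart address that identifies the term from now on.
  static int intern(char[] term, int len) {
    int textStart = poolUpto;
    System.arraycopy(term, 0, pool, poolUpto, len);
    pool[poolUpto + len] = END;
    poolUpto += len + 1;
    return textStart;
  }

  // Recompute the hash from a textStart address: scan to the sentinel,
  // then fold chars back-to-front with multiplier 31, the same order
  // add() and rehashPostings() use.
  static int hash(int textStart) {
    int pos = textStart;
    while (pool[pos] != END)
      pos++;
    int code = 0;
    while (pos > textStart)
      code = (code * 31) + pool[--pos];
    return code;
  }

  public static void main(String[] args) {
    char[] term = "lucene".toCharArray();
    int textStart = intern(term, term.length);
    System.out.println("textStart=" + textStart + ", hash=" + hash(textStart));
  }
}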
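Finally, the per-stream encoding: writeVInt() stores ints in variable-byte form, seven payload bits per byte from least significant up, with the high bit flagging that another byte follows, so small values (the common case for frequency and position deltas) cost a single byte. A self-contained round-trip sketch, with a ByteArrayOutputStream standing in for the slice-backed streams; the decoder is my addition, since this excerpt only shows the writer:

import java.io.ByteArrayOutputStream;

class VIntSketch {
  // Encode exactly as writeVInt above does.
  static void writeVInt(ByteArrayOutputStream out, int i) {
    while ((i & ~0x7F) != 0) {
      out.write((i & 0x7F) | 0x80);  // low seven bits, continuation bit set
      i >>>= 7;
    }
    out.write(i);                    // final byte, high bit clear
  }

  // Decode: accumulate seven bits at a time until a byte arrives with
  // the high bit clear.
  static int readVInt(byte[] bytes, int pos) {
    int value = 0, shift = 0;
    byte b;
    do {
      b = bytes[pos++];
      value |= (b & 0x7F) << shift;
      shift += 7;
    } while ((b & 0x80) != 0);
    return value;
  }

  public static void main(String[] args) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    writeVInt(out, 127);   // largest one-byte value
    writeVInt(out, 128);   // first two-byte value: 0x80 0x01
    byte[] bytes = out.toByteArray();
    System.out.println(readVInt(bytes, 0));  // prints 127
    System.out.println(readVInt(bytes, 1));  // prints 128
  }
}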