📄 TermsHashPerField.java
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.document.Fieldable;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.util.UnicodeUtil;

final class TermsHashPerField extends InvertedDocConsumerPerField {

  final TermsHashConsumerPerField consumer;
  final TermsHashPerField nextPerField;
  final TermsHashPerThread perThread;
  final DocumentsWriter.DocState docState;
  final DocInverter.FieldInvertState fieldState;

  // Copied from our perThread
  final CharBlockPool charPool;
  final IntBlockPool intPool;
  final ByteBlockPool bytePool;

  final int streamCount;
  final int numPostingInt;

  final FieldInfo fieldInfo;

  boolean postingsCompacted;
  int numPostings;
  private int postingsHashSize = 4;
  private int postingsHashHalfSize = postingsHashSize/2;
  private int postingsHashMask = postingsHashSize-1;
  private RawPostingList[] postingsHash = new RawPostingList[postingsHashSize];
  private RawPostingList p;

  public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHashPerThread perThread,
                           final TermsHashPerThread nextPerThread, final FieldInfo fieldInfo) {
    this.perThread = perThread;
    intPool = perThread.intPool;
    charPool = perThread.charPool;
    bytePool = perThread.bytePool;
    docState = perThread.docState;
    fieldState = docInverterPerField.fieldState;
    this.consumer = perThread.consumer.addField(this, fieldInfo);
    streamCount = consumer.getStreamCount();
    numPostingInt = 2*streamCount;
    this.fieldInfo = fieldInfo;
    if (nextPerThread != null)
      nextPerField = (TermsHashPerField) nextPerThread.addField(docInverterPerField, fieldInfo);
    else
      nextPerField = null;
  }

  void shrinkHash(int targetSize) {
    assert postingsCompacted || numPostings == 0;

    // Cannot use ArrayUtil.shrink because we require power
    // of 2:
    int newSize = postingsHash.length;
    while(newSize >= 8 && newSize/4 > targetSize) {
      newSize /= 2;
    }

    if (newSize != postingsHash.length) {
      postingsHash = new RawPostingList[newSize];
      postingsHashSize = newSize;
      postingsHashHalfSize = newSize/2;
      postingsHashMask = newSize-1;
    }
  }

  public void reset() {
    if (!postingsCompacted)
      compactPostings();
    assert numPostings <= postingsHash.length;
    if (numPostings > 0) {
      perThread.termsHash.recyclePostings(postingsHash, numPostings);
      Arrays.fill(postingsHash, 0, numPostings, null);
      numPostings = 0;
    }
    postingsCompacted = false;
    if (nextPerField != null)
      nextPerField.reset();
  }

  synchronized public void abort() {
    reset();
    if (nextPerField != null)
      nextPerField.abort();
  }

  public void initReader(ByteSliceReader reader, RawPostingList p, int stream) {
    assert stream < streamCount;
    final int[] ints = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
    final int upto = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
    reader.init(bytePool,
                p.byteStart+stream*ByteBlockPool.FIRST_LEVEL_SIZE,
                ints[upto+stream]);
  }

  private synchronized void compactPostings() {
    int upto = 0;
    for(int i=0;i<postingsHashSize;i++) {
      if (postingsHash[i] != null) {
        if (upto < i) {
          postingsHash[upto] = postingsHash[i];
          postingsHash[i] = null;
        }
        upto++;
      }
    }

    assert upto == numPostings;
    postingsCompacted = true;
  }

  /** Collapse the hash table & sort in-place. */
  public RawPostingList[] sortPostings() {
    compactPostings();
    quickSort(postingsHash, 0, numPostings-1);
    return postingsHash;
  }

  void quickSort(RawPostingList[] postings, int lo, int hi) {
    if (lo >= hi)
      return;
    else if (hi == 1+lo) {
      if (comparePostings(postings[lo], postings[hi]) > 0) {
        final RawPostingList tmp = postings[lo];
        postings[lo] = postings[hi];
        postings[hi] = tmp;
      }
      return;
    }

    int mid = (lo + hi) >>> 1;

    if (comparePostings(postings[lo], postings[mid]) > 0) {
      RawPostingList tmp = postings[lo];
      postings[lo] = postings[mid];
      postings[mid] = tmp;
    }

    if (comparePostings(postings[mid], postings[hi]) > 0) {
      RawPostingList tmp = postings[mid];
      postings[mid] = postings[hi];
      postings[hi] = tmp;

      if (comparePostings(postings[lo], postings[mid]) > 0) {
        RawPostingList tmp2 = postings[lo];
        postings[lo] = postings[mid];
        postings[mid] = tmp2;
      }
    }

    int left = lo + 1;
    int right = hi - 1;

    if (left >= right)
      return;

    RawPostingList partition = postings[mid];

    for (;;) {
      while (comparePostings(postings[right], partition) > 0)
        --right;

      while (left < right && comparePostings(postings[left], partition) <= 0)
        ++left;

      if (left < right) {
        RawPostingList tmp = postings[left];
        postings[left] = postings[right];
        postings[right] = tmp;
        --right;
      } else {
        break;
      }
    }

    quickSort(postings, lo, left);
    quickSort(postings, left + 1, hi);
  }

  /** Compares term text for two Posting instances and
   *  returns -1 if p1 < p2; 1 if p1 > p2; else 0. */
  int comparePostings(RawPostingList p1, RawPostingList p2) {

    if (p1 == p2)
      return 0;

    final char[] text1 = charPool.buffers[p1.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
    int pos1 = p1.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
    final char[] text2 = charPool.buffers[p2.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
    int pos2 = p2.textStart & DocumentsWriter.CHAR_BLOCK_MASK;

    assert text1 != text2 || pos1 != pos2;

    while(true) {
      final char c1 = text1[pos1++];
      final char c2 = text2[pos2++];
      if (c1 != c2) {
        if (0xffff == c2)
          return 1;
        else if (0xffff == c1)
          return -1;
        else
          return c1-c2;
      } else
        // This method should never compare equal postings
        // unless p1==p2
        assert c1 != 0xffff;
    }
  }

  /** Test whether the text for current RawPostingList p equals
   *  current tokenText.
   */
  private boolean postingEquals(final char[] tokenText, final int tokenTextLen) {

    final char[] text = perThread.charPool.buffers[p.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
    assert text != null;
    int pos = p.textStart & DocumentsWriter.CHAR_BLOCK_MASK;

    int tokenPos = 0;
    for(;tokenPos<tokenTextLen;pos++,tokenPos++)
      if (tokenText[tokenPos] != text[pos])
        return false;
    return 0xffff == text[pos];
  }

  private boolean doCall;
  private boolean doNextCall;

  boolean start(Fieldable[] fields, int count) throws IOException {
    doCall = consumer.start(fields, count);
    if (nextPerField != null)
      doNextCall = nextPerField.start(fields, count);
    return doCall || doNextCall;
  }

  // Secondary entry point (for 2nd & subsequent TermsHash),
  // because token text has already been "interned" into
  // textStart, so we hash by textStart
  public void add(Token token, int textStart) throws IOException {

    int code = textStart;

    int hashPos = code & postingsHashMask;

    assert !postingsCompacted;

    // Locate RawPostingList in hash
    p = postingsHash[hashPos];

    if (p != null && p.textStart != textStart) {
      // Conflict: keep searching different locations in
      // the hash table.
      final int inc = ((code>>8)+code)|1;
      do {
        code += inc;
        hashPos = code & postingsHashMask;
        p = postingsHash[hashPos];
      } while (p != null && p.textStart != textStart);
    }

    // ... (source truncated here)
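
The captured source breaks off just after add() computes its probe increment. To make the collision handling concrete, here is a minimal standalone sketch of the same open-addressing scheme: a power-of-two table indexed with a mask instead of a modulo, and a per-key probe increment of ((code>>8)+code)|1. The |1 forces the increment to be odd, hence coprime with the power-of-two table size, so the probe sequence visits every slot before repeating. The ProbeSketch class and its int-keyed table are illustrative stand-ins, not Lucene's RawPostingList hash.

public class ProbeSketch {

  public static void main(String[] args) {
    final int[] table = new int[8];          // power-of-two table; 0 means empty
    final int mask = table.length - 1;       // plays the role of postingsHashMask

    for (int key : new int[] {3, 11, 19}) {  // 3, 11, 19 all land on slot 3 first
      int code = key;
      int pos = code & mask;
      if (table[pos] != 0 && table[pos] != key) {
        // Conflict: step through the table with an odd increment,
        // as the truncated branch of add() does.
        final int inc = ((code >> 8) + code) | 1;
        do {
          code += inc;
          pos = code & mask;
        } while (table[pos] != 0 && table[pos] != key);
      }
      table[pos] = key;
      System.out.println("key " + key + " -> slot " + pos);
    }
  }
}

Running the sketch shows 3 claiming slot 3 and the two colliding keys being diverted to distinct free slots by their own probe sequences, which is why the table never needs per-slot chaining.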
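
One detail of the code above worth isolating: comparePostings() and postingEquals() never consult a stored term length. Term text lives inline in a shared char[] pool and is terminated by 0xffff, a Unicode noncharacter that cannot occur in legal term text, so the sentinel doubles as the end-of-term marker. The sketch below reproduces that convention with a flat pool; the SentinelSketch class and its compare() helper are illustrative only, not part of Lucene.

public class SentinelSketch {

  static final char END = 0xffff;

  // Compare two terms stored at start1/start2 in the pool; same contract
  // as comparePostings: negative if t1 < t2, positive if t1 > t2, else 0.
  static int compare(char[] pool, int start1, int start2) {
    int pos1 = start1, pos2 = start2;
    while (true) {
      final char c1 = pool[pos1++];
      final char c2 = pool[pos2++];
      if (c1 != c2) {
        if (c2 == END) return 1;   // t2 is a proper prefix of t1, so t1 > t2
        if (c1 == END) return -1;  // t1 is a proper prefix of t2, so t1 < t2
        return c1 - c2;            // first differing char decides the order
      }
      if (c1 == END) return 0;     // both hit the sentinel together: equal
    }
  }

  public static void main(String[] args) {
    // Pool holding "foo", "foobar", "fop", each 0xffff-terminated,
    // at offsets 0, 4, and 11.
    char[] pool = ("foo" + END + "foobar" + END + "fop" + END).toCharArray();
    System.out.println(compare(pool, 0, 4));   // "foo" vs "foobar" -> negative
    System.out.println(compare(pool, 4, 11));  // "foobar" vs "fop" -> negative
    System.out.println(compare(pool, 0, 0));   // same offset -> 0
  }
}

The same sentinel is what lets postingEquals() finish with "return 0xffff == text[pos];": after matching tokenTextLen characters it only has to check that the stored term ends exactly there.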