📄 TermsHashPerField.java
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.document.Fieldable;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.util.UnicodeUtil;

final class TermsHashPerField extends InvertedDocConsumerPerField {

  final TermsHashConsumerPerField consumer;
  final TermsHashPerField nextPerField;
  final TermsHashPerThread perThread;
  final DocumentsWriter.DocState docState;
  final DocInverter.FieldInvertState fieldState;

  // Copied from our perThread
  final CharBlockPool charPool;
  final IntBlockPool intPool;
  final ByteBlockPool bytePool;

  final int streamCount;
  final int numPostingInt;

  final FieldInfo fieldInfo;

  boolean postingsCompacted;
  int numPostings;
  private int postingsHashSize = 4;
  private int postingsHashHalfSize = postingsHashSize/2;
  private int postingsHashMask = postingsHashSize-1;
  private RawPostingList[] postingsHash = new RawPostingList[postingsHashSize];
  private RawPostingList p;

  public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHashPerThread perThread,
                           final TermsHashPerThread nextPerThread, final FieldInfo fieldInfo) {
    this.perThread = perThread;
    intPool = perThread.intPool;
    charPool = perThread.charPool;
    bytePool = perThread.bytePool;
    docState = perThread.docState;
    fieldState = docInverterPerField.fieldState;
    this.consumer = perThread.consumer.addField(this, fieldInfo);
    streamCount = consumer.getStreamCount();
    numPostingInt = 2*streamCount;
    this.fieldInfo = fieldInfo;
    if (nextPerThread != null)
      nextPerField = (TermsHashPerField) nextPerThread.addField(docInverterPerField, fieldInfo);
    else
      nextPerField = null;
  }

  void shrinkHash(int targetSize) {
    assert postingsCompacted || numPostings == 0;

    // Cannot use ArrayUtil.shrink because we require power
    // of 2:
    int newSize = postingsHash.length;
    while(newSize >= 8 && newSize/4 > targetSize) {
      newSize /= 2;
    }

    if (newSize != postingsHash.length) {
      postingsHash = new RawPostingList[newSize];
      postingsHashSize = newSize;
      postingsHashHalfSize = newSize/2;
      postingsHashMask = newSize-1;
    }
  }

  public void reset() {
    if (!postingsCompacted)
      compactPostings();
    assert numPostings <= postingsHash.length;
    if (numPostings > 0) {
      perThread.termsHash.recyclePostings(postingsHash, numPostings);
      Arrays.fill(postingsHash, 0, numPostings, null);
      numPostings = 0;
    }
    postingsCompacted = false;
    if (nextPerField != null)
      nextPerField.reset();
  }

  synchronized public void abort() {
    reset();
    if (nextPerField != null)
      nextPerField.abort();
  }

  public void initReader(ByteSliceReader reader, RawPostingList p, int stream) {
    assert stream < streamCount;
    final int[] ints = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
    final int upto = p.intStart & DocumentsWriter.INT_BLOCK_MASK;
    reader.init(bytePool,
                p.byteStart+stream*ByteBlockPool.FIRST_LEVEL_SIZE,
                ints[upto+stream]);
  }

  private synchronized void compactPostings() {
    int upto = 0;
    for(int i=0;i<postingsHashSize;i++) {
      if (postingsHash[i] != null) {
        if (upto < i) {
          postingsHash[upto] = postingsHash[i];
          postingsHash[i] = null;
        }
        upto++;
      }
    }

    assert upto == numPostings;
    postingsCompacted = true;
  }

  /** Collapse the hash table & sort in-place. */
  public RawPostingList[] sortPostings() {
    compactPostings();
    quickSort(postingsHash, 0, numPostings-1);
    return postingsHash;
  }

  void quickSort(RawPostingList[] postings, int lo, int hi) {
    if (lo >= hi)
      return;
    else if (hi == 1+lo) {
      if (comparePostings(postings[lo], postings[hi]) > 0) {
        final RawPostingList tmp = postings[lo];
        postings[lo] = postings[hi];
        postings[hi] = tmp;
      }
      return;
    }

    int mid = (lo + hi) >>> 1;

    if (comparePostings(postings[lo], postings[mid]) > 0) {
      RawPostingList tmp = postings[lo];
      postings[lo] = postings[mid];
      postings[mid] = tmp;
    }

    if (comparePostings(postings[mid], postings[hi]) > 0) {
      RawPostingList tmp = postings[mid];
      postings[mid] = postings[hi];
      postings[hi] = tmp;

      if (comparePostings(postings[lo], postings[mid]) > 0) {
        RawPostingList tmp2 = postings[lo];
        postings[lo] = postings[mid];
        postings[mid] = tmp2;
      }
    }

    int left = lo + 1;
    int right = hi - 1;

    if (left >= right)
      return;

    RawPostingList partition = postings[mid];

    for (;;) {
      while (comparePostings(postings[right], partition) > 0)
        --right;

      while (left < right && comparePostings(postings[left], partition) <= 0)
        ++left;

      if (left < right) {
        RawPostingList tmp = postings[left];
        postings[left] = postings[right];
        postings[right] = tmp;
        --right;
      } else {
        break;
      }
    }

    quickSort(postings, lo, left);
    quickSort(postings, left + 1, hi);
  }

  /** Compares term text for two Posting instances and
   *  returns -1 if p1 < p2; 1 if p1 > p2; else 0. */
  int comparePostings(RawPostingList p1, RawPostingList p2) {

    if (p1 == p2)
      return 0;

    final char[] text1 = charPool.buffers[p1.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
    int pos1 = p1.textStart & DocumentsWriter.CHAR_BLOCK_MASK;
    final char[] text2 = charPool.buffers[p2.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
    int pos2 = p2.textStart & DocumentsWriter.CHAR_BLOCK_MASK;

    assert text1 != text2 || pos1 != pos2;

    while(true) {
      final char c1 = text1[pos1++];
      final char c2 = text2[pos2++];
      if (c1 != c2) {
        if (0xffff == c2)
          return 1;
        else if (0xffff == c1)
          return -1;
        else
          return c1-c2;
      } else
        // This method should never compare equal postings
        // unless p1==p2
        assert c1 != 0xffff;
    }
  }

  /** Test whether the text for current RawPostingList p equals
   *  current tokenText.
   */
  private boolean postingEquals(final char[] tokenText, final int tokenTextLen) {

    final char[] text = perThread.charPool.buffers[p.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT];
    assert text != null;
    int pos = p.textStart & DocumentsWriter.CHAR_BLOCK_MASK;

    int tokenPos = 0;
    for(;tokenPos<tokenTextLen;pos++,tokenPos++)
      if (tokenText[tokenPos] != text[pos])
        return false;
    return 0xffff == text[pos];
  }

  private boolean doCall;
  private boolean doNextCall;

  boolean start(Fieldable[] fields, int count) throws IOException {
    doCall = consumer.start(fields, count);
    if (nextPerField != null)
      doNextCall = nextPerField.start(fields, count);
    return doCall || doNextCall;
  }

  // Secondary entry point (for 2nd & subsequent TermsHash),
  // because token text has already been "interned" into
  // textStart, so we hash by textStart
  public void add(Token token, int textStart) throws IOException {

    int code = textStart;

    int hashPos = code & postingsHashMask;

    assert !postingsCompacted;

    // Locate RawPostingList in hash
    p = postingsHash[hashPos];

    if (p != null && p.textStart != textStart) {
      // Conflict: keep searching different locations in
      // the hash table.
      final int inc = ((code>>8)+code)|1;
      do {
        code += inc;
        hashPos = code & postingsHashMask;
        p = postingsHash[hashPos];
      } while (p != null && p.textStart != textStart);
    }

    // ... (source truncated here)
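
The captured source breaks off just after add() computes its probe increment. To make the collision handling concrete, here is a minimal standalone sketch of the same open-addressing scheme: a power-of-two table indexed with a mask instead of a modulo, and a per-key probe increment of ((code>>8)+code)|1. The |1 forces the increment to be odd, hence coprime with the power-of-two table size, so the probe sequence visits every slot before repeating. The ProbeSketch class and its int-keyed table are illustrative stand-ins, not Lucene's RawPostingList hash.

public class ProbeSketch {

  public static void main(String[] args) {
    final int[] table = new int[8];          // power-of-two table; 0 means empty
    final int mask = table.length - 1;       // plays the role of postingsHashMask

    for (int key : new int[] {3, 11, 19}) {  // 3, 11, 19 all land on slot 3 first
      int code = key;
      int pos = code & mask;
      if (table[pos] != 0 && table[pos] != key) {
        // Conflict: step through the table with an odd increment,
        // as the truncated branch of add() does.
        final int inc = ((code >> 8) + code) | 1;
        do {
          code += inc;
          pos = code & mask;
        } while (table[pos] != 0 && table[pos] != key);
      }
      table[pos] = key;
      System.out.println("key " + key + " -> slot " + pos);
    }
  }
}

Running the sketch shows 3 claiming slot 3 and the two colliding keys being diverted to distinct free slots by their own probe sequences, which is why the table never needs per-slot chaining.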
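
One detail of the code above worth isolating: comparePostings() and postingEquals() never consult a stored term length. Term text lives inline in a shared char[] pool and is terminated by 0xffff, a Unicode noncharacter that cannot occur in legal term text, so the sentinel doubles as the end-of-term marker. The sketch below reproduces that convention with a flat pool; the SentinelSketch class and its compare() helper are illustrative only, not part of Lucene.

public class SentinelSketch {

  static final char END = 0xffff;

  // Compare two terms stored at start1/start2 in the pool; same contract
  // as comparePostings: negative if t1 < t2, positive if t1 > t2, else 0.
  static int compare(char[] pool, int start1, int start2) {
    int pos1 = start1, pos2 = start2;
    while (true) {
      final char c1 = pool[pos1++];
      final char c2 = pool[pos2++];
      if (c1 != c2) {
        if (c2 == END) return 1;   // t2 is a proper prefix of t1, so t1 > t2
        if (c1 == END) return -1;  // t1 is a proper prefix of t2, so t1 < t2
        return c1 - c2;            // first differing char decides the order
      }
      if (c1 == END) return 0;     // both hit the sentinel together: equal
    }
  }

  public static void main(String[] args) {
    // Pool holding "foo", "foobar", "fop", each 0xffff-terminated,
    // at offsets 0, 4, and 11.
    char[] pool = ("foo" + END + "foobar" + END + "fop" + END).toCharArray();
    System.out.println(compare(pool, 0, 4));   // "foo" vs "foobar" -> negative
    System.out.println(compare(pool, 4, 11));  // "foobar" vs "fop" -> negative
    System.out.println(compare(pool, 0, 0));   // same offset -> 0
  }
}

The same sentinel is what lets postingEquals() finish with "return 0xffff == text[pos];": after matching tokenTextLen characters it only has to check that the stored term ends exactly there.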