
📄 DocFieldProcessorPerThread.java

📁 lucene-2.4.0 is a full-text search toolkit
💻 Java
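For context, here is a minimal sketch of how this class gets exercised through the Lucene 2.4-era public API. The class name IndexOneDoc is hypothetical; the Lucene types and calls are from the 2.4 API. Each addDocument call eventually reaches DocFieldProcessorPerThread.processDocument below, which walks the document's fields:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.RAMDirectory;

public class IndexOneDoc {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(),
                                         true, IndexWriter.MaxFieldLength.UNLIMITED);
    Document doc = new Document();
    // Two Fieldables sharing the name "tag": processDocument gathers
    // them under one DocFieldProcessorPerField before the per-field
    // consumer sees them, as described in the class javadoc below.
    doc.add(new Field("tag", "lucene", Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("tag", "indexing", Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();
  }
}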
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;

/**
 * Gathers all Fieldables for a document under the same
 * name, updates FieldInfos, and calls per-field consumers
 * to process field by field.
 *
 * Currently, only a single thread visits the fields,
 * sequentially, for processing.
 */

final class DocFieldProcessorPerThread extends DocConsumerPerThread {

  float docBoost;
  int fieldGen;
  final DocFieldProcessor docFieldProcessor;
  final FieldInfos fieldInfos;
  final DocFieldConsumerPerThread consumer;

  // Holds all fields seen in current doc
  DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1];
  int fieldCount;

  // Hash table for all fields ever seen
  DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2];
  int hashMask = 1;
  int totalFieldCount;

  final DocumentsWriter.DocState docState;

  public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor) throws IOException {
    this.docState = threadState.docState;
    this.docFieldProcessor = docFieldProcessor;
    this.fieldInfos = docFieldProcessor.fieldInfos;
    this.consumer = docFieldProcessor.consumer.addThread(this);
  }

  public void abort() {
    for(int i=0;i<fieldHash.length;i++) {
      DocFieldProcessorPerField field = fieldHash[i];
      while(field != null) {
        final DocFieldProcessorPerField next = field.next;
        field.abort();
        field = next;
      }
    }
    consumer.abort();
  }

  public Collection fields() {
    Collection fields = new HashSet();
    for(int i=0;i<fieldHash.length;i++) {
      DocFieldProcessorPerField field = fieldHash[i];
      while(field != null) {
        fields.add(field.consumer);
        field = field.next;
      }
    }
    assert fields.size() == totalFieldCount;
    return fields;
  }

  /** If there are fields we've seen but did not see again
   *  in the last run, then free them up. */
  void trimFields(DocumentsWriter.FlushState state) {

    for(int i=0;i<fieldHash.length;i++) {
      DocFieldProcessorPerField perField = fieldHash[i];
      DocFieldProcessorPerField lastPerField = null;

      while (perField != null) {

        if (perField.lastGen == -1) {

          // This field was not seen since the previous
          // flush, so, free up its resources now

          // Unhash
          if (lastPerField == null)
            fieldHash[i] = perField.next;
          else
            lastPerField.next = perField.next;

          if (state.docWriter.infoStream != null)
            state.docWriter.infoStream.println("  purge field=" + perField.fieldInfo.name);

          totalFieldCount--;

        } else {
          // Reset
          perField.lastGen = -1;
          lastPerField = perField;
        }

        perField = perField.next;
      }
    }
  }

  private void rehash() {
    final int newHashSize = (int) (fieldHash.length*2);
    assert newHashSize > fieldHash.length;

    final DocFieldProcessorPerField newHashArray[] = new DocFieldProcessorPerField[newHashSize];

    // Rehash
    int newHashMask = newHashSize-1;
    for(int j=0;j<fieldHash.length;j++) {
      DocFieldProcessorPerField fp0 = fieldHash[j];
      while(fp0 != null) {
        final int hashPos2 = fp0.fieldInfo.name.hashCode() & newHashMask;
        DocFieldProcessorPerField nextFP0 = fp0.next;
        fp0.next = newHashArray[hashPos2];
        newHashArray[hashPos2] = fp0;
        fp0 = nextFP0;
      }
    }

    fieldHash = newHashArray;
    hashMask = newHashMask;
  }

  public DocumentsWriter.DocWriter processDocument() throws IOException {

    consumer.startDocument();

    final Document doc = docState.doc;

    assert docFieldProcessor.docWriter.writer.testPoint("DocumentsWriter.ThreadState.init start");

    fieldCount = 0;

    final int thisFieldGen = fieldGen++;

    final List docFields = doc.getFields();
    final int numDocFields = docFields.size();

    // Absorb any new fields first seen in this document.
    // Also absorb any changes to fields we had already
    // seen before (eg suddenly turning on norms or
    // vectors, etc.):

    for(int i=0;i<numDocFields;i++) {
      Fieldable field = (Fieldable) docFields.get(i);
      final String fieldName = field.name();

      // Make sure we have a PerField allocated
      final int hashPos = fieldName.hashCode() & hashMask;
      DocFieldProcessorPerField fp = fieldHash[hashPos];
      while(fp != null && !fp.fieldInfo.name.equals(fieldName))
        fp = fp.next;

      if (fp == null) {

        // TODO FI: we need to genericize the "flags" that a
        // field holds, and, how these flags are merged; it
        // needs to be more "pluggable" such that if I want
        // to have a new "thing" my Fields can do, I can
        // easily add it
        FieldInfo fi = fieldInfos.add(fieldName, field.isIndexed(), field.isTermVectorStored(),
                                      field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
                                      field.getOmitNorms(), false, field.getOmitTf());

        fp = new DocFieldProcessorPerField(this, fi);
        fp.next = fieldHash[hashPos];
        fieldHash[hashPos] = fp;
        totalFieldCount++;

        if (totalFieldCount >= fieldHash.length/2)
          rehash();
      } else
        fp.fieldInfo.update(field.isIndexed(), field.isTermVectorStored(),
                            field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
                            field.getOmitNorms(), false, field.getOmitTf());

      if (thisFieldGen != fp.lastGen) {

        // First time we're seeing this field for this doc
        fp.fieldCount = 0;

        if (fieldCount == fields.length) {
          final int newSize = fields.length*2;
          DocFieldProcessorPerField newArray[] = new DocFieldProcessorPerField[newSize];
          System.arraycopy(fields, 0, newArray, 0, fieldCount);
          fields = newArray;
        }

        fields[fieldCount++] = fp;
        fp.lastGen = thisFieldGen;
      }

      if (fp.fieldCount == fp.fields.length) {
        Fieldable[] newArray = new Fieldable[fp.fields.length*2];
        System.arraycopy(fp.fields, 0, newArray, 0, fp.fieldCount);
        fp.fields = newArray;
      }

      fp.fields[fp.fieldCount++] = field;
    }

    // If we are writing vectors then we must visit
    // fields in sorted order so they are written in
    // sorted order.  TODO: we actually only need to
    // sort the subset of fields that have vectors
    // enabled; we could save [small amount of] CPU
    // here.
    quickSort(fields, 0, fieldCount-1);

    for(int i=0;i<fieldCount;i++)
      fields[i].consumer.processFields(fields[i].fields, fields[i].fieldCount);

    if (docState.maxTermPrefix != null && docState.infoStream != null)
      docState.infoStream.println("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");

    return consumer.finishDocument();
  }

  void quickSort(DocFieldProcessorPerField[] array, int lo, int hi) {
    if (lo >= hi)
      return;
    else if (hi == 1+lo) {
      if (array[lo].fieldInfo.name.compareTo(array[hi].fieldInfo.name) > 0) {
        final DocFieldProcessorPerField tmp = array[lo];
        array[lo] = array[hi];
        array[hi] = tmp;
      }
      return;
    }

    int mid = (lo + hi) >>> 1;

    if (array[lo].fieldInfo.name.compareTo(array[mid].fieldInfo.name) > 0) {
      DocFieldProcessorPerField tmp = array[lo];
      array[lo] = array[mid];
      array[mid] = tmp;
    }

    if (array[mid].fieldInfo.name.compareTo(array[hi].fieldInfo.name) > 0) {
      DocFieldProcessorPerField tmp = array[mid];
      array[mid] = array[hi];
      array[hi] = tmp;

      if (array[lo].fieldInfo.name.compareTo(array[mid].fieldInfo.name) > 0) {
        DocFieldProcessorPerField tmp2 = array[lo];
        array[lo] = array[mid];
        array[mid] = tmp2;
      }
    }

    int left = lo + 1;
    int right = hi - 1;

    if (left >= right)
      return;

    DocFieldProcessorPerField partition = array[mid];

    for (; ;) {
      while (array[right].fieldInfo.name.compareTo(partition.fieldInfo.name) > 0)
        --right;

      while (left < right && array[left].fieldInfo.name.compareTo(partition.fieldInfo.name) <= 0)
        ++left;

      if (left < right) {
        DocFieldProcessorPerField tmp = array[left];
        array[left] = array[right];
        array[right] = tmp;
        --right;
      } else {
        break;
      }
    }

    quickSort(array, lo, left);
    quickSort(array, left + 1, hi);
  }
}
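A note on the data structure above: fieldHash is a hand-rolled chained hash table keyed by field name. The table size is kept a power of two so that hashCode() & hashMask selects a bucket without a modulo, collisions chain through the next pointer, and rehash() doubles the table once totalFieldCount reaches half its length. A standalone sketch of the same pattern (the NameTable and Entry names are hypothetical, not part of Lucene):

final class NameTable {
  static final class Entry {
    final String name;
    Entry next;               // collision chain within a bucket
    Entry(String name) { this.name = name; }
  }

  Entry[] buckets = new Entry[2];
  int mask = 1;               // buckets.length - 1, valid while length is a power of two
  int count;

  Entry lookupOrAdd(String name) {
    int pos = name.hashCode() & mask;
    Entry e = buckets[pos];
    while (e != null && !e.name.equals(name))
      e = e.next;
    if (e == null) {
      e = new Entry(name);
      e.next = buckets[pos];  // push onto the head of the chain
      buckets[pos] = e;
      if (++count >= buckets.length / 2)
        rehash();             // keep the table at most half full
    }
    return e;
  }

  private void rehash() {
    Entry[] newBuckets = new Entry[buckets.length * 2];
    int newMask = newBuckets.length - 1;
    for (Entry e : buckets) {
      while (e != null) {
        Entry next = e.next;  // detach before relinking
        int pos = e.name.hashCode() & newMask;
        e.next = newBuckets[pos];
        newBuckets[pos] = e;
        e = next;
      }
    }
    buckets = newBuckets;
    mask = newMask;
  }
}

Keeping the table at most half full keeps the collision chains short, while the power-of-two mask keeps bucket selection a single AND; the same trade-off motivates the rehash() trigger in processDocument above.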
