📄 DocumentsWriter.java
            // Grow the parallel array of per-field data, and double the
            // hash table that maps field name -> FieldData:
            int newSize = (int) (allFieldDataArray.length*1.5);
            int newHashSize = fieldDataHash.length*2;

            FieldData newArray[] = new FieldData[newSize];
            FieldData newHashArray[] = new FieldData[newHashSize];
            System.arraycopy(allFieldDataArray, 0, newArray, 0, numAllFieldData);

            // Rehash: the mask must track the new hash-table size (a power
            // of two), not the size of the parallel array:
            fieldDataHashMask = newHashSize-1;
            for(int j=0;j<fieldDataHash.length;j++) {
              FieldData fp0 = fieldDataHash[j];
              while(fp0 != null) {
                hashPos = fp0.fieldInfo.name.hashCode() & fieldDataHashMask;
                FieldData nextFP0 = fp0.next;
                fp0.next = newHashArray[hashPos];
                newHashArray[hashPos] = fp0;
                fp0 = nextFP0;
              }
            }

            allFieldDataArray = newArray;
            fieldDataHash = newHashArray;
          }
          allFieldDataArray[numAllFieldData++] = fp;
        } else {
          assert fp.fieldInfo == fi;
        }

        if (thisFieldGen != fp.lastGen) {

          // First time we're seeing this field for this doc
          fp.lastGen = thisFieldGen;
          fp.fieldCount = 0;
          fp.doVectors = fp.doVectorPositions = fp.doVectorOffsets = false;
          fp.doNorms = fi.isIndexed && !fi.omitNorms;

          if (numFieldData == fieldDataArray.length) {
            int newSize = fieldDataArray.length*2;
            FieldData newArray[] = new FieldData[newSize];
            System.arraycopy(fieldDataArray, 0, newArray, 0, numFieldData);
            fieldDataArray = newArray;
          }
          fieldDataArray[numFieldData++] = fp;
        }

        if (field.isTermVectorStored()) {
          if (!fp.doVectors && numVectorFields++ == vectorFieldPointers.length) {
            final int newSize = (int) (numVectorFields*1.5);
            vectorFieldPointers = new long[newSize];
            vectorFieldNumbers = new int[newSize];
          }
          fp.doVectors = true;
          docHasVectors = true;

          fp.doVectorPositions |= field.isStorePositionWithTermVector();
          fp.doVectorOffsets |= field.isStoreOffsetWithTermVector();
        }

        if (fp.fieldCount == fp.docFields.length) {
          Fieldable[] newArray = new Fieldable[fp.docFields.length*2];
          System.arraycopy(fp.docFields, 0, newArray, 0, fp.docFields.length);
          fp.docFields = newArray;
        }

        // Lazily allocate arrays for postings:
        if (field.isIndexed() && fp.postingsHash == null)
          fp.initPostingArrays();

        fp.docFields[fp.fieldCount++] = field;
      }

      // Maybe init the local & global fieldsWriter
      if (localFieldsWriter == null) {
        if (fieldsWriter == null) {
          assert docStoreSegment == null;
          assert segment != null;
          docStoreSegment = segment;
          // If we hit an exception while init'ing the
          // fieldsWriter, we must abort this segment
          // because those files will be in an unknown
          // state:
          try {
            fieldsWriter = new FieldsWriter(directory, docStoreSegment, fieldInfos);
          } catch (Throwable t) {
            throw new AbortException(t, DocumentsWriter.this);
          }
          files = null;
        }
        localFieldsWriter = new FieldsWriter(null, fdtLocal, fieldInfos);
      }
      // First time we see a doc that has field(s) with
      // stored vectors, we init our tvx writer
      if (docHasVectors) {
        if (tvx == null) {
          assert docStoreSegment != null;
          // If we hit an exception while init'ing the term
          // vector output files, we must abort this segment
          // because those files will be in an unknown
          // state:
          try {
            tvx = directory.createOutput(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
            tvx.writeInt(TermVectorsReader.FORMAT_VERSION);
            tvd = directory.createOutput(docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
            tvd.writeInt(TermVectorsReader.FORMAT_VERSION);
            tvf = directory.createOutput(docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
            tvf.writeInt(TermVectorsReader.FORMAT_VERSION);

            // We must "catch up" for all docs before us
            // that had no vectors:
            for(int i=0;i<numDocsInStore;i++) {
              tvx.writeLong(tvd.getFilePointer());
              tvd.writeVInt(0);
            }
          } catch (Throwable t) {
            throw new AbortException(t, DocumentsWriter.this);
          }
          files = null;
        }

        numVectorFields = 0;
      }
    }

    /** Do in-place sort of Posting array */
    void doPostingSort(Posting[] postings, int numPosting) {
      quickSort(postings, 0, numPosting-1);
    }

    void quickSort(Posting[] postings, int lo, int hi) {
      if (lo >= hi)
        return;

      int mid = (lo + hi) >>> 1;

      // Order postings[lo], postings[mid], postings[hi] so the median of
      // the three ends up in the middle slot, then use it as the pivot:
      if (comparePostings(postings[lo], postings[mid]) > 0) {
        Posting tmp = postings[lo];
        postings[lo] = postings[mid];
        postings[mid] = tmp;
      }

      if (comparePostings(postings[mid], postings[hi]) > 0) {
        Posting tmp = postings[mid];
        postings[mid] = postings[hi];
        postings[hi] = tmp;

        if (comparePostings(postings[lo], postings[mid]) > 0) {
          Posting tmp2 = postings[lo];
          postings[lo] = postings[mid];
          postings[mid] = tmp2;
        }
      }

      int left = lo + 1;
      int right = hi - 1;

      if (left >= right)
        return;

      Posting partition = postings[mid];

      for (;;) {
        while (comparePostings(postings[right], partition) > 0)
          --right;

        while (left < right && comparePostings(postings[left], partition) <= 0)
          ++left;

        if (left < right) {
          Posting tmp = postings[left];
          postings[left] = postings[right];
          postings[right] = tmp;
          --right;
        } else {
          break;
        }
      }

      quickSort(postings, lo, left);
      quickSort(postings, left + 1, hi);
    }

    /** Do in-place sort of PostingVector array */
    void doVectorSort(PostingVector[] postings, int numPosting) {
      quickSort(postings, 0, numPosting-1);
    }

    void quickSort(PostingVector[] postings, int lo, int hi) {
      if (lo >= hi)
        return;

      int mid = (lo + hi) >>> 1;

      if (comparePostings(postings[lo].p, postings[mid].p) > 0) {
        PostingVector tmp = postings[lo];
        postings[lo] = postings[mid];
        postings[mid] = tmp;
      }

      if (comparePostings(postings[mid].p, postings[hi].p) > 0) {
        PostingVector tmp = postings[mid];
        postings[mid] = postings[hi];
        postings[hi] = tmp;

        if (comparePostings(postings[lo].p, postings[mid].p) > 0) {
          PostingVector tmp2 = postings[lo];
          postings[lo] = postings[mid];
          postings[mid] = tmp2;
        }
      }

      int left = lo + 1;
      int right = hi - 1;

      if (left >= right)
        return;

      PostingVector partition = postings[mid];

      for (;;) {
        while (comparePostings(postings[right].p, partition.p) > 0)
          --right;

        while (left < right && comparePostings(postings[left].p, partition.p) <= 0)
          ++left;

        if (left < right) {
          PostingVector tmp = postings[left];
          postings[left] = postings[right];
          postings[right] = tmp;
          --right;
        } else {
          break;
        }
      }

      quickSort(postings, lo, left);
      quickSort(postings, left + 1, hi);
    }
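    // --- Illustrative sketch, not part of the original source ---
    // The two quickSort overloads above are the same median-of-three,
    // in-place quicksort, specialized to Posting[] and PostingVector[].
    // For reference, here is the identical control flow on a plain int[]
    // (quickSortInts is a hypothetical name used only for this sketch):
    void quickSortInts(int[] a, int lo, int hi) {
      if (lo >= hi)
        return;
      int mid = (lo + hi) >>> 1;
      // Sort a[lo], a[mid], a[hi] so a[mid] is the median of the three:
      if (a[lo] > a[mid]) { int t = a[lo]; a[lo] = a[mid]; a[mid] = t; }
      if (a[mid] > a[hi]) {
        int t = a[mid]; a[mid] = a[hi]; a[hi] = t;
        if (a[lo] > a[mid]) { int t2 = a[lo]; a[lo] = a[mid]; a[mid] = t2; }
      }
      int left = lo + 1;
      int right = hi - 1;
      if (left >= right)
        return;
      int partition = a[mid];
      for (;;) {
        // Scan from the right for an element <= pivot, from the left for
        // one > pivot, and swap until the scans cross:
        while (a[right] > partition)
          --right;
        while (left < right && a[left] <= partition)
          ++left;
        if (left < right) {
          int t = a[left]; a[left] = a[right]; a[right] = t;
          --right;
        } else
          break;
      }
      quickSortInts(a, lo, left);
      quickSortInts(a, left + 1, hi);
    }
    // e.g. quickSortInts(new int[]{3, 1, 2}, 0, 2) leaves the array sorted.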
    /** If there are fields we've seen but did not see again
     *  in the last run, then free them up.  Also reduce
     *  postings hash size. */
    void trimFields() {

      int upto = 0;
      for(int i=0;i<numAllFieldData;i++) {
        FieldData fp = allFieldDataArray[i];
        if (fp.lastGen == -1) {
          // This field was not seen since the previous
          // flush, so, free up its resources now

          // Unhash
          final int hashPos = fp.fieldInfo.name.hashCode() & fieldDataHashMask;
          FieldData last = null;
          FieldData fp0 = fieldDataHash[hashPos];
          while(fp0 != fp) {
            last = fp0;
            fp0 = fp0.next;
          }
          assert fp0 != null;

          if (last == null)
            fieldDataHash[hashPos] = fp.next;
          else
            last.next = fp.next;

          if (infoStream != null)
            infoStream.println("  remove field=" + fp.fieldInfo.name);

        } else {
          // Reset
          fp.lastGen = -1;
          allFieldDataArray[upto++] = fp;

          if (fp.numPostings > 0 && ((float) fp.numPostings) / fp.postingsHashSize < 0.2) {
            int hashSize = fp.postingsHashSize;

            // Reduce hash so it's between 25-50% full
            while (fp.numPostings < (hashSize>>1) && hashSize >= 2)
              hashSize >>= 1;
            hashSize <<= 1;

            if (hashSize != fp.postingsHash.length)
              fp.rehashPostings(hashSize);
          }
        }
      }

      // If we didn't see any norms for this field since
      // last flush, free it
      for(int i=0;i<norms.length;i++) {
        BufferedNorms n = norms[i];
        if (n != null && n.upto == 0)
          norms[i] = null;
      }

      numAllFieldData = upto;

      // Also pare back PostingsVectors if it's excessively
      // large
      if (maxPostingsVectors * 1.5 < postingsVectors.length) {
        final int newSize;
        if (0 == maxPostingsVectors)
          newSize = 1;
        else
          newSize = (int) (1.5*maxPostingsVectors);
        PostingVector[] newArray = new PostingVector[newSize];
        System.arraycopy(postingsVectors, 0, newArray, 0, newSize);
        postingsVectors = newArray;
      }
    }

    /** Tokenizes the fields of a document into Postings */
    void processDocument(Analyzer analyzer)
      throws IOException, AbortException {

      final int numFields = numFieldData;

      assert 0 == fdtLocal.length();

      if (tvx != null)
        // If we are writing vectors then we must visit
        // fields in sorted order so they are written in
        // sorted order.  TODO: we actually only need to
        // sort the subset of fields that have vectors
        // enabled; we could save [small amount of] CPU
        // here.
        Arrays.sort(fieldDataArray, 0, numFields);

      // We process the document one field at a time
      for(int i=0;i<numFields;i++)
        fieldDataArray[i].processField(analyzer);

      if (maxTermPrefix != null && infoStream != null)
        infoStream.println("WARNING: document contains at least one immense term (longer than the max length " + MAX_TERM_LENGTH + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + maxTermPrefix + "...'");

      if (ramBufferSize != IndexWriter.DISABLE_AUTO_FLUSH
          && numBytesUsed > 0.95 * ramBufferSize)
        balanceRAM();
    }

    final ByteBlockPool postingsPool = new ByteBlockPool(true);
    final ByteBlockPool vectorsPool = new ByteBlockPool(false);
    final CharBlockPool charPool = new CharBlockPool();

    // Current posting we are working on
    Posting p;
    PostingVector vector;

    // USE ONLY FOR DEBUGGING!
    /*
      public String getPostingText() {
        char[] text = charPool.buffers[p.textStart >> CHAR_BLOCK_SHIFT];
        int upto = p.textStart & CHAR_BLOCK_MASK;
        while(text[upto] != 0xffff)
          upto++;
        return new String(text, p.textStart, upto-(p.textStart & BYTE_BLOCK_MASK));
      }
    */
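    // --- Illustrative sketch, not part of the original source ---
    // postingEquals and comparePostings below read term text straight out
    // of the shared charPool: a Posting's textStart is a single int address
    // whose high bits select a buffer (textStart >> CHAR_BLOCK_SHIFT) and
    // whose low bits are the offset inside it (textStart & CHAR_BLOCK_MASK),
    // and the text ends at the sentinel char 0xffff instead of carrying an
    // explicit length.  A hypothetical helper that decodes such an address:
    String termTextAt(int textStart) {
      final char[] buffer = charPool.buffers[textStart >> CHAR_BLOCK_SHIFT];
      final int offset = textStart & CHAR_BLOCK_MASK;
      int len = 0;
      while (buffer[offset + len] != 0xffff)  // scan to the sentinel
        len++;
      return new String(buffer, offset, len);
    }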
    /** Test whether the text for current Posting p equals
     *  current tokenText. */
    boolean postingEquals(final char[] tokenText, final int tokenTextLen) {

      final char[] text = charPool.buffers[p.textStart >> CHAR_BLOCK_SHIFT];
      assert text != null;
      int pos = p.textStart & CHAR_BLOCK_MASK;

      int tokenPos = 0;
      for(;tokenPos<tokenTextLen;pos++,tokenPos++)
        if (tokenText[tokenPos] != text[pos])
          return false;
      return 0xffff == text[pos];
    }

    /** Compares term text for two Posting instances and
     *  returns -1 if p1 < p2; 1 if p1 > p2; else 0. */
    int comparePostings(Posting p1, Posting p2) {
      final char[] text1 = charPool.buffers[p1.textStart >> CHAR_BLOCK_SHIFT];
      int pos1 = p1.textStart & CHAR_BLOCK_MASK;
      final char[] text2 = charPool.buffers[p2.textStart >> CHAR_BLOCK_SHIFT];
      int pos2 = p2.textStart & CHAR_BLOCK_MASK;
      while(true) {
        final char c1 = text1[pos1++];
        final char c2 = text2[pos2++];
        if (c1 < c2)
          if (0xffff == c2)
            // p2 ended inside p1: p2 is a prefix of p1, so p1 sorts after
            return 1;
          else
            return -1;
        else if (c2 < c1)
          if (0xffff == c1)
            // p1 ended inside p2: p1 is a prefix of p2, so p1 sorts before
            return -1;
          else
            return 1;
        else if (0xffff == c1)
          // Both hit the sentinel together: the terms are equal
          return 0;
      }
    }
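    // --- Illustrative sketch, not part of the original source ---
    // comparePostings relies on 0xffff being the maximum char value: when a
    // term is a strict prefix of another, its sentinel is reached first and
    // it sorts lower, matching String.compareTo ordering.  The same loop
    // over two plain sentinel-terminated arrays (hypothetical helper):
    int compareSentinelTerminated(char[] a, char[] b) {
      int i = 0, j = 0;
      while (true) {
        final char c1 = a[i++];
        final char c2 = b[j++];
        if (c1 != c2) {
          if (c1 == 0xffff) return -1;  // a ended first: a is a prefix of b
          if (c2 == 0xffff) return 1;   // b ended first: b is a prefix of a
          return c1 < c2 ? -1 : 1;
        } else if (c1 == 0xffff)
          return 0;                     // both ended: equal terms
      }
    }
    // e.g. compareSentinelTerminated(new char[]{'a', 0xffff},
    //                                new char[]{'a', 'b', 0xffff}) == -1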