📄 DocumentsWriter.java
        if (tvx != null) {
          try {
            tvx.close();
          } catch (Throwable t) {
          }
          tvx = null;
        }
        if (tvd != null) {
          try {
            tvd.close();
          } catch (Throwable t) {
          }
          tvd = null;
        }
        if (tvf != null) {
          try {
            tvf.close();
          } catch (Throwable t) {
          }
          tvf = null;
        }

        // Reset fields writer
        if (fieldsWriter != null) {
          try {
            fieldsWriter.close();
          } catch (Throwable t) {
          }
          fieldsWriter = null;
        }

        // Discard pending norms:
        final int numField = fieldInfos.size();
        for (int i=0;i<numField;i++) {
          FieldInfo fi = fieldInfos.fieldInfo(i);
          if (fi.isIndexed && !fi.omitNorms) {
            BufferedNorms n = norms[i];
            if (n != null)
              try {
                n.reset();
              } catch (Throwable t) {
              }
          }
        }

        // Reset all postings data
        resetPostingsData();

      } finally {
        resumeAllThreads();
      }

      // If we have a root cause exception, re-throw it now:
      if (ae != null) {
        Throwable t = ae.getCause();
        if (t instanceof IOException)
          throw (IOException) t;
        else if (t instanceof RuntimeException)
          throw (RuntimeException) t;
        else if (t instanceof Error)
          throw (Error) t;
        else
          // Should not get here
          assert false: "unknown exception: " + t;
      }
    } finally {
      if (ae != null)
        abortCount--;
      notifyAll();
    }
  }

  /** Reset after a flush */
  private void resetPostingsData() throws IOException {
    // All ThreadStates should be idle when we are called
    assert allThreadsIdle();
    threadBindings.clear();
    segment = null;
    numDocsInRAM = 0;
    nextDocID = 0;
    nextWriteDocID = 0;
    files = null;
    balanceRAM();
    bufferIsFull = false;
    flushPending = false;
    for(int i=0;i<threadStates.length;i++) {
      threadStates[i].numThreads = 0;
      threadStates[i].resetPostings();
    }
    numBytesUsed = 0;
  }

  // Returns true if an abort is in progress
  synchronized boolean pauseAllThreads() {
    pauseThreads++;
    while(!allThreadsIdle()) {
      try {
        wait();
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
      }
    }
    return abortCount > 0;
  }

  synchronized void resumeAllThreads() {
    pauseThreads--;
    assert pauseThreads >= 0;
    if (0 == pauseThreads)
      notifyAll();
  }

  private synchronized boolean allThreadsIdle() {
    for(int i=0;i<threadStates.length;i++)
      if (!threadStates[i].isIdle)
        return false;
    return true;
  }

  private boolean hasNorms;           // Whether any norms were seen since last flush

  List newFiles;

  /** Flush all pending docs to a new segment */
  synchronized int flush(boolean closeDocStore) throws IOException {

    assert allThreadsIdle();

    if (segment == null)
      // In case we are asked to flush an empty segment
      segment = writer.newSegmentName();

    newFiles = new ArrayList();

    docStoreOffset = numDocsInStore;

    int docCount;

    assert numDocsInRAM > 0;

    if (infoStream != null)
      infoStream.println("\nflush postings as segment " + segment + " numDocs=" + numDocsInRAM);

    boolean success = false;

    try {
      if (closeDocStore) {
        assert docStoreSegment != null;
        assert docStoreSegment.equals(segment);
        newFiles.addAll(files());
        closeDocStore();
      }

      fieldInfos.write(directory, segment + ".fnm");

      docCount = numDocsInRAM;

      newFiles.addAll(writeSegment());

      success = true;
    } finally {
      if (!success)
        abort(null);
    }

    return docCount;
  }

  /** Build compound file for the segment we just flushed */
  void createCompoundFile(String segment) throws IOException {
    CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
    final int size = newFiles.size();
    for(int i=0;i<size;i++)
      cfsWriter.addFile((String) newFiles.get(i));

    // Perform the merge
    cfsWriter.close();
  }
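  // ------------------------------------------------------------------
  // Illustrative sketch only, not part of the original file: one way a
  // caller could combine setFlushPending/clearFlushPending (below),
  // pauseAllThreads/resumeAllThreads, flush and createCompoundFile so
  // that exactly one thread triggers a flush.  The real call sequence
  // lives in IndexWriter and handles segment naming, doc stores and
  // error cases in more detail; the method name here is hypothetical.
  // ------------------------------------------------------------------
  private void exampleTriggerFlush() throws IOException {
    // Only the thread that flips flushPending goes on to flush:
    if (!setFlushPending())
      return;
    boolean aborting = pauseAllThreads();   // blocks until every ThreadState is idle
    try {
      if (!aborting) {
        flush(true);                        // write the buffered docs as a new segment
        // The caller could then pack the files collected in newFiles into
        // a compound file via createCompoundFile(segmentName).
      }
    } finally {
      clearFlushPending();
      resumeAllThreads();
    }
  }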
  /** Set flushPending if it is not already set and returns
   *  whether it was set.  This is used by IndexWriter to
   *  trigger a single flush even when multiple threads are
   *  trying to do so. */
  synchronized boolean setFlushPending() {
    if (flushPending)
      return false;
    else {
      flushPending = true;
      return true;
    }
  }

  synchronized void clearFlushPending() {
    flushPending = false;
  }

  /** Per-thread state.  We keep a separate Posting hash and
   *  other state for each thread and then merge postings
   *  hashes from all threads when writing the segment. */
  private final class ThreadState {

    Posting[] postingsFreeList;           // Free Posting instances
    int postingsFreeCount;

    RAMOutputStream tvfLocal = new RAMOutputStream();    // Term vectors for one doc
    RAMOutputStream fdtLocal = new RAMOutputStream();    // Stored fields for one doc
    FieldsWriter localFieldsWriter;       // Fields for one doc

    long[] vectorFieldPointers;
    int[] vectorFieldNumbers;

    boolean isIdle = true;                // Whether we are in use
    int numThreads = 1;                   // Number of threads that use this instance

    int docID;                            // docID we are now working on
    int numStoredFields;                  // How many stored fields in current doc
    float docBoost;                       // Boost for current doc

    FieldData[] fieldDataArray;           // Fields touched by current doc
    int numFieldData;                     // How many fields in current doc
    int numVectorFields;                  // How many vector fields in current doc

    FieldData[] allFieldDataArray = new FieldData[10];   // All FieldData instances
    int numAllFieldData;
    FieldData[] fieldDataHash;            // Hash FieldData instances by field name
    int fieldDataHashMask;
    String maxTermPrefix;                 // Non-null prefix of a too-large term if this
                                          // doc has one

    boolean doFlushAfter;

    public ThreadState() {
      fieldDataArray = new FieldData[8];
      fieldDataHash = new FieldData[16];
      fieldDataHashMask = 15;
      vectorFieldPointers = new long[10];
      vectorFieldNumbers = new int[10];
      postingsFreeList = new Posting[256];
      postingsFreeCount = 0;
    }

    /** Clear the postings hash and return objects back to
     *  shared pool */
    public void resetPostings() throws IOException {
      fieldGen = 0;
      maxPostingsVectors = 0;
      doFlushAfter = false;
      if (localFieldsWriter != null) {
        localFieldsWriter.close();
        localFieldsWriter = null;
      }
      postingsPool.reset();
      charPool.reset();
      recyclePostings(postingsFreeList, postingsFreeCount);
      postingsFreeCount = 0;
      for(int i=0;i<numAllFieldData;i++) {
        FieldData fp = allFieldDataArray[i];
        fp.lastGen = -1;
        if (fp.numPostings > 0)
          fp.resetPostingArrays();
      }
    }
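    /*
     * Illustrative note, not in the original source: writeDocument() below
     * appends one entry to tvd per document.  Suppose (hypothetical
     * numbers) a document has three vector fields whose data starts at
     * offsets 0, 120 and 300 inside tvfLocal, and the shared tvf file is
     * currently at position 5000.  The tvd entry is then: the field count
     * 3, the three field numbers, the absolute tvf start 5000, and the
     * deltas 120 and 180 (300 - 120) as VLongs.  Delta-encoding keeps the
     * VLongs small; the first local pointer is always 0 because tvfLocal
     * is reset after every document.
     */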
    /** Move all per-document state that was accumulated in
     *  the ThreadState into the "real" stores. */
    public void writeDocument() throws IOException, AbortException {

      // If we hit an exception while appending to the
      // stored fields or term vectors files, we have to
      // abort all documents since we last flushed because
      // it means those files are possibly inconsistent.
      try {

        numDocsInStore++;

        // Append stored fields to the real FieldsWriter:
        fieldsWriter.flushDocument(numStoredFields, fdtLocal);
        fdtLocal.reset();

        // Append term vectors to the real outputs:
        if (tvx != null) {
          tvx.writeLong(tvd.getFilePointer());
          tvd.writeVInt(numVectorFields);
          if (numVectorFields > 0) {
            for(int i=0;i<numVectorFields;i++)
              tvd.writeVInt(vectorFieldNumbers[i]);
            assert 0 == vectorFieldPointers[0];
            tvd.writeVLong(tvf.getFilePointer());
            long lastPos = vectorFieldPointers[0];
            for(int i=1;i<numVectorFields;i++) {
              long pos = vectorFieldPointers[i];
              tvd.writeVLong(pos-lastPos);
              lastPos = pos;
            }
            tvfLocal.writeTo(tvf);
            tvfLocal.reset();
          }
        }

        // Append norms for the fields we saw:
        for(int i=0;i<numFieldData;i++) {
          FieldData fp = fieldDataArray[i];
          if (fp.doNorms) {
            BufferedNorms bn = norms[fp.fieldInfo.number];
            assert bn != null;
            assert bn.upto <= docID;
            bn.fill(docID);
            float norm = fp.boost * writer.getSimilarity().lengthNorm(fp.fieldInfo.name, fp.length);
            bn.add(norm);
          }
        }
      } catch (Throwable t) {
        // Forcefully idle this threadstate -- its state will
        // be reset by abort()
        isIdle = true;
        throw new AbortException(t, DocumentsWriter.this);
      }

      if (bufferIsFull && !flushPending) {
        flushPending = true;
        doFlushAfter = true;
      }
    }

    int fieldGen;

    /** Initializes shared state for this new document */
    void init(Document doc, int docID) throws IOException, AbortException {

      assert !isIdle;
      assert writer.testPoint("DocumentsWriter.ThreadState.init start");

      this.docID = docID;
      docBoost = doc.getBoost();
      numStoredFields = 0;
      numFieldData = 0;
      numVectorFields = 0;
      maxTermPrefix = null;

      assert 0 == fdtLocal.length();
      assert 0 == fdtLocal.getFilePointer();
      assert 0 == tvfLocal.length();
      assert 0 == tvfLocal.getFilePointer();

      final int thisFieldGen = fieldGen++;

      List docFields = doc.getFields();
      final int numDocFields = docFields.size();
      boolean docHasVectors = false;

      // Absorb any new fields first seen in this document.
      // Also absorb any changes to fields we had already
      // seen before (eg suddenly turning on norms or
      // vectors, etc.):
      for(int i=0;i<numDocFields;i++) {
        Fieldable field = (Fieldable) docFields.get(i);

        FieldInfo fi = fieldInfos.add(field.name(), field.isIndexed(), field.isTermVectorStored(),
                                      field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
                                      field.getOmitNorms(), false);
        if (fi.isIndexed && !fi.omitNorms) {
          // Maybe grow our buffered norms
          if (norms.length <= fi.number) {
            int newSize = (int) ((1+fi.number)*1.25);
            BufferedNorms[] newNorms = new BufferedNorms[newSize];
            System.arraycopy(norms, 0, newNorms, 0, norms.length);
            norms = newNorms;
          }
          if (norms[fi.number] == null)
            norms[fi.number] = new BufferedNorms();

          hasNorms = true;
        }

        // Make sure we have a FieldData allocated
        int hashPos = fi.name.hashCode() & fieldDataHashMask;
        FieldData fp = fieldDataHash[hashPos];
        while(fp != null && !fp.fieldInfo.name.equals(fi.name))
          fp = fp.next;

        if (fp == null) {

          fp = new FieldData(fi);
          fp.next = fieldDataHash[hashPos];
          fieldDataHash[hashPos] = fp;

          if (numAllFieldData == allFieldDataArray.length) {