IndexWriter.java
  /**
   * Deletes the document(s) containing any of the
   * terms. All deletes are flushed at the same time.
   * @param terms array of terms to identify the documents
   * to be deleted
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public synchronized void deleteDocuments(Term[] terms) throws CorruptIndexException, IOException {
    ensureOpen();
    for (int i = 0; i < terms.length; i++) {
      bufferDeleteTerm(terms[i]);
    }
    maybeFlushRamSegments();
  }

  /**
   * Updates a document by first deleting the document(s)
   * containing <code>term</code> and then adding the new
   * document. The delete and then add are atomic as seen
   * by a reader on the same index (flush may happen only after
   * the add).
   * @param term the term to identify the document(s) to be
   * deleted
   * @param doc the document to be added
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void updateDocument(Term term, Document doc) throws CorruptIndexException, IOException {
    ensureOpen();
    updateDocument(term, doc, getAnalyzer());
  }

  /**
   * Updates a document by first deleting the document(s)
   * containing <code>term</code> and then adding the new
   * document. The delete and then add are atomic as seen
   * by a reader on the same index (flush may happen only after
   * the add).
   * @param term the term to identify the document(s) to be
   * deleted
   * @param doc the document to be added
   * @param analyzer the analyzer to use when analyzing the document
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void updateDocument(Term term, Document doc, Analyzer analyzer)
      throws CorruptIndexException, IOException {
    ensureOpen();
    SegmentInfo newSegmentInfo = buildSingleDocSegment(doc, analyzer);
    synchronized (this) {
      bufferDeleteTerm(term);
      ramSegmentInfos.addElement(newSegmentInfo);
      maybeFlushRamSegments();
    }
  }
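  // --- Illustrative usage sketch (not part of the original source) ---
  // updateDocument() gives atomic delete-then-add semantics, so a unique
  // identifier field is the natural delete key. The field name "id" and the
  // variables below are hypothetical:
  //
  //   IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), false);
  //   Document doc = new Document();
  //   doc.add(new Field("id", "42", Field.Store.YES, Field.Index.UN_TOKENIZED));
  //   doc.add(new Field("body", "updated text", Field.Store.NO, Field.Index.TOKENIZED));
  //   writer.updateDocument(new Term("id", "42"), doc);  // delete old, add new
  //   writer.close();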
  final synchronized String newRamSegmentName() {
    return "_ram_" + Integer.toString(ramSegmentInfos.counter++, Character.MAX_RADIX);
  }

  // for test purpose
  final synchronized int getSegmentCount() {
    return segmentInfos.size();
  }

  // for test purpose
  final synchronized int getRamSegmentCount() {
    return ramSegmentInfos.size();
  }

  // for test purpose
  final synchronized int getDocCount(int i) {
    if (i >= 0 && i < segmentInfos.size()) {
      return segmentInfos.info(i).docCount;
    } else {
      return -1;
    }
  }

  final synchronized String newSegmentName() {
    return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
  }

  /** Determines how often segment indices are merged by addDocument(). With
   * smaller values, less RAM is used while indexing, and searches on
   * unoptimized indices are faster, but indexing speed is slower. With larger
   * values, more RAM is used during indexing, and while searches on unoptimized
   * indices are slower, indexing is faster. Thus larger values (> 10) are best
   * for batch index creation, and smaller values (< 10) for indices that are
   * interactively maintained.
   *
   * <p>This must never be less than 2. The default value is {@link #DEFAULT_MERGE_FACTOR}.
   */
  private int mergeFactor = DEFAULT_MERGE_FACTOR;

  /** Determines the minimal number of documents required before the buffered
   * in-memory documents are merged and a new Segment is created.
   * Since Documents are merged in a {@link org.apache.lucene.store.RAMDirectory},
   * a large value gives faster indexing. At the same time, mergeFactor limits
   * the number of files open in a FSDirectory.
   *
   * <p>The default value is {@link #DEFAULT_MAX_BUFFERED_DOCS}.
   */
  private int minMergeDocs = DEFAULT_MAX_BUFFERED_DOCS;

  /** Determines the largest number of documents ever merged by addDocument().
   * Small values (e.g., less than 10,000) are best for interactive indexing,
   * as this limits the length of pauses while indexing to a few seconds.
   * Larger values are best for batched indexing and speedier searches.
   *
   * <p>The default value is {@link #DEFAULT_MAX_MERGE_DOCS}.
   */
  private int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;

  /** If non-null, information about merges will be printed to this. */
  private PrintStream infoStream = null;

  private static PrintStream defaultInfoStream = null;
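  // --- Illustrative tuning sketch (not part of the original source) ---
  // These fields are normally adjusted through the public setters rather than
  // written directly. A batch-indexing caller might, for example, do:
  //
  //   writer.setMergeFactor(30);        // merge less often: faster bulk indexing
  //   writer.setMaxBufferedDocs(1000);  // buffer more docs in RAM before flushing
  //   writer.setInfoStream(System.err); // log merge activity
  //
  // The values above are illustrative only; useful settings depend on available
  // RAM, document size, and how interactively the index is maintained.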
  /** Merges all segments together into a single segment,
   * optimizing an index for search.
   *
   * <p>It is recommended that this method be called upon completion of indexing. In
   * environments with frequent updates, optimize is best done during low volume times, if at all.
   * </p>
   * <p>See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion.</p>
   *
   * <p>Note that this requires substantial temporary free
   * space in the Directory (see <a target="_top"
   * href="http://issues.apache.org/jira/browse/LUCENE-764">LUCENE-764</a>
   * for details):</p>
   *
   * <ul>
   * <li>
   *
   * <p>If no readers/searchers are open against the index,
   * then free space required is up to 1X the total size of
   * the starting index. For example, if the starting
   * index is 10 GB, then you must have up to 10 GB of free
   * space before calling optimize.</p>
   *
   * <li>
   *
   * <p>If readers/searchers are using the index, then free
   * space required is up to 2X the size of the starting
   * index. This is because in addition to the 1X used by
   * optimize, the original 1X of the starting index is
   * still consuming space in the Directory as the readers
   * are holding the segments files open. Even on Unix,
   * where it will appear as if the files are gone ("ls"
   * won't list them), they still consume storage due to
   * "delete on last close" semantics.</p>
   *
   * <p>Furthermore, if some but not all readers re-open
   * while the optimize is underway, this will cause > 2X
   * temporary space to be consumed as those new readers
   * will then hold open the partially optimized segments at
   * that time. It is best not to re-open readers while
   * optimize is running.</p>
   *
   * </ul>
   *
   * <p>The actual temporary usage could be much less than
   * these figures (it depends on many factors).</p>
   *
   * <p>Once the optimize completes, the total size of the
   * index will be less than the size of the starting index.
   * It could be quite a bit smaller (if there were many
   * pending deletes) or just slightly smaller.</p>
   *
   * <p>If an Exception is hit during optimize(), for example
   * due to disk full, the index will not be corrupt and no
   * documents will have been lost. However, it may have
   * been partially optimized (some segments were merged but
   * not all), and it's possible that one of the segments in
   * the index will be in non-compound format even when
   * using compound file format. This will occur when the
   * Exception is hit during conversion of the segment into
   * compound format.</p>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public synchronized void optimize() throws CorruptIndexException, IOException {
    ensureOpen();
    flushRamSegments();
    while (segmentInfos.size() > 1 ||
           (segmentInfos.size() == 1 &&
            (SegmentReader.hasDeletions(segmentInfos.info(0)) ||
             SegmentReader.hasSeparateNorms(segmentInfos.info(0)) ||
             segmentInfos.info(0).dir != directory ||
             (useCompoundFile &&
              (!SegmentReader.usesCompoundFile(segmentInfos.info(0))))))) {
      int minSegment = segmentInfos.size() - mergeFactor;
      mergeSegments(segmentInfos, minSegment < 0 ? 0 : minSegment, segmentInfos.size());
    }
  }

  /*
   * Begin a transaction. During a transaction, any segment
   * merges that happen (or ram segments flushed) will not
   * write a new segments file and will not remove any files
   * that were present at the start of the transaction. You
   * must make a matched (try/finally) call to
   * commitTransaction() or rollbackTransaction() to finish
   * the transaction.
   */
  private void startTransaction() throws IOException {
    localRollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
    localAutoCommit = autoCommit;
    if (localAutoCommit) {
      flushRamSegments();
      // Turn off auto-commit during our local transaction:
      autoCommit = false;
    } else
      // We must "protect" our files at this point from
      // deletion in case we need to rollback:
      deleter.incRef(segmentInfos, false);
  }

  /*
   * Rolls back the transaction and restores state to where
   * we were at the start.
   */
  private void rollbackTransaction() throws IOException {

    // First restore autoCommit in case we hit an exception below:
    autoCommit = localAutoCommit;

    // Keep the same segmentInfos instance but replace all
    // of its SegmentInfo instances. This is so the next
    // attempt to commit using this instance of IndexWriter
    // will always write to a new generation ("write once").
    segmentInfos.clear();
    segmentInfos.addAll(localRollbackSegmentInfos);
    localRollbackSegmentInfos = null;

    // Ask deleter to locate unreferenced files we had
    // created & remove them:
    deleter.checkpoint(segmentInfos, false);

    if (!autoCommit)
      // Remove the incRef we did in startTransaction:
      deleter.decRef(segmentInfos);

    deleter.refresh();
  }

  /*
   * Commits the transaction. This will write the new
   * segments file and remove any pending deletions we have
   * accumulated during the transaction.
   */
  private void commitTransaction() throws IOException {

    // First restore autoCommit in case we hit an exception below:
    autoCommit = localAutoCommit;

    boolean success = false;
    try {
      checkpoint();
      success = true;
    } finally {
      if (!success) {
        rollbackTransaction();
      }
    }

    if (!autoCommit)
      // Remove the incRef we did in startTransaction.
      deleter.decRef(localRollbackSegmentInfos);

    localRollbackSegmentInfos = null;

    // Give deleter a chance to remove files now:
    deleter.checkpoint(segmentInfos, autoCommit);
  }
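  // --- Illustrative pattern sketch (not part of the original source) ---
  // startTransaction()/commitTransaction()/rollbackTransaction() are private
  // helpers; callers inside this class pair them with try/finally, roughly:
  //
  //   startTransaction();
  //   boolean success = false;
  //   try {
  //     // ... merge or copy segments ...
  //     success = true;
  //   } finally {
  //     if (success)
  //       commitTransaction();
  //     else
  //       rollbackTransaction();
  //   }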
  /**
   * Close the <code>IndexWriter</code> without committing
   * any of the changes that have occurred since it was
   * opened. This removes any temporary files that had been
   * created, after which the state of the index will be the
   * same as it was when this writer was first opened. This
   * can only be called when this IndexWriter was opened
   * with <code>autoCommit=false</code>.
   * @throws IllegalStateException if this is called when
   * the writer was opened with <code>autoCommit=true</code>.
   * @throws IOException if there is a low-level IO error
   */
  public synchronized void abort() throws IOException {
    ensureOpen();
    if (!autoCommit) {

      // Keep the same segmentInfos instance but replace all
      // of its SegmentInfo instances. This is so the next
      // attempt to commit using this instance of IndexWriter
      // will always write to a new generation ("write once").
      segmentInfos.clear();
      segmentInfos.addAll(rollbackSegmentInfos);

      // Ask deleter to locate unreferenced files & remove
      // them:
      deleter.checkpoint(segmentInfos, false);
      deleter.refresh();

      ramSegmentInfos = new SegmentInfos();
      bufferedDeleteTerms.clear();
      numBufferedDeleteTerms = 0;

      commitPending = false;
      close();

    } else {
      throw new IllegalStateException("abort() can only be called when IndexWriter was opened with autoCommit=false");
    }
  }

  /*
   * Called whenever the SegmentInfos has been updated and
   * the index files referenced exist (correctly) in the
   * index directory. If we are in autoCommit mode, we
   * commit the change immediately. Else, we mark
   * commitPending.
   */
  private void checkpoint() throws IOException {
    if (autoCommit) {
      segmentInfos.write(directory);
    } else {
      commitPending = true;
    }
  }

  /** Merges all segments from an array of indexes into this index.
   *
   * <p>This may be used to parallelize batch indexing. A large document
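  // --- Illustrative usage sketch (not part of the original source) ---
  // abort() requires an IndexWriter opened with autoCommit=false (the exact
  // constructor signature for that depends on the Lucene version). A caller
  // could then discard everything buffered since the writer was opened:
  //
  //   try {
  //     writer.updateDocument(new Term("id", "42"), doc);
  //     writer.close();   // close() commits the pending changes
  //   } catch (IOException e) {
  //     writer.abort();   // discard all changes since open, then close
  //   }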