IndexWriter.java
   *
   * <p>Note that if an Exception is hit (for example disk full)
   * then the index will be consistent, but this document
   * may not have been added.  Furthermore, it's possible
   * the index will have one segment in non-compound format
   * even when using compound files (when a merge has
   * partially succeeded).</p>
   *
   * <p>This method periodically flushes pending documents
   * to the Directory (every {@link #setMaxBufferedDocs}),
   * and also periodically merges segments in the index
   * (every {@link #setMergeFactor} flushes).  When this
   * occurs, the method will take more time to run (possibly
   * a long time if the index is large), and will require
   * free temporary space in the Directory to do the
   * merging.</p>
   *
   * <p>The amount of free space required when a merge is triggered is
   * up to 1X the size of all segments being merged, when no
   * readers/searchers are open against the index, and up to 2X the
   * size of all segments being merged when readers/searchers are open
   * against the index (see {@link #optimize()} for details).  The
   * sequence of primitive merge operations performed is governed by
   * the merge policy.</p>
   *
   * <p>Note that each term in the document can be no longer
   * than 16383 characters, otherwise an
   * IllegalArgumentException will be thrown.</p>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void addDocument(Document doc) throws CorruptIndexException, IOException {
    addDocument(doc, analyzer);
  }

  /**
   * Adds a document to this index, using the provided analyzer instead of the
   * value of {@link #getAnalyzer()}.  If the document contains more than
   * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
   * discarded.
   *
   * <p>See {@link #addDocument(Document)} for details on
   * index and IndexWriter state after an Exception, and
   * flushing/merging temporary free space requirements.</p>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void addDocument(Document doc, Analyzer analyzer)
      throws CorruptIndexException, IOException {
    ensureOpen();
    boolean doFlush = false;
    boolean success = false;
    try {
      try {
        doFlush = docWriter.addDocument(doc, analyzer);
        success = true;
      } finally {
        if (!success) {
          if (infoStream != null)
            message("hit exception adding document");
          synchronized (this) {
            // If docWriter has some aborted files that were
            // never incref'd, then we clean them up here
            if (docWriter != null) {
              final List files = docWriter.abortedFiles();
              if (files != null)
                deleter.deleteNewFiles(files);
            }
          }
        }
      }
      if (doFlush)
        flush(true, false);
    } catch (OutOfMemoryError oom) {
      hitOOM = true;
      throw oom;
    }
  }

  /**
   * Deletes the document(s) containing <code>term</code>.
   * @param term the term to identify the documents to be deleted
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void deleteDocuments(Term term) throws CorruptIndexException, IOException {
    ensureOpen();
    try {
      boolean doFlush = docWriter.bufferDeleteTerm(term);
      if (doFlush)
        flush(true, false);
    } catch (OutOfMemoryError oom) {
      hitOOM = true;
      throw oom;
    }
  }
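  // Usage sketch (illustrative only, not part of the Lucene source):
  // exercises the add/delete calls above with the 2.x-era Field API.
  // The directory, analyzer, and field names ("id", "contents") are
  // hypothetical.
  //
  //   IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);
  //   Document doc = new Document();
  //   doc.add(new Field("id", "42", Field.Store.YES, Field.Index.UN_TOKENIZED));
  //   doc.add(new Field("contents", "some text", Field.Store.NO, Field.Index.TOKENIZED));
  //   writer.addDocument(doc);                      // may trigger a flush and/or merge
  //   writer.deleteDocuments(new Term("id", "42")); // buffered until the next flush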
  /**
   * Deletes the document(s) containing any of the
   * terms.  All deletes are flushed at the same time.
   * @param terms array of terms to identify the documents
   * to be deleted
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void deleteDocuments(Term[] terms) throws CorruptIndexException, IOException {
    ensureOpen();
    try {
      boolean doFlush = docWriter.bufferDeleteTerms(terms);
      if (doFlush)
        flush(true, false);
    } catch (OutOfMemoryError oom) {
      hitOOM = true;
      throw oom;
    }
  }

  /**
   * Updates a document by first deleting the document(s)
   * containing <code>term</code> and then adding the new
   * document.  The delete and then add are atomic as seen
   * by a reader on the same index (flush may happen only after
   * the add).
   * @param term the term to identify the document(s) to be
   * deleted
   * @param doc the document to be added
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void updateDocument(Term term, Document doc)
      throws CorruptIndexException, IOException {
    ensureOpen();
    updateDocument(term, doc, getAnalyzer());
  }

  /**
   * Updates a document by first deleting the document(s)
   * containing <code>term</code> and then adding the new
   * document.  The delete and then add are atomic as seen
   * by a reader on the same index (flush may happen only after
   * the add).
   * @param term the term to identify the document(s) to be
   * deleted
   * @param doc the document to be added
   * @param analyzer the analyzer to use when analyzing the document
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void updateDocument(Term term, Document doc, Analyzer analyzer)
      throws CorruptIndexException, IOException {
    ensureOpen();
    try {
      boolean doFlush = false;
      boolean success = false;
      try {
        doFlush = docWriter.updateDocument(term, doc, analyzer);
        success = true;
      } finally {
        if (!success) {
          if (infoStream != null)
            message("hit exception updating document");
          synchronized (this) {
            // If docWriter has some aborted files that were
            // never incref'd, then we clean them up here
            final List files = docWriter.abortedFiles();
            if (files != null)
              deleter.deleteNewFiles(files);
          }
        }
      }
      if (doFlush)
        flush(true, false);
    } catch (OutOfMemoryError oom) {
      hitOOM = true;
      throw oom;
    }
  }

  // for test purpose
  final synchronized int getSegmentCount(){
    return segmentInfos.size();
  }

  // for test purpose
  final synchronized int getNumBufferedDocuments(){
    return docWriter.getNumDocsInRAM();
  }

  // for test purpose
  final synchronized int getDocCount(int i) {
    if (i >= 0 && i < segmentInfos.size()) {
      return segmentInfos.info(i).docCount;
    } else {
      return -1;
    }
  }

  final String newSegmentName() {
    // Cannot synchronize on IndexWriter because that causes
    // deadlock
    synchronized(segmentInfos) {
      // Important to set commitPending so that the
      // segmentInfos is written on close.  Otherwise we
      // could close, re-open and re-return the same segment
      // name that was previously returned which can cause
      // problems at least with ConcurrentMergeScheduler.
      commitPending = true;
      return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
    }
  }

  /** If non-null, information about merges will be printed to this. */
  private PrintStream infoStream = null;
  private static PrintStream defaultInfoStream = null;
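  // Usage sketch (illustrative only, not part of the Lucene source):
  // updateDocument is an atomic delete-then-add keyed on a term that
  // should uniquely identify the old document; the "id" field here is a
  // hypothetical unique key.
  //
  //   Document newVersion = new Document();
  //   newVersion.add(new Field("id", "42", Field.Store.YES, Field.Index.UN_TOKENIZED));
  //   newVersion.add(new Field("contents", "revised text", Field.Store.NO, Field.Index.TOKENIZED));
  //   writer.updateDocument(new Term("id", "42"), newVersion);
  //
  // Note also that newSegmentName() above encodes segmentInfos.counter in
  // base 36 (Character.MAX_RADIX), so successive segments are named
  // _0 through _9, then _a, _b, and so on.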
  /**
   * Requests an "optimize" operation on an index, priming the index
   * for the fastest available search.  Traditionally this has meant
   * merging all segments into a single segment as is done in the
   * default merge policy, but individual merge policies may implement
   * optimize in different ways.
   *
   * @see LogMergePolicy#findMergesForOptimize
   *
   * <p>It is recommended that this method be called upon completion of indexing.  In
   * environments with frequent updates, optimize is best done during low volume times, if at all.
   * </p>
   * <p>See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion.</p>
   *
   * <p>Note that this can require substantial temporary free
   * space in the Directory (see <a target="_top"
   * href="http://issues.apache.org/jira/browse/LUCENE-764">LUCENE-764</a>
   * for details):</p>
   *
   * <ul>
   * <li>
   *
   * <p>If no readers/searchers are open against the index,
   * then free space required is up to 1X the total size of
   * the starting index.  For example, if the starting
   * index is 10 GB, then you must have up to 10 GB of free
   * space before calling optimize.</p>
   *
   * <li>
   *
   * <p>If readers/searchers are using the index, then free
   * space required is up to 2X the size of the starting
   * index.  This is because in addition to the 1X used by
   * optimize, the original 1X of the starting index is
   * still consuming space in the Directory as the readers
   * are holding the segments files open.  Even on Unix,
   * where it will appear as if the files are gone ("ls"
   * won't list them), they still consume storage due to
   * "delete on last close" semantics.</p>
   *
   * <p>Furthermore, if some but not all readers re-open
   * while the optimize is underway, this will cause > 2X
   * temporary space to be consumed as those new readers
   * will then hold open the partially optimized segments at
   * that time.  It is best not to re-open readers while
   * optimize is running.</p>
   *
   * </ul>
   *
   * <p>The actual temporary usage could be much less than
   * these figures (it depends on many factors).</p>
   *
   * <p>In general, once the optimize completes, the total size of the
   * index will be less than the size of the starting index.
   * It could be quite a bit smaller (if there were many
   * pending deletes) or just slightly smaller.</p>
   *
   * <p>If an Exception is hit during optimize(), for example
   * due to disk full, the index will not be corrupt and no
   * documents will have been lost.  However, it may have
   * been partially optimized (some segments were merged but
   * not all), and it's possible that one of the segments in
   * the index will be in non-compound format even when
   * using compound file format.  This will occur when the
   * Exception is hit during conversion of the segment into
   * compound format.</p>
   *
   * <p>This call will optimize those segments present in
   * the index when the call started.  If other threads are
   * still adding documents and flushing segments, those
   * newly created segments will not be optimized unless you
   * call optimize again.</p>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void optimize() throws CorruptIndexException, IOException {
    optimize(true);
  }

  /**
   * Optimize the index down to <= maxNumSegments.  If
   * maxNumSegments==1 then this is the same as {@link
   * #optimize()}.
   * @param maxNumSegments maximum number of segments left
   * in the index after optimization finishes
   */
  public void optimize(int maxNumSegments) throws CorruptIndexException, IOException {
    optimize(maxNumSegments, true);
  }
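  // Usage sketch (illustrative only, not part of the Lucene source):
  // a typical bulk-load pattern ends with a blocking optimize() before
  // searchers are opened.  "writer" and "docs" are hypothetical.
  //
  //   for (Iterator it = docs.iterator(); it.hasNext();)
  //     writer.addDocument((Document) it.next());
  //   writer.optimize();  // blocks until done; needs up to 1X free space,
  //                       // or up to 2X with open readers (see Javadoc above)
  //   writer.close();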
  /** Just like {@link #optimize()}, except you can specify
   *  whether the call should block until the optimize
   *  completes.  This is only meaningful with a
   *  {@link MergeScheduler} that is able to run merges in
   *  background threads. */
  public void optimize(boolean doWait) throws CorruptIndexException, IOException {
    optimize(1, doWait);
  }

  /** Just like {@link #optimize(int)}, except you can
   *  specify whether the call should block until the
   *  optimize completes.  This is only meaningful with a
   *  {@link MergeScheduler} that is able to run merges in
   *  background threads. */
  public void optimize(int maxNumSegments, boolean doWait) throws CorruptIndexException, IOException {
    ensureOpen();

    if (maxNumSegments < 1)
      throw new IllegalArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments);

    if (infoStream != null)
      message("optimize: index now " + segString());

    flush();

    synchronized(this) {
      resetMergeExceptions();
      segmentsToOptimize = new HashSet();
      final int numSegments = segmentInfos.size();
      for(int i=0;i<numSegments;i++)
        segmentsToOptimize.add(segmentInfos.info(i));

      // Now mark all pending & running merges as optimize
      // merge:
      Iterator it = pendingMerges.iterator();
      while(it.hasNext()) {
        final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) it.next();
        merge.optimize = true;
        merge.maxNumSegmentsOptimize = maxNumSegments;
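  // Usage sketch (illustrative only, not part of the Lucene source):
  // a non-blocking optimize only makes sense with a MergeScheduler that
  // runs merges in background threads, e.g. ConcurrentMergeScheduler
  // (assuming the 2.3-era setMergeScheduler API).
  //
  //   writer.setMergeScheduler(new ConcurrentMergeScheduler());
  //   writer.optimize(false);  // returns immediately; the optimize
  //                            // merges run on background threads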