IndexWriter.java
   * @param create <code>true</code> to create the index or overwrite
   *  the existing one; <code>false</code> to append to the existing
   *  index
   * @param deletionPolicy see <a href="#deletionPolicy">above</a>
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be read/written to, or
   *  if it does not exist and <code>create</code> is
   *  <code>false</code> or if there is any other low-level
   *  IO error
   * @deprecated This constructor will be removed in the 3.0 release.
   *  Use {@link
   *  #IndexWriter(Directory,Analyzer,boolean,IndexDeletionPolicy,MaxFieldLength)}
   *  instead, and call {@link #commit()} when needed.
   */
  public IndexWriter(Directory d, boolean autoCommit, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    init(d, a, create, false, deletionPolicy, autoCommit, DEFAULT_MAX_FIELD_LENGTH);
  }

  private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    if (IndexReader.indexExists(d)) {
      init(d, a, false, closeDir, deletionPolicy, autoCommit, maxFieldLength);
    } else {
      init(d, a, true, closeDir, deletionPolicy, autoCommit, maxFieldLength);
    }
  }

  private void init(Directory d, Analyzer a, final boolean create, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength)
    throws CorruptIndexException, LockObtainFailedException, IOException {

    this.closeDir = closeDir;
    directory = d;
    analyzer = a;
    setMessageID(defaultInfoStream);
    this.maxFieldLength = maxFieldLength;

    if (create) {
      // Clear the write lock in case it's leftover:
      directory.clearLock(WRITE_LOCK_NAME);
    }

    Lock writeLock = directory.makeLock(WRITE_LOCK_NAME);
    if (!writeLock.obtain(writeLockTimeout)) // obtain write lock
      throw new LockObtainFailedException("Index locked for write: " + writeLock);
    this.writeLock = writeLock;                   // save it

    try {
      if (create) {
        // Try to read first.  This is to allow create
        // against an index that's currently open for
        // searching.  In this case we write the next
        // segments_N file with no segments:
        try {
          segmentInfos.read(directory);
          segmentInfos.clear();
        } catch (IOException e) {
          // Likely this means it's a fresh directory
        }
        segmentInfos.commit(directory);
      } else {
        segmentInfos.read(directory);

        // We assume that this segments_N was previously
        // properly sync'd:
        for(int i=0;i<segmentInfos.size();i++) {
          final SegmentInfo info = segmentInfos.info(i);
          List files = info.files();
          for(int j=0;j<files.size();j++)
            synced.add(files.get(j));
        }
      }

      this.autoCommit = autoCommit;
      setRollbackSegmentInfos(segmentInfos);

      docWriter = new DocumentsWriter(directory, this);
      docWriter.setInfoStream(infoStream);
      docWriter.setMaxFieldLength(maxFieldLength);

      // Default deleter (for backwards compatibility) is
      // KeepOnlyLastCommitDeleter:
      deleter = new IndexFileDeleter(directory,
                                     deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy,
                                     segmentInfos, infoStream, docWriter);

      pushMaxBufferedDocs();

      if (infoStream != null) {
        message("init: create=" + create);
        messageState();
      }

    } catch (IOException e) {
      this.writeLock.release();
      this.writeLock = null;
      throw e;
    }
  }

  private synchronized void setRollbackSegmentInfos(SegmentInfos infos) {
    rollbackSegmentInfos = (SegmentInfos) infos.clone();
    assert !hasExternalSegments(rollbackSegmentInfos);
    rollbackSegments = new HashMap();
    final int size = rollbackSegmentInfos.size();
    for(int i=0;i<size;i++)
      rollbackSegments.put(rollbackSegmentInfos.info(i), new Integer(i));
  }

  /**
   * Expert: set the merge policy used by this writer.
   */
  public void setMergePolicy(MergePolicy mp) {
    ensureOpen();
    if (mp == null)
      throw new NullPointerException("MergePolicy must be non-null");

    if (mergePolicy != mp)
      mergePolicy.close();
    mergePolicy = mp;
    pushMaxBufferedDocs();
    if (infoStream != null)
      message("setMergePolicy " + mp);
  }

  /**
   * Expert: returns the current MergePolicy in use by this writer.
   * @see #setMergePolicy
   */
  public MergePolicy getMergePolicy() {
    ensureOpen();
    return mergePolicy;
  }

  /**
   * Expert: set the merge scheduler used by this writer.
   */
  synchronized public void setMergeScheduler(MergeScheduler mergeScheduler) throws CorruptIndexException, IOException {
    ensureOpen();
    if (mergeScheduler == null)
      throw new NullPointerException("MergeScheduler must be non-null");

    if (this.mergeScheduler != mergeScheduler) {
      finishMerges(true);
      this.mergeScheduler.close();
    }
    this.mergeScheduler = mergeScheduler;
    if (infoStream != null)
      message("setMergeScheduler " + mergeScheduler);
  }

  /**
   * Expert: returns the current MergeScheduler in use by this
   * writer.
   * @see #setMergeScheduler
   */
  public MergeScheduler getMergeScheduler() {
    ensureOpen();
    return mergeScheduler;
  }

  /** <p>Determines the largest segment (measured by
   * document count) that may be merged with other segments.
   * Small values (e.g., less than 10,000) are best for
   * interactive indexing, as this limits the length of
   * pauses while indexing to a few seconds.  Larger values
   * are best for batched indexing and speedier
   * searches.</p>
   *
   * <p>The default value is {@link Integer#MAX_VALUE}.</p>
   *
   * <p>Note that this method is a convenience method: it
   * just calls mergePolicy.setMaxMergeDocs as long as
   * mergePolicy is an instance of {@link LogMergePolicy}.
   * Otherwise an IllegalArgumentException is thrown.</p>
   *
   * <p>The default merge policy ({@link
   * LogByteSizeMergePolicy}) also allows you to set this
   * limit by net size (in MB) of the segment, using {@link
   * LogByteSizeMergePolicy#setMaxMergeMB}.</p>
   */
  public void setMaxMergeDocs(int maxMergeDocs) {
    getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
  }

  /**
   * <p>Returns the largest segment (measured by document
   * count) that may be merged with other segments.</p>
   *
   * <p>Note that this method is a convenience method: it
   * just calls mergePolicy.getMaxMergeDocs as long as
   * mergePolicy is an instance of {@link LogMergePolicy}.
   * Otherwise an IllegalArgumentException is thrown.</p>
   *
   * @see #setMaxMergeDocs
   */
  public int getMaxMergeDocs() {
    return getLogMergePolicy().getMaxMergeDocs();
  }

  /**
   * The maximum number of terms that will be indexed for a single field in a
   * document.  This limits the amount of memory required for indexing, so that
   * collections with very large files will not crash the indexing process by
   * running out of memory.  This setting refers to the number of running terms,
   * not to the number of different terms.<p/>
   * <strong>Note:</strong> this silently truncates large documents, excluding from the
   * index all terms that occur further in the document.  If you know your source
   * documents are large, be sure to set this value high enough to accommodate
   * the expected size.  If you set it to Integer.MAX_VALUE, then the only limit
   * is your memory, but you should anticipate an OutOfMemoryError.<p/>
   * By default, no more than {@link #DEFAULT_MAX_FIELD_LENGTH} terms
   * will be indexed for a field.
   */
  public void setMaxFieldLength(int maxFieldLength) {
    ensureOpen();
    this.maxFieldLength = maxFieldLength;
    docWriter.setMaxFieldLength(maxFieldLength);
    if (infoStream != null)
      message("setMaxFieldLength " + maxFieldLength);
  }

  /**
   * Returns the maximum number of terms that will be
   * indexed for a single field in a document.
   * @see #setMaxFieldLength
   */
  public int getMaxFieldLength() {
    ensureOpen();
    return maxFieldLength;
  }

  /** Determines the minimal number of documents required
   * before the buffered in-memory documents are flushed as
   * a new Segment.  Large values generally give faster
   * indexing.
   *
   * <p>When this is set, the writer will flush every
   * maxBufferedDocs added documents.  Pass in {@link
   * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
   * to number of buffered documents.  Note that if flushing
   * by RAM usage is also enabled, then the flush will be
   * triggered by whichever comes first.</p>
   *
   * <p>Disabled by default (writer flushes by RAM usage).</p>
   *
   * @throws IllegalArgumentException if maxBufferedDocs is
   * enabled but smaller than 2, or it disables maxBufferedDocs
   * when ramBufferSize is already disabled
   * @see #setRAMBufferSizeMB
   */
  public void setMaxBufferedDocs(int maxBufferedDocs) {
    ensureOpen();
    if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2)
      throw new IllegalArgumentException(
          "maxBufferedDocs must at least be 2 when enabled");
    if (maxBufferedDocs == DISABLE_AUTO_FLUSH && getRAMBufferSizeMB() == DISABLE_AUTO_FLUSH)
      throw new IllegalArgumentException(
          "at least one of ramBufferSize and maxBufferedDocs must be enabled");
    docWriter.setMaxBufferedDocs(maxBufferedDocs);
    pushMaxBufferedDocs();
    if (infoStream != null)
      message("setMaxBufferedDocs " + maxBufferedDocs);
  }

  /**
   * If we are flushing by doc count (not by RAM usage), and
   * using LogDocMergePolicy then push maxBufferedDocs down
   * as its minMergeDocs, to keep backwards compatibility.
   */
  private void pushMaxBufferedDocs() {
    if (docWriter.getMaxBufferedDocs() != DISABLE_AUTO_FLUSH) {
      final MergePolicy mp = mergePolicy;
      if (mp instanceof LogDocMergePolicy) {
        LogDocMergePolicy lmp = (LogDocMergePolicy) mp;
        final int maxBufferedDocs = docWriter.getMaxBufferedDocs();
        if (lmp.getMinMergeDocs() != maxBufferedDocs) {
          if (infoStream != null)
            message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy");
          lmp.setMinMergeDocs(maxBufferedDocs);
        }
      }
    }
  }

  /**
   * Returns the number of buffered added documents that will
   * trigger a flush if enabled.
   * @see #setMaxBufferedDocs
   */
  public int getMaxBufferedDocs() {
    ensureOpen();
    return docWriter.getMaxBufferedDocs();
  }

  /** Determines the amount of RAM that may be used for
   * buffering added documents before they are flushed as a
   * new Segment.  Generally for faster indexing performance
   * it's best to flush by RAM usage instead of document
   * count and use as large a RAM buffer as you can.
   *
   * <p>When this is set, the writer will flush whenever
   * buffered documents use this much RAM.  Pass in {@link
   * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
   * to RAM usage.  Note that if flushing by document count
   * is also enabled, then the flush will be triggered by
   * whichever comes first.</p>
   *
   * <p>The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.</p>
   *
   * @throws IllegalArgumentException if ramBufferSize is
   * enabled but non-positive, or it disables ramBufferSize
   * when maxBufferedDocs is already disabled
   */
  public void setRAMBufferSizeMB(double mb) {
    if (mb != DISABLE_AUTO_FLUSH && mb <= 0.0)
      throw new IllegalArgumentException(
          "ramBufferSize should be > 0.0 MB when enabled");
    if (mb == DISABLE_AUTO_FLUSH && getMaxBufferedDocs() == DISABLE_AUTO_FLUSH)
      throw new IllegalArgumentException(
          "at least one of ramBufferSize and maxBufferedDocs must be enabled");
    docWriter.setRAMBufferSizeMB(mb);
    if (infoStream != null)
      message("setRAMBufferSizeMB " + mb);
  }

  /**
   * Returns the value set by {@link #setRAMBufferSizeMB} if enabled.
   */
  public double getRAMBufferSizeMB() {
    return docWriter.getRAMBufferSizeMB();
  }
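The constructor and the flush-control setters above are usually exercised together when a writer is opened. The following is a minimal usage sketch, not part of IndexWriter.java itself; it assumes a Lucene 2.x-era API, a StandardAnalyzer, and a hypothetical index path, and uses the non-deprecated constructor recommended by the @deprecated note above.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class IndexWriterSetupSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical index location; replace with a real path.
    Directory dir = FSDirectory.getDirectory("/tmp/example-index");

    // create=true creates or overwrites the index, mirroring the "create"
    // parameter documented above; MaxFieldLength.LIMITED applies the
    // DEFAULT_MAX_FIELD_LENGTH term limit per field.
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true,
                                         IndexWriter.MaxFieldLength.LIMITED);

    // Flush by RAM usage (the default strategy); 32 MB is an arbitrary choice.
    writer.setRAMBufferSizeMB(32.0);

    // Or flush by buffered document count instead; at least one of the two
    // triggers must remain enabled:
    // writer.setMaxBufferedDocs(1000);

    // Raise the per-field term limit for large source documents.
    writer.setMaxFieldLength(50000);

    writer.commit();
    writer.close();
    dir.close();
  }
}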
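setMaxMergeDocs and getMaxMergeDocs are convenience wrappers that, per their Javadoc, only delegate to the merge policy when it is a LogMergePolicy. A small illustrative sketch of that interaction follows; it assumes an already-open writer (such as the one in the previous example) that still uses the default LogByteSizeMergePolicy.

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LogByteSizeMergePolicy;

public class MergeTuningSketch {
  // Assumes "writer" is open and its merge policy has not been replaced.
  static void tuneMerging(IndexWriter writer) {
    // Convenience setter: caps merged segments at 10,000 documents, which the
    // Javadoc above suggests for interactive indexing.
    writer.setMaxMergeDocs(10000);

    // The same bound can be expressed by net segment size; the cast is safe
    // only while the default LogByteSizeMergePolicy is still installed.
    LogByteSizeMergePolicy policy = (LogByteSizeMergePolicy) writer.getMergePolicy();
    policy.setMaxMergeMB(512.0); // 512 MB is an arbitrary illustrative value
  }
}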