
indexwriter.java (Lucene 2.2.0, Java)
  /** Determines the minimal number of documents required before the buffered
   * in-memory documents are merged and a new Segment is created.
   * Since Documents are merged in a {@link org.apache.lucene.store.RAMDirectory},
   * a large value gives faster indexing.  At the same time, mergeFactor limits
   * the number of files open in a FSDirectory.
   *
   * <p> The default value is 10.
   *
   * @throws IllegalArgumentException if maxBufferedDocs is smaller than 2
   */
  public void setMaxBufferedDocs(int maxBufferedDocs) {
    ensureOpen();
    if (maxBufferedDocs < 2)
      throw new IllegalArgumentException("maxBufferedDocs must at least be 2");
    this.minMergeDocs = maxBufferedDocs;
  }

  /**
   * Returns the number of buffered added documents that will
   * trigger a flush.
   * @see #setMaxBufferedDocs
   */
  public int getMaxBufferedDocs() {
    ensureOpen();
    return minMergeDocs;
  }

  /**
   * <p>Determines the minimal number of delete terms required before the buffered
   * in-memory delete terms are applied and flushed. If there are documents
   * buffered in memory at the time, they are merged and a new segment is
   * created.</p>
   * <p>The default value is {@link #DEFAULT_MAX_BUFFERED_DELETE_TERMS}.</p>
   * @throws IllegalArgumentException if maxBufferedDeleteTerms is smaller than 1
   */
  public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
    ensureOpen();
    if (maxBufferedDeleteTerms < 1)
      throw new IllegalArgumentException("maxBufferedDeleteTerms must at least be 1");
    this.maxBufferedDeleteTerms = maxBufferedDeleteTerms;
  }

  /**
   * Returns the number of buffered deleted terms that will
   * trigger a flush.
   * @see #setMaxBufferedDeleteTerms
   */
  public int getMaxBufferedDeleteTerms() {
    ensureOpen();
    return maxBufferedDeleteTerms;
  }

  /** Determines how often segment indices are merged by addDocument().  With
   * smaller values, less RAM is used while indexing, and searches on
   * unoptimized indices are faster, but indexing speed is slower.  With larger
   * values, more RAM is used during indexing, and while searches on unoptimized
   * indices are slower, indexing is faster.  Thus larger values (> 10) are best
   * for batch index creation, and smaller values (< 10) for indices that are
   * interactively maintained.
   *
   * <p>This must never be less than 2.  The default value is 10.
   */
  public void setMergeFactor(int mergeFactor) {
    ensureOpen();
    if (mergeFactor < 2)
      throw new IllegalArgumentException("mergeFactor cannot be less than 2");
    this.mergeFactor = mergeFactor;
  }

  /**
   * Returns the number of segments that are merged at once
   * and also controls the total number of segments allowed
   * to accumulate in the index.
   * @see #setMergeFactor
   */
  public int getMergeFactor() {
    ensureOpen();
    return mergeFactor;
  }

  /** If non-null, this will be the default infoStream used
   * by a newly instantiated IndexWriter.
   * @see #setInfoStream
   */
  public static void setDefaultInfoStream(PrintStream infoStream) {
    IndexWriter.defaultInfoStream = infoStream;
  }

  /**
   * Returns the current default infoStream for newly
   * instantiated IndexWriters.
   * @see #setDefaultInfoStream
   */
  public static PrintStream getDefaultInfoStream() {
    return IndexWriter.defaultInfoStream;
  }

  /** If non-null, information about merges, deletes and a
   * message when maxFieldLength is reached will be printed
   * to this.
   */
  public void setInfoStream(PrintStream infoStream) {
    ensureOpen();
    this.infoStream = infoStream;
    deleter.setInfoStream(infoStream);
  }

  /**
   * Returns the current infoStream in use by this writer.
   * @see #setInfoStream
   */
  public PrintStream getInfoStream() {
    ensureOpen();
    return infoStream;
  }
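A minimal usage sketch (not part of IndexWriter.java itself) showing how these tuning knobs fit together; the index path, analyzer choice, and create flag below are assumptions for illustration:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;

public class TuningSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical index location; any writable directory works.
    IndexWriter writer = new IndexWriter("/tmp/index", new StandardAnalyzer(), true);
    writer.setMaxBufferedDocs(1000);  // flush to disk after 1000 buffered documents
    writer.setMergeFactor(20);        // batch-style indexing: fewer, larger merge passes
    writer.setInfoStream(System.out); // print merge/delete diagnostics while tuning
    writer.close();
  }
}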
  /**
   * Sets the maximum time to wait for a write lock (in milliseconds) for this instance of IndexWriter.
   * @see #setDefaultWriteLockTimeout to change the default value for all instances of IndexWriter.
   */
  public void setWriteLockTimeout(long writeLockTimeout) {
    ensureOpen();
    this.writeLockTimeout = writeLockTimeout;
  }

  /**
   * Returns allowed timeout when acquiring the write lock.
   * @see #setWriteLockTimeout
   */
  public long getWriteLockTimeout() {
    ensureOpen();
    return writeLockTimeout;
  }

  /**
   * Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in
   * milliseconds).
   */
  public static void setDefaultWriteLockTimeout(long writeLockTimeout) {
    IndexWriter.WRITE_LOCK_TIMEOUT = writeLockTimeout;
  }

  /**
   * Returns default write lock timeout for newly
   * instantiated IndexWriters.
   * @see #setDefaultWriteLockTimeout
   */
  public static long getDefaultWriteLockTimeout() {
    return IndexWriter.WRITE_LOCK_TIMEOUT;
  }

  /**
   * Flushes all changes to an index and closes all
   * associated files.
   *
   * <p> If an Exception is hit during close, eg due to disk
   * full or some other reason, then both the on-disk index
   * and the internal state of the IndexWriter instance will
   * be consistent.  However, the close will not be complete
   * even though part of it (flushing buffered documents)
   * may have succeeded, so the write lock will still be
   * held.</p>
   *
   * <p> If you can correct the underlying cause (eg free up
   * some disk space) then you can call close() again.
   * Failing that, if you want to force the write lock to be
   * released (dangerous, because you may then lose buffered
   * docs in the IndexWriter instance) then you can do
   * something like this:</p>
   *
   * <pre>
   * try {
   *   writer.close();
   * } finally {
   *   if (IndexReader.isLocked(directory)) {
   *     IndexReader.unlock(directory);
   *   }
   * }
   * </pre>
   *
   * <p>after which, you must be certain not to use the writer
   * instance anymore.</p>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public synchronized void close() throws CorruptIndexException, IOException {
    if (!closed) {
      flushRamSegments();
      if (commitPending) {
        segmentInfos.write(directory);         // now commit changes
        deleter.checkpoint(segmentInfos, true);
        commitPending = false;
        rollbackSegmentInfos = null;
      }
      ramDirectory.close();
      if (writeLock != null) {
        writeLock.release();                   // release write lock
        writeLock = null;
      }
      closed = true;
      if (closeDir)
        directory.close();
    }
  }

  /** Release the write lock, if needed. */
  protected void finalize() throws Throwable {
    try {
      if (writeLock != null) {
        writeLock.release();                   // release write lock
        writeLock = null;
      }
    } finally {
      super.finalize();
    }
  }

  /** Returns the Directory used by this index. */
  public Directory getDirectory() {
    ensureOpen();
    return directory;
  }

  /** Returns the analyzer used by this index. */
  public Analyzer getAnalyzer() {
    ensureOpen();
    return analyzer;
  }
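To make the close() contract above concrete, here is a hedged sketch of the forced-unlock pattern from the javadoc, wrapped in a runnable class; the directory path and the 2-second timeout are assumptions for illustration:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CloseSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.getDirectory("/tmp/index"); // hypothetical path
    IndexWriter.setDefaultWriteLockTimeout(2000); // wait up to 2s for the write lock
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);
    try {
      writer.close();
    } finally {
      // Last resort from the javadoc: dangerous, buffered docs may be lost.
      if (IndexReader.isLocked(dir)) {
        IndexReader.unlock(dir);
      }
    }
  }
}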
  /** Returns the number of documents currently in this index. */
  public synchronized int docCount() {
    ensureOpen();
    int count = ramSegmentInfos.size();
    for (int i = 0; i < segmentInfos.size(); i++) {
      SegmentInfo si = segmentInfos.info(i);
      count += si.docCount;
    }
    return count;
  }

  /**
   * The maximum number of terms that will be indexed for a single field in a
   * document.  This limits the amount of memory required for indexing, so that
   * collections with very large files will not crash the indexing process by
   * running out of memory.<p/>
   * Note that this effectively truncates large documents, excluding from the
   * index terms that occur further in the document.  If you know your source
   * documents are large, be sure to set this value high enough to accommodate
   * the expected size.  If you set it to Integer.MAX_VALUE, then the only limit
   * is your memory, but you should anticipate an OutOfMemoryError.<p/>
   * By default, no more than 10,000 terms will be indexed for a field.
   */
  private int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;

  /**
   * Adds a document to this index.  If the document contains more than
   * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
   * discarded.
   *
   * <p> Note that if an Exception is hit (for example disk full)
   * then the index will be consistent, but this document
   * may not have been added.  Furthermore, it's possible
   * the index will have one segment in non-compound format
   * even when using compound files (when a merge has
   * partially succeeded).</p>
   *
   * <p> This method periodically flushes pending documents
   * to the Directory (every {@link #setMaxBufferedDocs}),
   * and also periodically merges segments in the index
   * (every {@link #setMergeFactor} flushes).  When this
   * occurs, the method will take more time to run (possibly
   * a long time if the index is large), and will require
   * free temporary space in the Directory to do the
   * merging.</p>
   *
   * <p>The amount of free space required when a merge is
   * triggered is up to 1X the size of all segments being
   * merged, when no readers/searchers are open against the
   * index, and up to 2X the size of all segments being
   * merged when readers/searchers are open against the
   * index (see {@link #optimize()} for details).  Most
   * merges are small (merging the smallest segments
   * together), but whenever a full merge occurs (all
   * segments in the index, which is the worst case for
   * temporary space usage) then the maximum free disk space
   * required is the same as {@link #optimize}.</p>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void addDocument(Document doc) throws CorruptIndexException, IOException {
    addDocument(doc, analyzer);
  }
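A short sketch of adding a document under these flush/merge rules; the field names, values, and the 50,000-term limit are illustrative assumptions:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

public class AddSketch {
  public static void main(String[] args) throws Exception {
    IndexWriter writer = new IndexWriter("/tmp/index", new StandardAnalyzer(), true);
    writer.setMaxFieldLength(50000); // raise the 10,000-term default for large documents
    Document doc = new Document();
    doc.add(new Field("id", "42", Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.add(new Field("body", "full text to analyze ...", Field.Store.NO, Field.Index.TOKENIZED));
    writer.addDocument(doc); // may trigger a flush, and periodically a merge
    writer.close();
  }
}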
  /**
   * Adds a document to this index, using the provided analyzer instead of the
   * value of {@link #getAnalyzer()}.  If the document contains more than
   * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
   * discarded.
   *
   * <p>See {@link #addDocument(Document)} for details on
   * index and IndexWriter state after an Exception, and
   * flushing/merging temporary free space requirements.</p>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException {
    ensureOpen();
    SegmentInfo newSegmentInfo = buildSingleDocSegment(doc, analyzer);
    synchronized (this) {
      ramSegmentInfos.addElement(newSegmentInfo);
      maybeFlushRamSegments();
    }
  }

  SegmentInfo buildSingleDocSegment(Document doc, Analyzer analyzer)
      throws CorruptIndexException, IOException {
    DocumentWriter dw = new DocumentWriter(ramDirectory, analyzer, this);
    dw.setInfoStream(infoStream);
    String segmentName = newRamSegmentName();
    dw.addDocument(segmentName, doc);
    SegmentInfo si = new SegmentInfo(segmentName, 1, ramDirectory, false, false);
    si.setNumFields(dw.getNumFields());
    return si;
  }

  /**
   * Deletes the document(s) containing <code>term</code>.
   * @param term the term to identify the documents to be deleted
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public synchronized void deleteDocuments(Term term) throws CorruptIndexException, IOException {
    ensureOpen();
    bufferDeleteTerm(term);
    maybeFlushRamSegments();
  }
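Finally, a sketch of buffered deletion by term; the field name and value are assumptions for illustration:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;

public class DeleteSketch {
  public static void main(String[] args) throws Exception {
    IndexWriter writer = new IndexWriter("/tmp/index", new StandardAnalyzer(), false);
    writer.deleteDocuments(new Term("id", "42")); // buffered until enough delete terms accumulate
    writer.close(); // closing flushes buffered documents and pending deletes
  }
}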
