IndexWriter.java
      } catch (IOException e) {
        // Likely this means it's a fresh directory
      }
      segmentInfos.write(directory);
    } else {
      segmentInfos.read(directory);
    }

    this.autoCommit = autoCommit;
    if (!autoCommit) {
      rollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
    }

    docWriter = new DocumentsWriter(directory, this);
    docWriter.setInfoStream(infoStream);

    // Default deleter (for backwards compatibility) is
    // KeepOnlyLastCommitDeleter:
    deleter = new IndexFileDeleter(directory,
                                   deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy,
                                   segmentInfos, infoStream, docWriter);

    pushMaxBufferedDocs();

    if (infoStream != null) {
      message("init: create=" + create);
      messageState();
    }

  } catch (IOException e) {
    this.writeLock.release();
    this.writeLock = null;
    throw e;
  }
}

/**
 * Expert: set the merge policy used by this writer.
 */
public void setMergePolicy(MergePolicy mp) {
  ensureOpen();
  if (mp == null)
    throw new NullPointerException("MergePolicy must be non-null");

  if (mergePolicy != mp)
    mergePolicy.close();
  mergePolicy = mp;
  pushMaxBufferedDocs();
  if (infoStream != null)
    message("setMergePolicy " + mp);
}

/**
 * Expert: returns the current MergePolicy in use by this writer.
 * @see #setMergePolicy
 */
public MergePolicy getMergePolicy() {
  ensureOpen();
  return mergePolicy;
}

/**
 * Expert: set the merge scheduler used by this writer.
 */
public void setMergeScheduler(MergeScheduler mergeScheduler) throws CorruptIndexException, IOException {
  ensureOpen();
  if (mergeScheduler == null)
    throw new NullPointerException("MergeScheduler must be non-null");

  if (this.mergeScheduler != mergeScheduler) {
    finishMerges(true);
    this.mergeScheduler.close();
  }
  this.mergeScheduler = mergeScheduler;
  if (infoStream != null)
    message("setMergeScheduler " + mergeScheduler);
}

/**
 * Expert: returns the current MergeScheduler in use by this
 * writer.
 * @see #setMergeScheduler
 */
public MergeScheduler getMergeScheduler() {
  ensureOpen();
  return mergeScheduler;
}

/** <p>Determines the largest segment (measured by
 * document count) that may be merged with other segments.
 * Small values (e.g., less than 10,000) are best for
 * interactive indexing, as this limits the length of
 * pauses while indexing to a few seconds. Larger values
 * are best for batched indexing and speedier
 * searches.</p>
 *
 * <p>The default value is {@link Integer#MAX_VALUE}.</p>
 *
 * <p>Note that this method is a convenience method: it
 * just calls mergePolicy.setMaxMergeDocs as long as
 * mergePolicy is an instance of {@link LogMergePolicy}.
 * Otherwise an IllegalArgumentException is thrown.</p>
 *
 * <p>The default merge policy ({@link
 * LogByteSizeMergePolicy}) also allows you to set this
 * limit by net size (in MB) of the segment, using {@link
 * LogByteSizeMergePolicy#setMaxMergeMB}.</p>
 */
public void setMaxMergeDocs(int maxMergeDocs) {
  getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
}

/**
 * <p>Returns the largest segment (measured by document
 * count) that may be merged with other segments.</p>
 *
 * <p>Note that this method is a convenience method: it
 * just calls mergePolicy.getMaxMergeDocs as long as
 * mergePolicy is an instance of {@link LogMergePolicy}.
 * Otherwise an IllegalArgumentException is thrown.</p>
 *
 * @see #setMaxMergeDocs
 */
public int getMaxMergeDocs() {
  return getLogMergePolicy().getMaxMergeDocs();
}
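// Illustrative usage sketch; NOT part of the original IndexWriter source.
// Shows tuning segment-merge limits through the convenience setters above,
// assuming the default LogByteSizeMergePolicy is in effect. 'dir' and
// 'analyzer' are hypothetical caller-supplied objects.
/*
IndexWriter writer = new IndexWriter(dir, analyzer, true);
writer.setMaxMergeDocs(10000);  // keep interactive-indexing pauses short
((LogByteSizeMergePolicy) writer.getMergePolicy()).setMaxMergeMB(512.0);
*/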
/**
 * The maximum number of terms that will be indexed for a single field in a
 * document. This limits the amount of memory required for indexing, so that
 * collections with very large files will not crash the indexing process by
 * running out of memory. This setting refers to the number of running terms,
 * not to the number of different terms.<p/>
 * <strong>Note:</strong> this silently truncates large documents, excluding from the
 * index all terms that occur further in the document. If you know your source
 * documents are large, be sure to set this value high enough to accommodate
 * the expected size. If you set it to Integer.MAX_VALUE, then the only limit
 * is your memory, but you should anticipate an OutOfMemoryError.<p/>
 * By default, no more than 10,000 terms will be indexed for a field.
 */
public void setMaxFieldLength(int maxFieldLength) {
  ensureOpen();
  this.maxFieldLength = maxFieldLength;
  if (infoStream != null)
    message("setMaxFieldLength " + maxFieldLength);
}

/**
 * Returns the maximum number of terms that will be
 * indexed for a single field in a document.
 * @see #setMaxFieldLength
 */
public int getMaxFieldLength() {
  ensureOpen();
  return maxFieldLength;
}

/** Determines the minimal number of documents required
 * before the buffered in-memory documents are flushed as
 * a new Segment. Large values generally give faster
 * indexing.
 *
 * <p>When this is set, the writer will flush every
 * maxBufferedDocs added documents. Pass in {@link
 * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
 * to number of buffered documents. Note that if flushing
 * by RAM usage is also enabled, then the flush will be
 * triggered by whichever comes first.</p>
 *
 * <p>Disabled by default (writer flushes by RAM usage).</p>
 *
 * @throws IllegalArgumentException if maxBufferedDocs is
 * enabled but smaller than 2, or it disables maxBufferedDocs
 * when ramBufferSize is already disabled
 * @see #setRAMBufferSizeMB
 */
public void setMaxBufferedDocs(int maxBufferedDocs) {
  ensureOpen();
  if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2)
    throw new IllegalArgumentException(
        "maxBufferedDocs must at least be 2 when enabled");
  if (maxBufferedDocs == DISABLE_AUTO_FLUSH &&
      getRAMBufferSizeMB() == DISABLE_AUTO_FLUSH)
    throw new IllegalArgumentException(
        "at least one of ramBufferSize and maxBufferedDocs must be enabled");
  docWriter.setMaxBufferedDocs(maxBufferedDocs);
  pushMaxBufferedDocs();
  if (infoStream != null)
    message("setMaxBufferedDocs " + maxBufferedDocs);
}

/**
 * If we are flushing by doc count (not by RAM usage), and
 * using LogDocMergePolicy then push maxBufferedDocs down
 * as its minMergeDocs, to keep backwards compatibility.
 */
private void pushMaxBufferedDocs() {
  if (docWriter.getMaxBufferedDocs() != DISABLE_AUTO_FLUSH) {
    final MergePolicy mp = mergePolicy;
    if (mp instanceof LogDocMergePolicy) {
      LogDocMergePolicy lmp = (LogDocMergePolicy) mp;
      final int maxBufferedDocs = docWriter.getMaxBufferedDocs();
      if (lmp.getMinMergeDocs() != maxBufferedDocs) {
        if (infoStream != null)
          message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy");
        lmp.setMinMergeDocs(maxBufferedDocs);
      }
    }
  }
}

/**
 * Returns the number of buffered added documents that will
 * trigger a flush if enabled.
 * @see #setMaxBufferedDocs
 */
public int getMaxBufferedDocs() {
  ensureOpen();
  return docWriter.getMaxBufferedDocs();
}
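// Illustrative usage sketch; NOT part of the original IndexWriter source.
// Shows switching from the default RAM-based flush trigger to flushing by
// document count. The doc-count trigger must be enabled before the RAM
// trigger is disabled, since the argument checks above require that at
// least one trigger stay active. 'writer' is a hypothetical open writer.
/*
writer.setMaxBufferedDocs(1000);  // flush a new segment every 1000 docs
writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
*/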
/** Determines the amount of RAM that may be used for
 * buffering added documents before they are flushed as a
 * new Segment. Generally for faster indexing performance
 * it's best to flush by RAM usage instead of document
 * count and use as large a RAM buffer as you can.
 *
 * <p>When this is set, the writer will flush whenever
 * buffered documents use this much RAM. Pass in {@link
 * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
 * to RAM usage. Note that if flushing by document count
 * is also enabled, then the flush will be triggered by
 * whichever comes first.</p>
 *
 * <p>The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.</p>
 *
 * @throws IllegalArgumentException if ramBufferSize is
 * enabled but non-positive, or it disables ramBufferSize
 * when maxBufferedDocs is already disabled
 */
public void setRAMBufferSizeMB(double mb) {
  if (mb != DISABLE_AUTO_FLUSH && mb <= 0.0)
    throw new IllegalArgumentException(
        "ramBufferSize should be > 0.0 MB when enabled");
  if (mb == DISABLE_AUTO_FLUSH && getMaxBufferedDocs() == DISABLE_AUTO_FLUSH)
    throw new IllegalArgumentException(
        "at least one of ramBufferSize and maxBufferedDocs must be enabled");
  docWriter.setRAMBufferSizeMB(mb);
  if (infoStream != null)
    message("setRAMBufferSizeMB " + mb);
}

/**
 * Returns the value set by {@link #setRAMBufferSizeMB} if enabled.
 */
public double getRAMBufferSizeMB() {
  return docWriter.getRAMBufferSizeMB();
}

/**
 * <p>Determines the minimal number of delete terms required before the buffered
 * in-memory delete terms are applied and flushed. If there are documents
 * buffered in memory at the time, they are merged and a new segment is
 * created.</p>
 * <p>Disabled by default (writer flushes by RAM usage).</p>
 *
 * @throws IllegalArgumentException if maxBufferedDeleteTerms
 * is enabled but smaller than 1
 * @see #setRAMBufferSizeMB
 */
public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
  ensureOpen();
  if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH
      && maxBufferedDeleteTerms < 1)
    throw new IllegalArgumentException(
        "maxBufferedDeleteTerms must at least be 1 when enabled");
  docWriter.setMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
  if (infoStream != null)
    message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms);
}

/**
 * Returns the number of buffered deleted terms that will
 * trigger a flush if enabled.
 * @see #setMaxBufferedDeleteTerms
 */
public int getMaxBufferedDeleteTerms() {
  ensureOpen();
  return docWriter.getMaxBufferedDeleteTerms();
}

/** Determines how often segment indices are merged by addDocument(). With
 * smaller values, less RAM is used while indexing, and searches on
 * unoptimized indices are faster, but indexing speed is slower. With larger
 * values, more RAM is used during indexing, and while searches on unoptimized
 * indices are slower, indexing is faster. Thus larger values (> 10) are best
 * for batch index creation, and smaller values (< 10) for indices that are
 * interactively maintained.
 *
 * <p>Note that this method is a convenience method: it
 * just calls mergePolicy.setMergeFactor as long as
 * mergePolicy is an instance of {@link LogMergePolicy}.
 * Otherwise an IllegalArgumentException is thrown.</p>
 *
 * <p>This must never be less than 2. The default value is 10.
 */
public void setMergeFactor(int mergeFactor) {
  getLogMergePolicy().setMergeFactor(mergeFactor);
}
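// Illustrative usage sketch; NOT part of the original IndexWriter source.
// A batch-indexing configuration that trades RAM, and search speed on the
// unoptimized index, for faster indexing throughput. 'writer' is a
// hypothetical open writer with the default LogMergePolicy-based policy.
/*
writer.setMergeFactor(30);        // merge less often; more segments accumulate
writer.setRAMBufferSizeMB(64.0);  // larger RAM buffer means fewer flushes
*/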
/**
 * <p>Returns the number of segments that are merged at
 * once and also controls the total number of segments
 * allowed to accumulate in the index.</p>
 *
 * <p>Note that this method is a convenience method: it
 * just calls mergePolicy.getMergeFactor as long as
 * mergePolicy is an instance of {@link LogMergePolicy}.
 * Otherwise an IllegalArgumentException is thrown.</p>
 *
 * @see #setMergeFactor
 */
public int getMergeFactor() {
  return getLogMergePolicy().getMergeFactor();
}

/** If non-null, this will be the default infoStream used
 * by a newly instantiated IndexWriter.
 * @see #setInfoStream
 */
public static void setDefaultInfoStream(PrintStream infoStream) {
  IndexWriter.defaultInfoStream = infoStream;
}

/**
 * Returns the current default infoStream for newly
 * instantiated IndexWriters.
 * @see #setDefaultInfoStream
 */
public static PrintStream getDefaultInfoStream() {
  return IndexWriter.defaultInfoStream;
}

/** If non-null, information about merges, deletes and a
 * message when maxFieldLength is reached will be printed
 * to this.
 */
public void setInfoStream(PrintStream infoStream) {
  ensureOpen();
  this.infoStream = infoStream;
  setMessageID();
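// Illustrative usage sketch; NOT part of the original IndexWriter source.
// Routes the writer's diagnostics (merges, flushes, deletes, maxFieldLength
// warnings) to standard output while debugging indexing behavior.
/*
IndexWriter.setDefaultInfoStream(System.out);  // for all newly created writers
writer.setInfoStream(System.out);              // or for just this writer
*/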