IndexWriter.java
   * @param create <code>true</code> to create the index or overwrite
   *  the existing one; <code>false</code> to append to the existing
   *  index
   * @param deletionPolicy see <a href="#deletionPolicy">above</a>
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be read/written to, or
   *  if it does not exist and <code>create</code> is
   *  <code>false</code> or if there is any other low-level
   *  IO error
   * @deprecated This constructor will be removed in the 3.0 release.
   *  Use {@link
   *  #IndexWriter(Directory,Analyzer,boolean,IndexDeletionPolicy,MaxFieldLength)}
   *  instead, and call {@link #commit()} when needed.
   */
  public IndexWriter(Directory d, boolean autoCommit, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    init(d, a, create, false, deletionPolicy, autoCommit, DEFAULT_MAX_FIELD_LENGTH);
  }

  private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    if (IndexReader.indexExists(d)) {
      init(d, a, false, closeDir, deletionPolicy, autoCommit, maxFieldLength);
    } else {
      init(d, a, true, closeDir, deletionPolicy, autoCommit, maxFieldLength);
    }
  }

  private void init(Directory d, Analyzer a, final boolean create, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength)
    throws CorruptIndexException, LockObtainFailedException, IOException {

    this.closeDir = closeDir;
    directory = d;
    analyzer = a;
    setMessageID(defaultInfoStream);
    this.maxFieldLength = maxFieldLength;

    if (create) {
      // Clear the write lock in case it's leftover:
      directory.clearLock(WRITE_LOCK_NAME);
    }

    Lock writeLock = directory.makeLock(WRITE_LOCK_NAME);
    if (!writeLock.obtain(writeLockTimeout)) // obtain write lock
      throw new LockObtainFailedException("Index locked for write: " + writeLock);
    this.writeLock = writeLock;                   // save it

    try {
      if (create) {
        // Try to read first.  This is to allow create
        // against an index that's currently open for
        // searching.  In this case we write the next
        // segments_N file with no segments:
        try {
          segmentInfos.read(directory);
          segmentInfos.clear();
        } catch (IOException e) {
          // Likely this means it's a fresh directory
        }
        segmentInfos.commit(directory);
      } else {
        segmentInfos.read(directory);

        // We assume that this segments_N was previously
        // properly sync'd:
        for(int i=0;i<segmentInfos.size();i++) {
          final SegmentInfo info = segmentInfos.info(i);
          List files = info.files();
          for(int j=0;j<files.size();j++)
            synced.add(files.get(j));
        }
      }

      this.autoCommit = autoCommit;
      setRollbackSegmentInfos(segmentInfos);

      docWriter = new DocumentsWriter(directory, this);
      docWriter.setInfoStream(infoStream);
      docWriter.setMaxFieldLength(maxFieldLength);

      // Default deleter (for backwards compatibility) is
      // KeepOnlyLastCommitDeleter:
      deleter = new IndexFileDeleter(directory,
                                     deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy,
                                     segmentInfos, infoStream, docWriter);

      pushMaxBufferedDocs();

      if (infoStream != null) {
        message("init: create=" + create);
        messageState();
      }

    } catch (IOException e) {
      this.writeLock.release();
      this.writeLock = null;
      throw e;
    }
  }

  private synchronized void setRollbackSegmentInfos(SegmentInfos infos) {
    rollbackSegmentInfos = (SegmentInfos) infos.clone();
    assert !hasExternalSegments(rollbackSegmentInfos);
    rollbackSegments = new HashMap();
    final int size = rollbackSegmentInfos.size();
    for(int i=0;i<size;i++)
      rollbackSegments.put(rollbackSegmentInfos.info(i), new Integer(i));
  }

  /**
   * Expert: set the merge policy used by this writer.
   */
  public void setMergePolicy(MergePolicy mp) {
    ensureOpen();
    if (mp == null)
      throw new NullPointerException("MergePolicy must be non-null");

    if (mergePolicy != mp)
      mergePolicy.close();
    mergePolicy = mp;
    pushMaxBufferedDocs();
    if (infoStream != null)
      message("setMergePolicy " + mp);
  }

  /**
   * Expert: returns the current MergePolicy in use by this writer.
   * @see #setMergePolicy
   */
  public MergePolicy getMergePolicy() {
    ensureOpen();
    return mergePolicy;
  }

  /**
   * Expert: set the merge scheduler used by this writer.
   */
  synchronized public void setMergeScheduler(MergeScheduler mergeScheduler) throws CorruptIndexException, IOException {
    ensureOpen();
    if (mergeScheduler == null)
      throw new NullPointerException("MergeScheduler must be non-null");

    if (this.mergeScheduler != mergeScheduler) {
      finishMerges(true);
      this.mergeScheduler.close();
    }
    this.mergeScheduler = mergeScheduler;
    if (infoStream != null)
      message("setMergeScheduler " + mergeScheduler);
  }

  /**
   * Expert: returns the current MergeScheduler in use by this
   * writer.
   * @see #setMergeScheduler
   */
  public MergeScheduler getMergeScheduler() {
    ensureOpen();
    return mergeScheduler;
  }

  /** <p>Determines the largest segment (measured by
   * document count) that may be merged with other segments.
   * Small values (e.g., less than 10,000) are best for
   * interactive indexing, as this limits the length of
   * pauses while indexing to a few seconds.  Larger values
   * are best for batched indexing and speedier
   * searches.</p>
   *
   * <p>The default value is {@link Integer#MAX_VALUE}.</p>
   *
   * <p>Note that this method is a convenience method: it
   * just calls mergePolicy.setMaxMergeDocs as long as
   * mergePolicy is an instance of {@link LogMergePolicy}.
   * Otherwise an IllegalArgumentException is thrown.</p>
   *
   * <p>The default merge policy ({@link
   * LogByteSizeMergePolicy}) also allows you to set this
   * limit by net size (in MB) of the segment, using {@link
   * LogByteSizeMergePolicy#setMaxMergeMB}.</p>
   */
  public void setMaxMergeDocs(int maxMergeDocs) {
    getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
  }

  /**
   * <p>Returns the largest segment (measured by document
   * count) that may be merged with other segments.</p>
   *
   * <p>Note that this method is a convenience method: it
   * just calls mergePolicy.getMaxMergeDocs as long as
   * mergePolicy is an instance of {@link LogMergePolicy}.
   * Otherwise an IllegalArgumentException is thrown.</p>
   *
   * @see #setMaxMergeDocs
   */
  public int getMaxMergeDocs() {
    return getLogMergePolicy().getMaxMergeDocs();
  }

  /**
   * The maximum number of terms that will be indexed for a single field in a
   * document.  This limits the amount of memory required for indexing, so that
   * collections with very large files will not crash the indexing process by
   * running out of memory.  This setting refers to the number of running terms,
   * not to the number of different terms.<p/>
   * <strong>Note:</strong> this silently truncates large documents, excluding from the
   * index all terms that occur further in the document.  If you know your source
   * documents are large, be sure to set this value high enough to accommodate
   * the expected size.  If you set it to Integer.MAX_VALUE, then the only limit
   * is your memory, but you should anticipate an OutOfMemoryError.<p/>
   * By default, no more than {@link #DEFAULT_MAX_FIELD_LENGTH} terms
   * will be indexed for a field.
   */
  public void setMaxFieldLength(int maxFieldLength) {
    ensureOpen();
    this.maxFieldLength = maxFieldLength;
    docWriter.setMaxFieldLength(maxFieldLength);
    if (infoStream != null)
      message("setMaxFieldLength " + maxFieldLength);
  }

  /**
   * Returns the maximum number of terms that will be
   * indexed for a single field in a document.
   * @see #setMaxFieldLength
   */
  public int getMaxFieldLength() {
    ensureOpen();
    return maxFieldLength;
  }

  /** Determines the minimal number of documents required
   * before the buffered in-memory documents are flushed as
   * a new Segment.  Large values generally give faster
   * indexing.
   *
   * <p>When this is set, the writer will flush every
   * maxBufferedDocs added documents.  Pass in {@link
   * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
   * to number of buffered documents.  Note that if flushing
   * by RAM usage is also enabled, then the flush will be
   * triggered by whichever comes first.</p>
   *
   * <p>Disabled by default (writer flushes by RAM usage).</p>
   *
   * @throws IllegalArgumentException if maxBufferedDocs is
   * enabled but smaller than 2, or it disables maxBufferedDocs
   * when ramBufferSize is already disabled
   * @see #setRAMBufferSizeMB
   */
  public void setMaxBufferedDocs(int maxBufferedDocs) {
    ensureOpen();
    if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2)
      throw new IllegalArgumentException(
          "maxBufferedDocs must at least be 2 when enabled");
    if (maxBufferedDocs == DISABLE_AUTO_FLUSH && getRAMBufferSizeMB() == DISABLE_AUTO_FLUSH)
      throw new IllegalArgumentException(
          "at least one of ramBufferSize and maxBufferedDocs must be enabled");
    docWriter.setMaxBufferedDocs(maxBufferedDocs);
    pushMaxBufferedDocs();
    if (infoStream != null)
      message("setMaxBufferedDocs " + maxBufferedDocs);
  }

  /**
   * If we are flushing by doc count (not by RAM usage), and
   * using LogDocMergePolicy then push maxBufferedDocs down
   * as its minMergeDocs, to keep backwards compatibility.
   */
  private void pushMaxBufferedDocs() {
    if (docWriter.getMaxBufferedDocs() != DISABLE_AUTO_FLUSH) {
      final MergePolicy mp = mergePolicy;
      if (mp instanceof LogDocMergePolicy) {
        LogDocMergePolicy lmp = (LogDocMergePolicy) mp;
        final int maxBufferedDocs = docWriter.getMaxBufferedDocs();
        if (lmp.getMinMergeDocs() != maxBufferedDocs) {
          if (infoStream != null)
            message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy");
          lmp.setMinMergeDocs(maxBufferedDocs);
        }
      }
    }
  }

  /**
   * Returns the number of buffered added documents that will
   * trigger a flush if enabled.
   * @see #setMaxBufferedDocs
   */
  public int getMaxBufferedDocs() {
    ensureOpen();
    return docWriter.getMaxBufferedDocs();
  }

  /** Determines the amount of RAM that may be used for
   * buffering added documents before they are flushed as a
   * new Segment.  Generally for faster indexing performance
   * it's best to flush by RAM usage instead of document
   * count and use as large a RAM buffer as you can.
   *
   * <p>When this is set, the writer will flush whenever
   * buffered documents use this much RAM.  Pass in {@link
   * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due
   * to RAM usage.  Note that if flushing by document count
   * is also enabled, then the flush will be triggered by
   * whichever comes first.</p>
   *
   * <p>The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.</p>
   *
   * @throws IllegalArgumentException if ramBufferSize is
   * enabled but non-positive, or it disables ramBufferSize
   * when maxBufferedDocs is already disabled
   */
  public void setRAMBufferSizeMB(double mb) {
    if (mb != DISABLE_AUTO_FLUSH && mb <= 0.0)
      throw new IllegalArgumentException(
          "ramBufferSize should be > 0.0 MB when enabled");
    if (mb == DISABLE_AUTO_FLUSH && getMaxBufferedDocs() == DISABLE_AUTO_FLUSH)
      throw new IllegalArgumentException(
          "at least one of ramBufferSize and maxBufferedDocs must be enabled");
    docWriter.setRAMBufferSizeMB(mb);
    if (infoStream != null)
      message("setRAMBufferSizeMB " + mb);
  }

  /**
   * Returns the value set by {@link #setRAMBufferSizeMB} if enabled.
   */
  public double getRAMBufferSizeMB() {
    return docWriter.getRAMBufferSizeMB();
  }
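The constructor and the flush-control setters above are usually exercised together when a writer is opened. The following is a minimal usage sketch, not part of IndexWriter.java itself; it assumes a Lucene 2.x-era API, a StandardAnalyzer, and a hypothetical index path, and uses the non-deprecated constructor recommended by the @deprecated note above.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class IndexWriterSetupSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical index location; replace with a real path.
    Directory dir = FSDirectory.getDirectory("/tmp/example-index");

    // create=true creates or overwrites the index, mirroring the "create"
    // parameter documented above; MaxFieldLength.LIMITED applies the
    // DEFAULT_MAX_FIELD_LENGTH term limit per field.
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true,
                                         IndexWriter.MaxFieldLength.LIMITED);

    // Flush by RAM usage (the default strategy); 32 MB is an arbitrary choice.
    writer.setRAMBufferSizeMB(32.0);

    // Or flush by buffered document count instead; at least one of the two
    // triggers must remain enabled:
    // writer.setMaxBufferedDocs(1000);

    // Raise the per-field term limit for large source documents.
    writer.setMaxFieldLength(50000);

    writer.commit();
    writer.close();
    dir.close();
  }
}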
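setMaxMergeDocs and getMaxMergeDocs are convenience wrappers that, per their Javadoc, only delegate to the merge policy when it is a LogMergePolicy. A small illustrative sketch of that interaction follows; it assumes an already-open writer (such as the one in the previous example) that still uses the default LogByteSizeMergePolicy.

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LogByteSizeMergePolicy;

public class MergeTuningSketch {
  // Assumes "writer" is open and its merge policy has not been replaced.
  static void tuneMerging(IndexWriter writer) {
    // Convenience setter: caps merged segments at 10,000 documents, which the
    // Javadoc above suggests for interactive indexing.
    writer.setMaxMergeDocs(10000);

    // The same bound can be expressed by net segment size; the cast is safe
    // only while the default LogByteSizeMergePolicy is still installed.
    LogByteSizeMergePolicy policy = (LogByteSizeMergePolicy) writer.getMergePolicy();
    policy.setMaxMergeMB(512.0); // 512 MB is an arbitrary illustrative value
  }
}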