IndexWriter.java
  /**
   * Deletes the document(s) containing any of the
   * terms. All deletes are flushed at the same time.
   * @param terms array of terms to identify the documents
   * to be deleted
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public synchronized void deleteDocuments(Term[] terms) throws CorruptIndexException, IOException {
    ensureOpen();
    for (int i = 0; i < terms.length; i++) {
      bufferDeleteTerm(terms[i]);
    }
    maybeFlushRamSegments();
  }

  /**
   * Updates a document by first deleting the document(s)
   * containing <code>term</code> and then adding the new
   * document. The delete and then add are atomic as seen
   * by a reader on the same index (flush may happen only after
   * the add).
   * @param term the term to identify the document(s) to be
   * deleted
   * @param doc the document to be added
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void updateDocument(Term term, Document doc) throws CorruptIndexException, IOException {
    ensureOpen();
    updateDocument(term, doc, getAnalyzer());
  }

  /**
   * Updates a document by first deleting the document(s)
   * containing <code>term</code> and then adding the new
   * document. The delete and then add are atomic as seen
   * by a reader on the same index (flush may happen only after
   * the add).
   * @param term the term to identify the document(s) to be
   * deleted
   * @param doc the document to be added
   * @param analyzer the analyzer to use when analyzing the document
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void updateDocument(Term term, Document doc, Analyzer analyzer)
      throws CorruptIndexException, IOException {
    ensureOpen();
    SegmentInfo newSegmentInfo = buildSingleDocSegment(doc, analyzer);
    synchronized (this) {
      bufferDeleteTerm(term);
      ramSegmentInfos.addElement(newSegmentInfo);
      maybeFlushRamSegments();
    }
  }
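  // --- Illustrative usage sketch (not part of the original source) ---
  // updateDocument() gives atomic delete-then-add semantics, so a unique
  // identifier field is the natural delete key. The field name "id" and the
  // variables below are hypothetical:
  //
  //   IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), false);
  //   Document doc = new Document();
  //   doc.add(new Field("id", "42", Field.Store.YES, Field.Index.UN_TOKENIZED));
  //   doc.add(new Field("body", "updated text", Field.Store.NO, Field.Index.TOKENIZED));
  //   writer.updateDocument(new Term("id", "42"), doc);  // delete old, add new
  //   writer.close();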
  final synchronized String newRamSegmentName() {
    return "_ram_" + Integer.toString(ramSegmentInfos.counter++, Character.MAX_RADIX);
  }

  // for test purpose
  final synchronized int getSegmentCount() {
    return segmentInfos.size();
  }

  // for test purpose
  final synchronized int getRamSegmentCount() {
    return ramSegmentInfos.size();
  }

  // for test purpose
  final synchronized int getDocCount(int i) {
    if (i >= 0 && i < segmentInfos.size()) {
      return segmentInfos.info(i).docCount;
    } else {
      return -1;
    }
  }

  final synchronized String newSegmentName() {
    return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
  }

  /** Determines how often segment indices are merged by addDocument(). With
   * smaller values, less RAM is used while indexing, and searches on
   * unoptimized indices are faster, but indexing speed is slower. With larger
   * values, more RAM is used during indexing, and while searches on unoptimized
   * indices are slower, indexing is faster. Thus larger values (> 10) are best
   * for batch index creation, and smaller values (< 10) for indices that are
   * interactively maintained.
   *
   * <p>This must never be less than 2. The default value is {@link #DEFAULT_MERGE_FACTOR}.
   */
  private int mergeFactor = DEFAULT_MERGE_FACTOR;

  /** Determines the minimal number of documents required before the buffered
   * in-memory documents are merged and a new Segment is created.
   * Since Documents are merged in a {@link org.apache.lucene.store.RAMDirectory},
   * a large value gives faster indexing. At the same time, mergeFactor limits
   * the number of files open in a FSDirectory.
   *
   * <p>The default value is {@link #DEFAULT_MAX_BUFFERED_DOCS}.
   */
  private int minMergeDocs = DEFAULT_MAX_BUFFERED_DOCS;

  /** Determines the largest number of documents ever merged by addDocument().
   * Small values (e.g., less than 10,000) are best for interactive indexing,
   * as this limits the length of pauses while indexing to a few seconds.
   * Larger values are best for batched indexing and speedier searches.
   *
   * <p>The default value is {@link #DEFAULT_MAX_MERGE_DOCS}.
   */
  private int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;

  /** If non-null, information about merges will be printed to this. */
  private PrintStream infoStream = null;

  private static PrintStream defaultInfoStream = null;
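  // --- Illustrative tuning sketch (not part of the original source) ---
  // These fields are normally adjusted through the public setters rather than
  // written directly. A batch-indexing caller might, for example, do:
  //
  //   writer.setMergeFactor(30);        // merge less often: faster bulk indexing
  //   writer.setMaxBufferedDocs(1000);  // buffer more docs in RAM before flushing
  //   writer.setInfoStream(System.err); // log merge activity
  //
  // The values above are illustrative only; useful settings depend on available
  // RAM, document size, and how interactively the index is maintained.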
  /** Merges all segments together into a single segment,
   * optimizing an index for search.
   *
   * <p>It is recommended that this method be called upon completion of indexing. In
   * environments with frequent updates, optimize is best done during low volume times, if at all.
   * </p>
   * <p>See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion.</p>
   *
   * <p>Note that this requires substantial temporary free
   * space in the Directory (see <a target="_top"
   * href="http://issues.apache.org/jira/browse/LUCENE-764">LUCENE-764</a>
   * for details):</p>
   *
   * <ul>
   * <li>
   *
   * <p>If no readers/searchers are open against the index,
   * then free space required is up to 1X the total size of
   * the starting index. For example, if the starting
   * index is 10 GB, then you must have up to 10 GB of free
   * space before calling optimize.</p>
   *
   * <li>
   *
   * <p>If readers/searchers are using the index, then free
   * space required is up to 2X the size of the starting
   * index. This is because in addition to the 1X used by
   * optimize, the original 1X of the starting index is
   * still consuming space in the Directory as the readers
   * are holding the segments files open. Even on Unix,
   * where it will appear as if the files are gone ("ls"
   * won't list them), they still consume storage due to
   * "delete on last close" semantics.</p>
   *
   * <p>Furthermore, if some but not all readers re-open
   * while the optimize is underway, this will cause > 2X
   * temporary space to be consumed as those new readers
   * will then hold open the partially optimized segments at
   * that time. It is best not to re-open readers while
   * optimize is running.</p>
   *
   * </ul>
   *
   * <p>The actual temporary usage could be much less than
   * these figures (it depends on many factors).</p>
   *
   * <p>Once the optimize completes, the total size of the
   * index will be less than the size of the starting index.
   * It could be quite a bit smaller (if there were many
   * pending deletes) or just slightly smaller.</p>
   *
   * <p>If an Exception is hit during optimize(), for example
   * due to disk full, the index will not be corrupt and no
   * documents will have been lost. However, it may have
   * been partially optimized (some segments were merged but
   * not all), and it's possible that one of the segments in
   * the index will be in non-compound format even when
   * using compound file format. This will occur when the
   * Exception is hit during conversion of the segment into
   * compound format.</p>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public synchronized void optimize() throws CorruptIndexException, IOException {
    ensureOpen();
    flushRamSegments();
    while (segmentInfos.size() > 1 ||
           (segmentInfos.size() == 1 &&
            (SegmentReader.hasDeletions(segmentInfos.info(0)) ||
             SegmentReader.hasSeparateNorms(segmentInfos.info(0)) ||
             segmentInfos.info(0).dir != directory ||
             (useCompoundFile &&
              (!SegmentReader.usesCompoundFile(segmentInfos.info(0))))))) {
      int minSegment = segmentInfos.size() - mergeFactor;
      mergeSegments(segmentInfos, minSegment < 0 ? 0 : minSegment, segmentInfos.size());
    }
  }

  /*
   * Begin a transaction. During a transaction, any segment
   * merges that happen (or ram segments flushed) will not
   * write a new segments file and will not remove any files
   * that were present at the start of the transaction. You
   * must make a matched (try/finally) call to
   * commitTransaction() or rollbackTransaction() to finish
   * the transaction.
   */
  private void startTransaction() throws IOException {
    localRollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
    localAutoCommit = autoCommit;
    if (localAutoCommit) {
      flushRamSegments();
      // Turn off auto-commit during our local transaction:
      autoCommit = false;
    } else
      // We must "protect" our files at this point from
      // deletion in case we need to rollback:
      deleter.incRef(segmentInfos, false);
  }

  /*
   * Rolls back the transaction and restores state to where
   * we were at the start.
   */
  private void rollbackTransaction() throws IOException {

    // First restore autoCommit in case we hit an exception below:
    autoCommit = localAutoCommit;

    // Keep the same segmentInfos instance but replace all
    // of its SegmentInfo instances. This is so the next
    // attempt to commit using this instance of IndexWriter
    // will always write to a new generation ("write once").
    segmentInfos.clear();
    segmentInfos.addAll(localRollbackSegmentInfos);
    localRollbackSegmentInfos = null;

    // Ask deleter to locate unreferenced files we had
    // created & remove them:
    deleter.checkpoint(segmentInfos, false);

    if (!autoCommit)
      // Remove the incRef we did in startTransaction:
      deleter.decRef(segmentInfos);

    deleter.refresh();
  }

  /*
   * Commits the transaction. This will write the new
   * segments file and remove any pending deletions we have
   * accumulated during the transaction.
   */
  private void commitTransaction() throws IOException {

    // First restore autoCommit in case we hit an exception below:
    autoCommit = localAutoCommit;

    boolean success = false;
    try {
      checkpoint();
      success = true;
    } finally {
      if (!success) {
        rollbackTransaction();
      }
    }

    if (!autoCommit)
      // Remove the incRef we did in startTransaction.
      deleter.decRef(localRollbackSegmentInfos);

    localRollbackSegmentInfos = null;

    // Give deleter a chance to remove files now:
    deleter.checkpoint(segmentInfos, autoCommit);
  }
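  // --- Illustrative pattern sketch (not part of the original source) ---
  // startTransaction()/commitTransaction()/rollbackTransaction() are private
  // helpers; callers inside this class pair them with try/finally, roughly:
  //
  //   startTransaction();
  //   boolean success = false;
  //   try {
  //     // ... merge or copy segments ...
  //     success = true;
  //   } finally {
  //     if (success)
  //       commitTransaction();
  //     else
  //       rollbackTransaction();
  //   }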
  /**
   * Close the <code>IndexWriter</code> without committing
   * any of the changes that have occurred since it was
   * opened. This removes any temporary files that had been
   * created, after which the state of the index will be the
   * same as it was when this writer was first opened. This
   * can only be called when this IndexWriter was opened
   * with <code>autoCommit=false</code>.
   * @throws IllegalStateException if this is called when
   * the writer was opened with <code>autoCommit=true</code>.
   * @throws IOException if there is a low-level IO error
   */
  public synchronized void abort() throws IOException {
    ensureOpen();
    if (!autoCommit) {

      // Keep the same segmentInfos instance but replace all
      // of its SegmentInfo instances. This is so the next
      // attempt to commit using this instance of IndexWriter
      // will always write to a new generation ("write once").
      segmentInfos.clear();
      segmentInfos.addAll(rollbackSegmentInfos);

      // Ask deleter to locate unreferenced files & remove
      // them:
      deleter.checkpoint(segmentInfos, false);
      deleter.refresh();

      ramSegmentInfos = new SegmentInfos();
      bufferedDeleteTerms.clear();
      numBufferedDeleteTerms = 0;

      commitPending = false;
      close();

    } else {
      throw new IllegalStateException("abort() can only be called when IndexWriter was opened with autoCommit=false");
    }
  }

  /*
   * Called whenever the SegmentInfos has been updated and
   * the index files referenced exist (correctly) in the
   * index directory. If we are in autoCommit mode, we
   * commit the change immediately. Else, we mark
   * commitPending.
   */
  private void checkpoint() throws IOException {
    if (autoCommit) {
      segmentInfos.write(directory);
    } else {
      commitPending = true;
    }
  }

  /** Merges all segments from an array of indexes into this index.
   *
   * <p>This may be used to parallelize batch indexing. A large document
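  // --- Illustrative usage sketch (not part of the original source) ---
  // abort() requires an IndexWriter opened with autoCommit=false (the exact
  // constructor signature for that depends on the Lucene version). A caller
  // could then discard everything buffered since the writer was opened:
  //
  //   try {
  //     writer.updateDocument(new Term("id", "42"), doc);
  //     writer.close();   // close() commits the pending changes
  //   } catch (IOException e) {
  //     writer.abort();   // discard all changes since open, then close
  //   }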