IndexWriter.java
   *
   * <p>Note that if an Exception is hit (for example disk full)
   * then the index will be consistent, but this document
   * may not have been added.  Furthermore, it's possible
   * the index will have one segment in non-compound format
   * even when using compound files (when a merge has
   * partially succeeded).</p>
   *
   * <p>This method periodically flushes pending documents
   * to the Directory (every {@link #setMaxBufferedDocs}),
   * and also periodically merges segments in the index
   * (every {@link #setMergeFactor} flushes).  When this
   * occurs, the method will take more time to run (possibly
   * a long time if the index is large), and will require
   * free temporary space in the Directory to do the
   * merging.</p>
   *
   * <p>The amount of free space required when a merge is triggered is
   * up to 1X the size of all segments being merged, when no
   * readers/searchers are open against the index, and up to 2X the
   * size of all segments being merged when readers/searchers are open
   * against the index (see {@link #optimize()} for details).  The
   * sequence of primitive merge operations performed is governed by
   * the merge policy.</p>
   *
   * <p>Note that each term in the document can be no longer
   * than 16383 characters, otherwise an
   * IllegalArgumentException will be thrown.</p>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void addDocument(Document doc) throws CorruptIndexException, IOException {
    addDocument(doc, analyzer);
  }

  /**
   * Adds a document to this index, using the provided analyzer instead of the
   * value of {@link #getAnalyzer()}.  If the document contains more than
   * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
   * discarded.
   *
   * <p>See {@link #addDocument(Document)} for details on
   * index and IndexWriter state after an Exception, and
   * flushing/merging temporary free space requirements.</p>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void addDocument(Document doc, Analyzer analyzer)
      throws CorruptIndexException, IOException {
    ensureOpen();
    boolean doFlush = false;
    boolean success = false;
    try {
      try {
        doFlush = docWriter.addDocument(doc, analyzer);
        success = true;
      } finally {
        if (!success) {
          if (infoStream != null)
            message("hit exception adding document");
          synchronized (this) {
            // If docWriter has some aborted files that were
            // never incref'd, then we clean them up here
            if (docWriter != null) {
              final List files = docWriter.abortedFiles();
              if (files != null)
                deleter.deleteNewFiles(files);
            }
          }
        }
      }
      if (doFlush)
        flush(true, false);
    } catch (OutOfMemoryError oom) {
      hitOOM = true;
      throw oom;
    }
  }

  /**
   * Deletes the document(s) containing <code>term</code>.
   * @param term the term to identify the documents to be deleted
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void deleteDocuments(Term term) throws CorruptIndexException, IOException {
    ensureOpen();
    try {
      boolean doFlush = docWriter.bufferDeleteTerm(term);
      if (doFlush)
        flush(true, false);
    } catch (OutOfMemoryError oom) {
      hitOOM = true;
      throw oom;
    }
  }
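  // Usage sketch (illustrative only, not part of the Lucene source):
  // exercises the add/delete calls above with the 2.x-era Field API.
  // The directory, analyzer, and field names ("id", "contents") are
  // hypothetical.
  //
  //   IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);
  //   Document doc = new Document();
  //   doc.add(new Field("id", "42", Field.Store.YES, Field.Index.UN_TOKENIZED));
  //   doc.add(new Field("contents", "some text", Field.Store.NO, Field.Index.TOKENIZED));
  //   writer.addDocument(doc);                      // may trigger a flush and/or merge
  //   writer.deleteDocuments(new Term("id", "42")); // buffered until the next flush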
  /**
   * Deletes the document(s) containing any of the
   * terms.  All deletes are flushed at the same time.
   * @param terms array of terms to identify the documents
   * to be deleted
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void deleteDocuments(Term[] terms) throws CorruptIndexException, IOException {
    ensureOpen();
    try {
      boolean doFlush = docWriter.bufferDeleteTerms(terms);
      if (doFlush)
        flush(true, false);
    } catch (OutOfMemoryError oom) {
      hitOOM = true;
      throw oom;
    }
  }

  /**
   * Updates a document by first deleting the document(s)
   * containing <code>term</code> and then adding the new
   * document.  The delete and then add are atomic as seen
   * by a reader on the same index (flush may happen only after
   * the add).
   * @param term the term to identify the document(s) to be
   * deleted
   * @param doc the document to be added
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void updateDocument(Term term, Document doc)
      throws CorruptIndexException, IOException {
    ensureOpen();
    updateDocument(term, doc, getAnalyzer());
  }

  /**
   * Updates a document by first deleting the document(s)
   * containing <code>term</code> and then adding the new
   * document.  The delete and then add are atomic as seen
   * by a reader on the same index (flush may happen only after
   * the add).
   * @param term the term to identify the document(s) to be
   * deleted
   * @param doc the document to be added
   * @param analyzer the analyzer to use when analyzing the document
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void updateDocument(Term term, Document doc, Analyzer analyzer)
      throws CorruptIndexException, IOException {
    ensureOpen();
    try {
      boolean doFlush = false;
      boolean success = false;
      try {
        doFlush = docWriter.updateDocument(term, doc, analyzer);
        success = true;
      } finally {
        if (!success) {
          if (infoStream != null)
            message("hit exception updating document");
          synchronized (this) {
            // If docWriter has some aborted files that were
            // never incref'd, then we clean them up here
            final List files = docWriter.abortedFiles();
            if (files != null)
              deleter.deleteNewFiles(files);
          }
        }
      }
      if (doFlush)
        flush(true, false);
    } catch (OutOfMemoryError oom) {
      hitOOM = true;
      throw oom;
    }
  }

  // for test purpose
  final synchronized int getSegmentCount(){
    return segmentInfos.size();
  }

  // for test purpose
  final synchronized int getNumBufferedDocuments(){
    return docWriter.getNumDocsInRAM();
  }

  // for test purpose
  final synchronized int getDocCount(int i) {
    if (i >= 0 && i < segmentInfos.size()) {
      return segmentInfos.info(i).docCount;
    } else {
      return -1;
    }
  }

  final String newSegmentName() {
    // Cannot synchronize on IndexWriter because that causes
    // deadlock
    synchronized(segmentInfos) {
      // Important to set commitPending so that the
      // segmentInfos is written on close.  Otherwise we
      // could close, re-open and re-return the same segment
      // name that was previously returned which can cause
      // problems at least with ConcurrentMergeScheduler.
      commitPending = true;
      return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
    }
  }

  /** If non-null, information about merges will be printed to this. */
  private PrintStream infoStream = null;
  private static PrintStream defaultInfoStream = null;
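  // Usage sketch (illustrative only, not part of the Lucene source):
  // updateDocument is an atomic delete-then-add keyed on a term that
  // should uniquely identify the old document; the "id" field here is a
  // hypothetical unique key.
  //
  //   Document newVersion = new Document();
  //   newVersion.add(new Field("id", "42", Field.Store.YES, Field.Index.UN_TOKENIZED));
  //   newVersion.add(new Field("contents", "revised text", Field.Store.NO, Field.Index.TOKENIZED));
  //   writer.updateDocument(new Term("id", "42"), newVersion);
  //
  // Note also that newSegmentName() above encodes segmentInfos.counter in
  // base 36 (Character.MAX_RADIX), so successive segments are named
  // _0 through _9, then _a, _b, and so on.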
  /**
   * Requests an "optimize" operation on an index, priming the index
   * for the fastest available search.  Traditionally this has meant
   * merging all segments into a single segment as is done in the
   * default merge policy, but individual merge policies may implement
   * optimize in different ways.
   *
   * @see LogMergePolicy#findMergesForOptimize
   *
   * <p>It is recommended that this method be called upon completion of indexing.  In
   * environments with frequent updates, optimize is best done during low volume times, if at all.
   * </p>
   * <p>See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion.</p>
   *
   * <p>Note that this can require substantial temporary free
   * space in the Directory (see <a target="_top"
   * href="http://issues.apache.org/jira/browse/LUCENE-764">LUCENE-764</a>
   * for details):</p>
   *
   * <ul>
   * <li>
   *
   * <p>If no readers/searchers are open against the index,
   * then free space required is up to 1X the total size of
   * the starting index.  For example, if the starting
   * index is 10 GB, then you must have up to 10 GB of free
   * space before calling optimize.</p>
   *
   * <li>
   *
   * <p>If readers/searchers are using the index, then free
   * space required is up to 2X the size of the starting
   * index.  This is because in addition to the 1X used by
   * optimize, the original 1X of the starting index is
   * still consuming space in the Directory as the readers
   * are holding the segments files open.  Even on Unix,
   * where it will appear as if the files are gone ("ls"
   * won't list them), they still consume storage due to
   * "delete on last close" semantics.</p>
   *
   * <p>Furthermore, if some but not all readers re-open
   * while the optimize is underway, this will cause > 2X
   * temporary space to be consumed as those new readers
   * will then hold open the partially optimized segments at
   * that time.  It is best not to re-open readers while
   * optimize is running.</p>
   *
   * </ul>
   *
   * <p>The actual temporary usage could be much less than
   * these figures (it depends on many factors).</p>
   *
   * <p>In general, once the optimize completes, the total size of the
   * index will be less than the size of the starting index.
   * It could be quite a bit smaller (if there were many
   * pending deletes) or just slightly smaller.</p>
   *
   * <p>If an Exception is hit during optimize(), for example
   * due to disk full, the index will not be corrupt and no
   * documents will have been lost.  However, it may have
   * been partially optimized (some segments were merged but
   * not all), and it's possible that one of the segments in
   * the index will be in non-compound format even when
   * using compound file format.  This will occur when the
   * Exception is hit during conversion of the segment into
   * compound format.</p>
   *
   * <p>This call will optimize those segments present in
   * the index when the call started.  If other threads are
   * still adding documents and flushing segments, those
   * newly created segments will not be optimized unless you
   * call optimize again.</p>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void optimize() throws CorruptIndexException, IOException {
    optimize(true);
  }

  /**
   * Optimize the index down to <= maxNumSegments.  If
   * maxNumSegments==1 then this is the same as {@link
   * #optimize()}.
   * @param maxNumSegments maximum number of segments left
   * in the index after optimization finishes
   */
  public void optimize(int maxNumSegments) throws CorruptIndexException, IOException {
    optimize(maxNumSegments, true);
  }
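  // Usage sketch (illustrative only, not part of the Lucene source):
  // a typical bulk-load pattern ends with a blocking optimize() before
  // searchers are opened.  "writer" and "docs" are hypothetical.
  //
  //   for (Iterator it = docs.iterator(); it.hasNext();)
  //     writer.addDocument((Document) it.next());
  //   writer.optimize();  // blocks until done; needs up to 1X free space,
  //                       // or up to 2X with open readers (see Javadoc above)
  //   writer.close();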
  /** Just like {@link #optimize()}, except you can specify
   *  whether the call should block until the optimize
   *  completes.  This is only meaningful with a
   *  {@link MergeScheduler} that is able to run merges in
   *  background threads. */
  public void optimize(boolean doWait) throws CorruptIndexException, IOException {
    optimize(1, doWait);
  }

  /** Just like {@link #optimize(int)}, except you can
   *  specify whether the call should block until the
   *  optimize completes.  This is only meaningful with a
   *  {@link MergeScheduler} that is able to run merges in
   *  background threads. */
  public void optimize(int maxNumSegments, boolean doWait) throws CorruptIndexException, IOException {
    ensureOpen();

    if (maxNumSegments < 1)
      throw new IllegalArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments);

    if (infoStream != null)
      message("optimize: index now " + segString());

    flush();

    synchronized(this) {
      resetMergeExceptions();
      segmentsToOptimize = new HashSet();
      final int numSegments = segmentInfos.size();
      for(int i=0;i<numSegments;i++)
        segmentsToOptimize.add(segmentInfos.info(i));

      // Now mark all pending & running merges as optimize
      // merge:
      Iterator it = pendingMerges.iterator();
      while(it.hasNext()) {
        final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) it.next();
        merge.optimize = true;
        merge.maxNumSegmentsOptimize = maxNumSegments;
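  // Usage sketch (illustrative only, not part of the Lucene source):
  // a non-blocking optimize only makes sense with a MergeScheduler that
  // runs merges in background threads, e.g. ConcurrentMergeScheduler
  // (assuming the 2.3-era setMergeScheduler API).
  //
  //   writer.setMergeScheduler(new ConcurrentMergeScheduler());
  //   writer.optimize(false);  // returns immediately; the optimize
  //                            // merges run on background threads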