📄 IndexWriter.java
    this.closeDir = closeDir;
    directory = d;
    analyzer = a;

    if (create) {
      // Clear the write lock in case it's leftover:
      directory.clearLock(IndexWriter.WRITE_LOCK_NAME);
    }

    Lock writeLock = directory.makeLock(IndexWriter.WRITE_LOCK_NAME);
    if (!writeLock.obtain(writeLockTimeout)) // obtain write lock
      throw new IOException("Index locked for write: " + writeLock);
    this.writeLock = writeLock;              // save it

    try {
      if (create) {
        // Try to read first.  This is to allow create
        // against an index that's currently open for
        // searching.  In this case we write the next
        // segments_N file with no segments:
        try {
          segmentInfos.read(directory);
          segmentInfos.clear();
        } catch (IOException e) {
          // Likely this means it's a fresh directory
        }
        segmentInfos.write(directory);
      } else {
        segmentInfos.read(directory);
      }

      // Create a deleter to keep track of which files can
      // be deleted:
      deleter = new IndexFileDeleter(segmentInfos, directory);
      deleter.setInfoStream(infoStream);
      deleter.findDeletableFiles();
      deleter.deleteFiles();
    } catch (IOException e) {
      this.writeLock.release();
      this.writeLock = null;
      throw e;
    }
  }

  /** Determines the largest number of documents ever merged by addDocument().
   * Small values (e.g., less than 10,000) are best for interactive indexing,
   * as this limits the length of pauses while indexing to a few seconds.
   * Larger values are best for batched indexing and speedier searches.
   *
   * <p>The default value is {@link Integer#MAX_VALUE}.
   */
  public void setMaxMergeDocs(int maxMergeDocs) {
    this.maxMergeDocs = maxMergeDocs;
  }

  /**
   * @see #setMaxMergeDocs
   */
  public int getMaxMergeDocs() {
    return maxMergeDocs;
  }

  /**
   * The maximum number of terms that will be indexed for a single field in a
   * document.  This limits the amount of memory required for indexing, so that
   * collections with very large files will not crash the indexing process by
   * running out of memory.<p/>
   * Note that this effectively truncates large documents, excluding from the
   * index terms that occur further in the document.  If you know your source
   * documents are large, be sure to set this value high enough to accommodate
   * the expected size.  If you set it to Integer.MAX_VALUE, then the only limit
   * is your memory, but you should anticipate an OutOfMemoryError.<p/>
   * By default, no more than 10,000 terms will be indexed for a field.
   */
  public void setMaxFieldLength(int maxFieldLength) {
    this.maxFieldLength = maxFieldLength;
  }

  /**
   * @see #setMaxFieldLength
   */
  public int getMaxFieldLength() {
    return maxFieldLength;
  }

  /** Determines the minimal number of documents required before the buffered
   * in-memory documents are merged and a new Segment is created.
   * Since Documents are merged in a {@link org.apache.lucene.store.RAMDirectory},
   * a large value gives faster indexing.  At the same time, mergeFactor limits
   * the number of files open in a FSDirectory.
   *
   * <p>The default value is 10.
   *
   * @throws IllegalArgumentException if maxBufferedDocs is smaller than 2
   */
  public void setMaxBufferedDocs(int maxBufferedDocs) {
    if (maxBufferedDocs < 2)
      throw new IllegalArgumentException("maxBufferedDocs must at least be 2");
    this.minMergeDocs = maxBufferedDocs;
  }

  /**
   * @see #setMaxBufferedDocs
   */
  public int getMaxBufferedDocs() {
    return minMergeDocs;
  }
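  // Usage sketch (not part of the Lucene source; illustrative only): tuning
  // the buffering and merge knobs defined above.  The index path, analyzer
  // choice, and all values are assumptions, not recommendations.
  //
  //   IndexWriter writer =
  //       new IndexWriter("/tmp/index", new StandardAnalyzer(), true);
  //   writer.setMaxBufferedDocs(100);   // flush a new segment every 100 docs
  //   writer.setMergeFactor(20);        // merge less often: better for batch jobs
  //   writer.setMaxFieldLength(50000);  // index up to 50,000 terms per field
  //   writer.setMaxMergeDocs(100000);   // cap the docs merged into one segment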
  /**
   * <p>Determines the minimal number of delete terms required before the buffered
   * in-memory delete terms are applied and flushed.  If there are documents
   * buffered in memory at the time, they are merged and a new segment is
   * created.</p>
   *
   * <p>The default value is {@link #DEFAULT_MAX_BUFFERED_DELETE_TERMS}.</p>
   * @throws IllegalArgumentException if maxBufferedDeleteTerms is smaller than 1
   */
  public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
    if (maxBufferedDeleteTerms < 1)
      throw new IllegalArgumentException("maxBufferedDeleteTerms must at least be 1");
    this.maxBufferedDeleteTerms = maxBufferedDeleteTerms;
  }

  /**
   * @see #setMaxBufferedDeleteTerms
   */
  public int getMaxBufferedDeleteTerms() {
    return maxBufferedDeleteTerms;
  }

  /** Determines how often segment indices are merged by addDocument().  With
   * smaller values, less RAM is used while indexing, and searches on
   * unoptimized indices are faster, but indexing speed is slower.  With larger
   * values, more RAM is used during indexing, and while searches on unoptimized
   * indices are slower, indexing is faster.  Thus larger values (> 10) are best
   * for batch index creation, and smaller values (< 10) for indices that are
   * interactively maintained.
   *
   * <p>This must never be less than 2.  The default value is 10.
   */
  public void setMergeFactor(int mergeFactor) {
    if (mergeFactor < 2)
      throw new IllegalArgumentException("mergeFactor cannot be less than 2");
    this.mergeFactor = mergeFactor;
  }

  /**
   * @see #setMergeFactor
   */
  public int getMergeFactor() {
    return mergeFactor;
  }

  /** If non-null, information about merges and a message when
   * maxFieldLength is reached will be printed to this.
   */
  public void setInfoStream(PrintStream infoStream) {
    this.infoStream = infoStream;
  }

  /**
   * @see #setInfoStream
   */
  public PrintStream getInfoStream() {
    return infoStream;
  }

  /**
   * Sets the maximum time to wait for a write lock (in milliseconds) for this
   * instance of IndexWriter.
   * @see #setDefaultWriteLockTimeout to change the default value for all
   * instances of IndexWriter.
   */
  public void setWriteLockTimeout(long writeLockTimeout) {
    this.writeLockTimeout = writeLockTimeout;
  }

  /**
   * @see #setWriteLockTimeout
   */
  public long getWriteLockTimeout() {
    return writeLockTimeout;
  }

  /**
   * Sets the default (for any instance of IndexWriter) maximum time to wait
   * for a write lock (in milliseconds).
   */
  public static void setDefaultWriteLockTimeout(long writeLockTimeout) {
    IndexWriter.WRITE_LOCK_TIMEOUT = writeLockTimeout;
  }

  /**
   * @see #setDefaultWriteLockTimeout
   */
  public static long getDefaultWriteLockTimeout() {
    return IndexWriter.WRITE_LOCK_TIMEOUT;
  }

  /**
   * Flushes all changes to an index and closes all
   * associated files.
   *
   * <p> If an Exception is hit during close, eg due to disk
   * full or some other reason, then both the on-disk index
   * and the internal state of the IndexWriter instance will
   * be consistent.  However, the close will not be complete
   * even though part of it (flushing buffered documents)
   * may have succeeded, so the write lock will still be
   * held.</p>
   *
   * <p> If you can correct the underlying cause (eg free up
   * some disk space) then you can call close() again.
   * Failing that, if you want to force the write lock to be
   * released (dangerous, because you may then lose buffered
   * docs in the IndexWriter instance) then you can do
   * something like this:</p>
   *
   * <pre>
   * try {
   *   writer.close();
   * } finally {
   *   if (IndexReader.isLocked(directory)) {
   *     IndexReader.unlock(directory);
   *   }
   * }
   * </pre>
   *
   * after which, you must be certain not to use the writer
   * instance anymore.
   */
  public synchronized void close() throws IOException {
    flushRamSegments();
    ramDirectory.close();
    if (writeLock != null) {
      writeLock.release();    // release write lock
      writeLock = null;
    }
    if (closeDir)
      directory.close();
  }
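  // Usage sketch (not part of the Lucene source; illustrative only): the
  // lock-timeout setters combined with the close() recovery pattern from the
  // Javadoc above.  Timeout values and System.err are assumptions.
  //
  //   IndexWriter.setDefaultWriteLockTimeout(1000); // future writers wait up to 1s
  //   writer.setWriteLockTimeout(5000);             // this writer waits up to 5s
  //   writer.setInfoStream(System.err);             // trace merges/maxFieldLength
  //   try {
  //     writer.close();                      // flush, then release the write lock
  //   } finally {
  //     if (IndexReader.isLocked(directory)) // a failed close keeps the lock;
  //       IndexReader.unlock(directory);     // forcing it may lose buffered docs
  //   }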
  /** Release the write lock, if needed. */
  protected void finalize() throws Throwable {
    try {
      if (writeLock != null) {
        writeLock.release();  // release write lock
        writeLock = null;
      }
    } finally {
      super.finalize();
    }
  }

  /** Returns the Directory used by this index. */
  public Directory getDirectory() {
    return directory;
  }

  /** Returns the analyzer used by this index. */
  public Analyzer getAnalyzer() {
    return analyzer;
  }

  /** Returns the number of documents currently in this index. */
  public synchronized int docCount() {
    int count = ramSegmentInfos.size();
    for (int i = 0; i < segmentInfos.size(); i++) {
      SegmentInfo si = segmentInfos.info(i);
      count += si.docCount;
    }
    return count;
  }

  /**
   * The maximum number of terms that will be indexed for a single field in a
   * document.  This limits the amount of memory required for indexing, so that
   * collections with very large files will not crash the indexing process by
   * running out of memory.<p/>
   * Note that this effectively truncates large documents, excluding from the
   * index terms that occur further in the document.  If you know your source
   * documents are large, be sure to set this value high enough to accommodate
   * the expected size.  If you set it to Integer.MAX_VALUE, then the only limit
   * is your memory, but you should anticipate an OutOfMemoryError.<p/>
   * By default, no more than 10,000 terms will be indexed for a field.
   */
  private int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;

  /**
   * Adds a document to this index.  If the document contains more than
   * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
   * discarded.
   *
   * <p> Note that if an Exception is hit (for example disk full)
   * then the index will be consistent, but this document
   * may not have been added.  Furthermore, it's possible
   * the index will have one segment in non-compound format
   * even when using compound files (when a merge has
   * partially succeeded).</p>
   *
   * <p> This method periodically flushes pending documents
   * to the Directory (every {@link #setMaxBufferedDocs}),
   * and also periodically merges segments in the index
   * (every {@link #setMergeFactor} flushes).  When this
   * occurs, the method will take more time to run (possibly
   * a long time if the index is large), and will require
   * free temporary space in the Directory to do the
   * merging.</p>
   *
   * <p>The amount of free space required when a merge is
   * triggered is up to 1X the size of all segments being
   * merged, when no readers/searchers are open against the
   * index, and up to 2X the size of all segments being
   * merged when readers/searchers are open against the
   * index (see {@link #optimize()} for details).  Most
   * merges are small (merging the smallest segments
   * together), but whenever a full merge occurs (all
   * segments in the index, which is the worst case for
   * temporary space usage) then the maximum free disk space
   * required is the same as {@link #optimize}.</p>
   */
  public void addDocument(Document doc) throws IOException {
    addDocument(doc, analyzer);
  }
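  // Usage sketch (not part of the Lucene source; illustrative only): adding
  // documents and reading back the count.  The field names, store/index flags,
  // and the WhitespaceAnalyzer override below are assumptions.
  //
  //   Document doc = new Document();
  //   doc.add(new Field("title", "Hello", Field.Store.YES, Field.Index.TOKENIZED));
  //   doc.add(new Field("body", "hello lucene", Field.Store.NO, Field.Index.TOKENIZED));
  //   writer.addDocument(doc);                           // uses getAnalyzer()
  //   writer.addDocument(doc, new WhitespaceAnalyzer()); // per-document analyzer
  //   System.out.println(writer.docCount());             // RAM-buffered + on-disk docs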
  /**
   * Adds a document to this index, using the provided analyzer instead of the
   * value of {@link #getAnalyzer()}.  If the document contains more than
   * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
   * discarded.
   *
   * <p>See {@link #addDocument(Document)} for details on
   * index and IndexWriter state after an Exception, and
   * flushing/merging temporary free space requirements.</p>
   */
  public void addDocument(Document doc, Analyzer analyzer) throws IOException {
    SegmentInfo newSegmentInfo = buildSingleDocSegment(doc, analyzer);
    synchronized (this) {
      ramSegmentInfos.addElement(newSegmentInfo);
      maybeFlushRamSegments();
    }
  }

  SegmentInfo buildSingleDocSegment(Document doc, Analyzer analyzer)
      throws IOException {
    DocumentWriter dw = new DocumentWriter(ramDirectory, analyzer, this);
    dw.setInfoStream(infoStream);
    String segmentName = newRamSegmentName();
    dw.addDocument(segmentName, doc);
    return new SegmentInfo(segmentName, 1, ramDirectory, false, false);
  }

  /**
   * Deletes the document(s) containing <code>term</code>.
   * @param term the term to identify the documents to be deleted