// IndexWriter.java — org.apache.lucene.index
// (web code-viewer page banner removed during cleanup)
package org.apache.lucene.index;/** * Copyright 2004 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */import java.io.IOException;import java.io.File;import java.io.PrintStream;import java.util.Vector;import org.apache.lucene.store.Directory;import org.apache.lucene.store.RAMDirectory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.store.Lock;import org.apache.lucene.store.InputStream;import org.apache.lucene.store.OutputStream;import org.apache.lucene.search.Similarity;import org.apache.lucene.document.Document;import org.apache.lucene.analysis.Analyzer;/** An IndexWriter creates and maintains an index. The third argument to the <a href="#IndexWriter"><b>constructor</b></a> determines whether a new index is created, or whether an existing index is opened for the addition of new documents. In either case, documents are added with the <a href="#addDocument"><b>addDocument</b></a> method. When finished adding documents, <a href="#close"><b>close</b></a> should be called. If an index will not have more documents added for a while and optimal search performance is desired, then the <a href="#optimize"><b>optimize</b></a> method should be called before the index is closed. */public class IndexWriter { /** * Default value is 1000. Use <code>org.apache.lucene.writeLockTimeout</code> * system property to override. 
*/ public static long WRITE_LOCK_TIMEOUT = Integer.parseInt(System.getProperty("org.apache.lucene.writeLockTimeout", "1000")); /** * Default value is 10000. Use <code>org.apache.lucene.commitLockTimeout</code> * system property to override. */ public static long COMMIT_LOCK_TIMEOUT = Integer.parseInt(System.getProperty("org.apache.lucene.commitLockTimeout", "10000")); public static final String WRITE_LOCK_NAME = "write.lock"; public static final String COMMIT_LOCK_NAME = "commit.lock"; /** * Default value is 10. Use <code>org.apache.lucene.mergeFactor</code> * system property to override. */ public static final int DEFAULT_MERGE_FACTOR = Integer.parseInt(System.getProperty("org.apache.lucene.mergeFactor", "10")); /** * Default value is 10. Use <code>org.apache.lucene.minMergeDocs</code> * system property to override. */ public static final int DEFAULT_MIN_MERGE_DOCS = Integer.parseInt(System.getProperty("org.apache.lucene.minMergeDocs", "10")); /** * Default value is {@link Integer#MAX_VALUE}. * Use <code>org.apache.lucene.maxMergeDocs</code> system property to override. */ public static final int DEFAULT_MAX_MERGE_DOCS = Integer.parseInt(System.getProperty("org.apache.lucene.maxMergeDocs", String.valueOf(Integer.MAX_VALUE))); /** * Default value is 10000. Use <code>org.apache.lucene.maxFieldLength</code> * system property to override. */ public static final int DEFAULT_MAX_FIELD_LENGTH = Integer.parseInt(System.getProperty("org.apache.lucene.maxFieldLength", "10000")); private Directory directory; // where this index resides private Analyzer analyzer; // how to analyze text private Similarity similarity = Similarity.getDefault(); // how to normalize private SegmentInfos segmentInfos = new SegmentInfos(); // the segments private final Directory ramDirectory = new RAMDirectory(); // for temp segs private Lock writeLock; /** Use compound file setting. Defaults to true, minimizing the number of * files used. 
Setting this to false may improve indexing performance, but * may also cause file handle problems. */ private boolean useCompoundFile = true; private boolean closeDir; /** Setting to turn on usage of a compound file. When on, multiple files * for each segment are merged into a single file once the segment creation * is finished. This is done regardless of what directory is in use. */ public boolean getUseCompoundFile() { return useCompoundFile; } /** Setting to turn on usage of a compound file. When on, multiple files * for each segment are merged into a single file once the segment creation * is finished. This is done regardless of what directory is in use. */ public void setUseCompoundFile(boolean value) { useCompoundFile = value; } /** Expert: Set the Similarity implementation used by this IndexWriter. * * @see Similarity#setDefault(Similarity) */ public void setSimilarity(Similarity similarity) { this.similarity = similarity; } /** Expert: Return the Similarity implementation used by this IndexWriter. * * <p>This defaults to the current value of {@link Similarity#getDefault()}. */ public Similarity getSimilarity() { return this.similarity; } /** * Constructs an IndexWriter for the index in <code>path</code>. * Text will be analyzed with <code>a</code>. If <code>create</code> * is true, then a new, empty index will be created in * <code>path</code>, replacing the index already there, if any. * * @param path the path to the index directory * @param a the analyzer to use * @param create <code>true</code> to create the index or overwrite * the existing one; <code>false</code> to append to the existing * index * @throws IOException if the directory cannot be read/written to, or * if it does not exist, and <code>create</code> is * <code>false</code> */ public IndexWriter(String path, Analyzer a, boolean create) throws IOException { this(FSDirectory.getDirectory(path, create), a, create, true); } /** * Constructs an IndexWriter for the index in <code>path</code>. 
* Text will be analyzed with <code>a</code>. If <code>create</code> * is true, then a new, empty index will be created in * <code>path</code>, replacing the index already there, if any. * * @param path the path to the index directory * @param a the analyzer to use * @param create <code>true</code> to create the index or overwrite * the existing one; <code>false</code> to append to the existing * index * @throws IOException if the directory cannot be read/written to, or * if it does not exist, and <code>create</code> is * <code>false</code> */ public IndexWriter(File path, Analyzer a, boolean create) throws IOException { this(FSDirectory.getDirectory(path, create), a, create, true); } /** * Constructs an IndexWriter for the index in <code>d</code>. * Text will be analyzed with <code>a</code>. If <code>create</code> * is true, then a new, empty index will be created in * <code>d</code>, replacing the index already there, if any. * * @param d the index directory * @param a the analyzer to use * @param create <code>true</code> to create the index or overwrite * the existing one; <code>false</code> to append to the existing * index * @throws IOException if the directory cannot be read/written to, or * if it does not exist, and <code>create</code> is * <code>false</code> */ public IndexWriter(Directory d, Analyzer a, boolean create) throws IOException { this(d, a, create, false); } private IndexWriter(Directory d, Analyzer a, final boolean create, boolean closeDir) throws IOException { this.closeDir = closeDir; directory = d; analyzer = a; Lock writeLock = directory.makeLock(IndexWriter.WRITE_LOCK_NAME); if (!writeLock.obtain(WRITE_LOCK_TIMEOUT)) // obtain write lock throw new IOException("Index locked for write: " + writeLock); this.writeLock = writeLock; // save it synchronized (directory) { // in- & inter-process sync new Lock.With(directory.makeLock(IndexWriter.COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT) { public Object doBody() throws IOException { if (create) 
segmentInfos.write(directory); else segmentInfos.read(directory); return null; } }.run(); } } /** Flushes all changes to an index and closes all associated files. */ public synchronized void close() throws IOException { flushRamSegments(); ramDirectory.close(); writeLock.release(); // release write lock writeLock = null; if(closeDir) directory.close(); } /** Release the write lock, if needed. */ protected void finalize() throws IOException { if (writeLock != null) { writeLock.release(); // release write lock writeLock = null; } } /** Returns the analyzer used by this index. */ public Analyzer getAnalyzer() { return analyzer; } /** Returns the number of documents currently in this index. */ public synchronized int docCount() { int count = 0; for (int i = 0; i < segmentInfos.size(); i++) { SegmentInfo si = segmentInfos.info(i); count += si.docCount; } return count; } /** * The maximum number of terms that will be indexed for a single field in a * document. This limits the amount of memory required for indexing, so that * collections with very large files will not crash the indexing process by * running out of memory.<p/> * Note that this effectively truncates large documents, excluding from the * index terms that occur further in the document. If you know your source * documents are large, be sure to set this value high enough to accomodate * the expected size. If you set it to Integer.MAX_VALUE, then the only limit * is your memory, but you should anticipate an OutOfMemoryError.<p/> * By default, no more than 10,000 terms will be indexed for a field. */ public int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH; /** * Adds a document to this index. If the document contains more than * {@link #maxFieldLength} terms for a given field, the remainder are * discarded.
// (removed: keyboard-shortcut legend injected by the hosting code-viewer
//  webpage — copy/search/fullscreen/theme/font-size hotkeys; not source code)