📄 indexwriter.java
字号:
package org.apache.lucene.index;/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.document.Document;import org.apache.lucene.search.Similarity;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.store.IndexInput;import org.apache.lucene.store.IndexOutput;import org.apache.lucene.store.Lock;import org.apache.lucene.store.RAMDirectory;import java.io.File;import java.io.IOException;import java.io.PrintStream;import java.util.Vector;import java.util.HashSet;import java.util.HashMap;import java.util.Iterator;import java.util.Map.Entry;/** An IndexWriter creates and maintains an index. <p>The third argument (<code>create</code>) to the <a href="#IndexWriter(org.apache.lucene.store.Directory, org.apache.lucene.analysis.Analyzer, boolean)"><b>constructor</b></a> determines whether a new index is created, or whether an existing index is opened for the addition of new documents. Note that you can open an index with create=true even while readers are using the index. 
The old readers will continue to search the "point in time" snapshot they had opened, and won't see the newly created index until they re-open.</p> <p>In either case, documents are added with the <a href="#addDocument(org.apache.lucene.document.Document)"><b>addDocument</b></a> method. When finished adding documents, <a href="#close()"><b>close</b></a> should be called.</p> <p>If an index will not have more documents added for a while and optimal search performance is desired, then the <a href="#optimize()"><b>optimize</b></a> method should be called before the index is closed.</p> <p>Opening an IndexWriter creates a lock file for the directory in use. Trying to open another IndexWriter on the same directory will lead to an IOException. The IOException is also thrown if an IndexReader on the same directory is used to delete documents from the index.</p> <p>As of <b>2.1</b>, IndexWriter can now delete documents by {@link Term} (see {@link #deleteDocuments} ) and update (delete then add) documents (see {@link #updateDocument}). Deletes are buffered until {@link #setMaxBufferedDeleteTerms} <code>Terms</code> at which point they are flushed to the index. Note that a flush occurs when there are enough buffered deletes or enough added documents, whichever is sooner. When a flush occurs, both pending deletes and added documents are flushed to the index.</p> */public class IndexWriter { /** * Default value for the write lock timeout (1,000). * @see #setDefaultWriteLockTimeout */ public static long WRITE_LOCK_TIMEOUT = 1000; private long writeLockTimeout = WRITE_LOCK_TIMEOUT; public static final String WRITE_LOCK_NAME = "write.lock"; /** * Default value is 10. Change using {@link #setMergeFactor(int)}. */ public final static int DEFAULT_MERGE_FACTOR = 10; /** * Default value is 10. Change using {@link #setMaxBufferedDocs(int)}. */ public final static int DEFAULT_MAX_BUFFERED_DOCS = 10; /** * Default value is 1000. Change using {@link #setMaxBufferedDeleteTerms(int)}. 
*/ public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = 1000; /** * Default value is {@link Integer#MAX_VALUE}. Change using {@link #setMaxMergeDocs(int)}. */ public final static int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE; /** * Default value is 10,000. Change using {@link #setMaxFieldLength(int)}. */ public final static int DEFAULT_MAX_FIELD_LENGTH = 10000; /** * Default value is 128. Change using {@link #setTermIndexInterval(int)}. */ public final static int DEFAULT_TERM_INDEX_INTERVAL = 128; private Directory directory; // where this index resides private Analyzer analyzer; // how to analyze text private Similarity similarity = Similarity.getDefault(); // how to normalize private boolean inTransaction = false; // true iff we are in a transaction private boolean commitPending; // true if segmentInfos has changes not yet committed private HashSet protectedSegments; // segment names that should not be deleted until commit private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails SegmentInfos segmentInfos = new SegmentInfos(); // the segments SegmentInfos ramSegmentInfos = new SegmentInfos(); // the segments in ramDirectory private final RAMDirectory ramDirectory = new RAMDirectory(); // for temp segs private IndexFileDeleter deleter; private Lock writeLock; private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; // The max number of delete terms that can be buffered before // they must be flushed to disk. private int maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS; // This Hashmap buffers delete terms in ram before they are applied. // The key is delete term; the value is number of ram // segments the term applies to. private HashMap bufferedDeleteTerms = new HashMap(); private int numBufferedDeleteTerms = 0; /** Use compound file setting. Defaults to true, minimizing the number of * files used. Setting this to false may improve indexing performance, but * may also cause file handle problems. 
*/
  private boolean useCompoundFile = true;

  private boolean closeDir;

  /**
   * Reports the compound-file flag held by this writer: either the value last
   * given to {@link #setUseCompoundFile(boolean)} or the default. It cannot be
   * used to find out the format of an index that already exists.
   *
   * @see #setUseCompoundFile(boolean)
   */
  public boolean getUseCompoundFile() {
    return this.useCompoundFile;
  }

  /**
   * Switches the compound file format on or off. When on, the multiple files
   * of each segment are merged into a single file once the segment has been
   * created, regardless of what directory is in use.
   */
  public void setUseCompoundFile(boolean value) {
    this.useCompoundFile = value;
  }

  /**
   * Expert: Installs the Similarity implementation this IndexWriter will use.
   *
   * @see Similarity#setDefault(Similarity)
   */
  public void setSimilarity(Similarity similarity) {
    this.similarity = similarity;
  }

  /**
   * Expert: Hands back the Similarity implementation this IndexWriter uses.
   *
   * <p>Unless changed, this is the value {@link Similarity#getDefault()} had
   * when the writer's field was initialized.
   */
  public Similarity getSimilarity() {
    return similarity;
  }

  /** Expert: Set the interval between indexed terms.  Large values cause less
   * memory to be used by IndexReader, but slow random-access to terms.  Small
   * values cause more memory to be used by an IndexReader, and speed
   * random-access to terms.
   *
   * This parameter determines the amount of computation required per query
   * term, regardless of the number of documents that contain that term.  In
   * particular, it is the maximum number of other terms that must be
   * scanned before a term is located and its frequency and position information
   * may be processed.  In a large index with user-entered query terms, query
   * processing time is likely to be dominated not by term lookup but rather
   * by the processing of frequency and positional data.  In a small index
   * or when many uncommon query terms are generated (e.g., by wildcard
   * queries) term lookup may become a dominant cost.
* * In particular, <code>numUniqueTerms/interval</code> terms are read into * memory by an IndexReader, and, on average, <code>interval/2</code> terms * must be scanned for each random term access. * * @see #DEFAULT_TERM_INDEX_INTERVAL */ public void setTermIndexInterval(int interval) { this.termIndexInterval = interval; } /** Expert: Return the interval between indexed terms. * * @see #setTermIndexInterval(int) */ public int getTermIndexInterval() { return termIndexInterval; } /** * Constructs an IndexWriter for the index in <code>path</code>. * Text will be analyzed with <code>a</code>. If <code>create</code> * is true, then a new, empty index will be created in * <code>path</code>, replacing the index already there, if any. * * @param path the path to the index directory * @param a the analyzer to use * @param create <code>true</code> to create the index or overwrite * the existing one; <code>false</code> to append to the existing * index * @throws IOException if the directory cannot be read/written to, or * if it does not exist, and <code>create</code> is * <code>false</code> */ public IndexWriter(String path, Analyzer a, boolean create) throws IOException { init(path, a, create); } /** * Constructs an IndexWriter for the index in <code>path</code>. * Text will be analyzed with <code>a</code>. If <code>create</code> * is true, then a new, empty index will be created in * <code>path</code>, replacing the index already there, if any. * * @param path the path to the index directory * @param a the analyzer to use * @param create <code>true</code> to create the index or overwrite * the existing one; <code>false</code> to append to the existing * index * @throws IOException if the directory cannot be read/written to, or * if it does not exist, and <code>create</code> is * <code>false</code> */ public IndexWriter(File path, Analyzer a, boolean create) throws IOException { init(path, a, create); } /** * Constructs an IndexWriter for the index in <code>d</code>. 
* Text will be analyzed with <code>a</code>. If <code>create</code> * is true, then a new, empty index will be created in * <code>d</code>, replacing the index already there, if any. * * @param d the index directory * @param a the analyzer to use * @param create <code>true</code> to create the index or overwrite * the existing one; <code>false</code> to append to the existing * index * @throws IOException if the directory cannot be read/written to, or * if it does not exist, and <code>create</code> is * <code>false</code> */ public IndexWriter(Directory d, Analyzer a, boolean create) throws IOException { init(d, a, create, false); } /** * Constructs an IndexWriter for the index in * <code>path</code>, creating it first if it does not * already exist, otherwise appending to the existing * index. Text will be analyzed with <code>a</code>. * * @param path the path to the index directory * @param a the analyzer to use * @throws IOException if the directory cannot be * created or read/written to */ public IndexWriter(String path, Analyzer a) throws IOException { if (IndexReader.indexExists(path)) { init(path, a, false); } else { init(path, a, true); } } /** * Constructs an IndexWriter for the index in * <code>path</code>, creating it first if it does not * already exist, otherwise appending to the existing * index. Text will be analyzed with * <code>a</code>. * * @param path the path to the index directory * @param a the analyzer to use * @throws IOException if the directory cannot be * created or read/written to */ public IndexWriter(File path, Analyzer a) throws IOException { if (IndexReader.indexExists(path)) { init(path, a, false); } else { init(path, a, true); } } /** * Constructs an IndexWriter for the index in * <code>d</code>, creating it first if it does not * already exist, otherwise appending to the existing * index. Text will be analyzed with <code>a</code>. 
* * @param d the index directory * @param a the analyzer to use * @throws IOException if the directory cannot be * created or read/written to */ public IndexWriter(Directory d, Analyzer a) throws IOException { if (IndexReader.indexExists(d)) { init(d, a, false, false); } else { init(d, a, true, false); } } private IndexWriter(Directory d, Analyzer a, final boolean create, boolean closeDir) throws IOException { init(d, a, create, closeDir); } private void init(String path, Analyzer a, final boolean create) throws IOException { init(FSDirectory.getDirectory(path), a, create, true); } private void init(File path, Analyzer a, final boolean create) throws IOException { init(FSDirectory.getDirectory(path), a, create, true); } private void init(Directory d, Analyzer a, final boolean create, boolean closeDir) throws IOException {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -