indexwriter.java

来自「一套java版本的搜索引擎源码」· Java 代码 · 共 1,780 行 · 第 1/5 页
JAVA
1,780 行
package org.apache.lucene.index;/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements.  See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License.  You may obtain a copy of the License at * *     http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.document.Document;import org.apache.lucene.search.Similarity;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.store.IndexInput;import org.apache.lucene.store.IndexOutput;import org.apache.lucene.store.Lock;import org.apache.lucene.store.RAMDirectory;import java.io.File;import java.io.IOException;import java.io.PrintStream;import java.util.Vector;import java.util.HashSet;import java.util.HashMap;import java.util.Iterator;import java.util.Map.Entry;/**  An IndexWriter creates and maintains an index.  <p>The third argument (<code>create</code>) to the   <a href="#IndexWriter(org.apache.lucene.store.Directory, org.apache.lucene.analysis.Analyzer, boolean)"><b>constructor</b></a>  determines whether a new index is created, or whether an existing index is  opened for the addition of new documents.  Note that you  can open an index with create=true even while readers are  using the index.  
The old readers will continue to search  the "point in time" snapshot they had opened, and won't  see the newly created index until they re-open.</p>  <p>In either case, documents are added with the <a  href="#addDocument(org.apache.lucene.document.Document)"><b>addDocument</b></a> method.    When finished adding documents, <a href="#close()"><b>close</b></a> should be called.</p>  <p>If an index will not have more documents added for a while and optimal search  performance is desired, then the <a href="#optimize()"><b>optimize</b></a>  method should be called before the index is closed.</p>    <p>Opening an IndexWriter creates a lock file for the directory in use. Trying to open  another IndexWriter on the same directory will lead to an IOException. The IOException  is also thrown if an IndexReader on the same directory is used to delete documents  from the index.</p>    <p>As of <b>2.1</b>, IndexWriter can now delete documents  by {@link Term} (see {@link #deleteDocuments} ) and update  (delete then add) documents (see {@link #updateDocument}).  Deletes are buffered until {@link  #setMaxBufferedDeleteTerms} <code>Terms</code> at which  point they are flushed to the index.  Note that a flush  occurs when there are enough buffered deletes or enough  added documents, whichever is sooner.  When a flush  occurs, both pending deletes and added documents are  flushed to the index.</p>  */public class IndexWriter {  /**   * Default value for the write lock timeout (1,000).   * @see #setDefaultWriteLockTimeout   */  public static long WRITE_LOCK_TIMEOUT = 1000;  private long writeLockTimeout = WRITE_LOCK_TIMEOUT;  public static final String WRITE_LOCK_NAME = "write.lock";  /**   * Default value is 10. Change using {@link #setMergeFactor(int)}.   */  public final static int DEFAULT_MERGE_FACTOR = 10;  /**   * Default value is 10. Change using {@link #setMaxBufferedDocs(int)}.   */  public final static int DEFAULT_MAX_BUFFERED_DOCS = 10;  /**   * Default value is 1000. 
Change using {@link #setMaxBufferedDeleteTerms(int)}.   */  public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = 1000;  /**   * Default value is {@link Integer#MAX_VALUE}. Change using {@link #setMaxMergeDocs(int)}.   */  public final static int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;  /**   * Default value is 10,000. Change using {@link #setMaxFieldLength(int)}.   */  public final static int DEFAULT_MAX_FIELD_LENGTH = 10000;  /**   * Default value is 128. Change using {@link #setTermIndexInterval(int)}.   */  public final static int DEFAULT_TERM_INDEX_INTERVAL = 128;    private Directory directory;  // where this index resides  private Analyzer analyzer;    // how to analyze text  private Similarity similarity = Similarity.getDefault(); // how to normalize  private boolean inTransaction = false; // true iff we are in a transaction  private boolean commitPending; // true if segmentInfos has changes not yet committed  private HashSet protectedSegments; // segment names that should not be deleted until commit  private SegmentInfos rollbackSegmentInfos;      // segmentInfos we will fallback to if the commit fails  SegmentInfos segmentInfos = new SegmentInfos();       // the segments  SegmentInfos ramSegmentInfos = new SegmentInfos();    // the segments in ramDirectory  private final RAMDirectory ramDirectory = new RAMDirectory(); // for temp segs  private IndexFileDeleter deleter;  private Lock writeLock;  private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;  // The max number of delete terms that can be buffered before  // they must be flushed to disk.  private int maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS;  // This Hashmap buffers delete terms in ram before they are applied.  // The key is delete term; the value is number of ram  // segments the term applies to.  private HashMap bufferedDeleteTerms = new HashMap();  private int numBufferedDeleteTerms = 0;  /** Use compound file setting. 
Defaults to true, minimizing the number of
   * files used.  Setting this to false may improve indexing performance, but
   * may also cause file handle problems.
   */
  private boolean useCompoundFile = true;

  /*
   * NOTE(review): init(Directory, Analyzer, boolean, boolean) takes a parameter
   * of the same name; the path-based init overloads pass true for it and the
   * Directory-based constructors pass false.  Presumably that value is stored
   * here -- confirm where this flag is assigned and consumed.
   */
  private boolean closeDir;

  /** Reports whether this writer is configured to use the compound file format.
   *  The value returned is only what was last passed to
   *  setUseCompoundFile(boolean), or the default; it says nothing about the
   *  format of an index that already exists.
   *  @see #setUseCompoundFile(boolean)
   */
  public boolean getUseCompoundFile() {
    return this.useCompoundFile;
  }

  /** Turns usage of a compound file on or off.  When on, the multiple files
   *  of each segment are merged into a single file once creation of the
   *  segment is finished, regardless of what directory is in use.
   */
  public void setUseCompoundFile(boolean value) {
    this.useCompoundFile = value;
  }

  /** Expert: Installs the Similarity implementation this IndexWriter will use.
   *
   * @see Similarity#setDefault(Similarity)
   */
  public void setSimilarity(Similarity similarity) {
    this.similarity = similarity;
  }

  /** Expert: Gives back the Similarity implementation this IndexWriter uses.
   *
   * <p>Unless replaced, this is the value {@link Similarity#getDefault()}
   * had when the writer was constructed.
   */
  public Similarity getSimilarity() {
    return similarity;
  }

  /** Expert: Set the interval between indexed terms.  Large values cause less
   * memory to be used by IndexReader, but slow random-access to terms.  Small
   * values cause more memory to be used by an IndexReader, and speed
   * random-access to terms.
   *
   * This parameter determines the amount of computation required per query
   * term, regardless of the number of documents that contain that term.  In
   * particular, it is the maximum number of other terms that must be
   * scanned before a term is located and its frequency and position information
   * may be processed.  
In a large index with user-entered query terms, query   * processing time is likely to be dominated not by term lookup but rather   * by the processing of frequency and positional data.  In a small index   * or when many uncommon query terms are generated (e.g., by wildcard   * queries) term lookup may become a dominant cost.   *   * In particular, <code>numUniqueTerms/interval</code> terms are read into   * memory by an IndexReader, and, on average, <code>interval/2</code> terms   * must be scanned for each random term access.   *   * @see #DEFAULT_TERM_INDEX_INTERVAL   */  public void setTermIndexInterval(int interval) {    this.termIndexInterval = interval;  }  /** Expert: Return the interval between indexed terms.   *   * @see #setTermIndexInterval(int)   */  public int getTermIndexInterval() { return termIndexInterval; }  /**   * Constructs an IndexWriter for the index in <code>path</code>.   * Text will be analyzed with <code>a</code>.  If <code>create</code>   * is true, then a new, empty index will be created in   * <code>path</code>, replacing the index already there, if any.   *   * @param path the path to the index directory   * @param a the analyzer to use   * @param create <code>true</code> to create the index or overwrite   *  the existing one; <code>false</code> to append to the existing   *  index   * @throws IOException if the directory cannot be read/written to, or   *  if it does not exist, and <code>create</code> is   *  <code>false</code>   */  public IndexWriter(String path, Analyzer a, boolean create)       throws IOException {    init(path, a, create);  }  /**   * Constructs an IndexWriter for the index in <code>path</code>.   * Text will be analyzed with <code>a</code>.  If <code>create</code>   * is true, then a new, empty index will be created in   * <code>path</code>, replacing the index already there, if any.   
*   * @param path the path to the index directory   * @param a the analyzer to use   * @param create <code>true</code> to create the index or overwrite   *  the existing one; <code>false</code> to append to the existing   *  index   * @throws IOException if the directory cannot be read/written to, or   *  if it does not exist, and <code>create</code> is   *  <code>false</code>   */  public IndexWriter(File path, Analyzer a, boolean create)       throws IOException {    init(path, a, create);  }  /**   * Constructs an IndexWriter for the index in <code>d</code>.   * Text will be analyzed with <code>a</code>.  If <code>create</code>   * is true, then a new, empty index will be created in   * <code>d</code>, replacing the index already there, if any.   *   * @param d the index directory   * @param a the analyzer to use   * @param create <code>true</code> to create the index or overwrite   *  the existing one; <code>false</code> to append to the existing   *  index   * @throws IOException if the directory cannot be read/written to, or   *  if it does not exist, and <code>create</code> is   *  <code>false</code>   */  public IndexWriter(Directory d, Analyzer a, boolean create)       throws IOException {    init(d, a, create, false);  }  /**   * Constructs an IndexWriter for the index in   * <code>path</code>, creating it first if it does not   * already exist, otherwise appending to the existing   * index.  Text will be analyzed with <code>a</code>.   *   * @param path the path to the index directory   * @param a the analyzer to use   * @throws IOException if the directory cannot be   *  created or read/written to   */  public IndexWriter(String path, Analyzer a)     throws IOException {    if (IndexReader.indexExists(path)) {      init(path, a, false);    } else {      init(path, a, true);    }  }  /**   * Constructs an IndexWriter for the index in   * <code>path</code>, creating it first if it does not   * already exist, otherwise appending to the existing   * index.  
Text will be analyzed with   * <code>a</code>.   *   * @param path the path to the index directory   * @param a the analyzer to use   * @throws IOException if the directory cannot be   *  created or read/written to   */  public IndexWriter(File path, Analyzer a)     throws IOException {    if (IndexReader.indexExists(path)) {      init(path, a, false);    } else {      init(path, a, true);    }  }  /**   * Constructs an IndexWriter for the index in   * <code>d</code>, creating it first if it does not   * already exist, otherwise appending to the existing   * index.  Text will be analyzed with <code>a</code>.   *   * @param d the index directory   * @param a the analyzer to use   * @throws IOException if the directory cannot be   *  created or read/written to   */  public IndexWriter(Directory d, Analyzer a)     throws IOException {    if (IndexReader.indexExists(d)) {      init(d, a, false, false);    } else {      init(d, a, true, false);    }  }  private IndexWriter(Directory d, Analyzer a, final boolean create, boolean closeDir)    throws IOException {    init(d, a, create, closeDir);  }  private void init(String path, Analyzer a, final boolean create)    throws IOException {    init(FSDirectory.getDirectory(path), a, create, true);  }  private void init(File path, Analyzer a, final boolean create)    throws IOException {    init(FSDirectory.getDirectory(path), a, create, true);  }  private void init(Directory d, Analyzer a, final boolean create, boolean closeDir)    throws IOException {
indexwriter.java - 源码说明

本页面展示了「一套java版本的搜索引擎源码」中的 indexwriter.java 源码文件，采用 Java 编程语言编写，共 1,780 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与 Java 相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?