// IndexWriter.java — org.apache.lucene.index
// (web code-viewer page banner removed during cleanup)
package org.apache.lucene.index;/** * Copyright 2004 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */import java.io.IOException;import java.io.File;import java.io.PrintStream;import java.util.Vector;import org.apache.lucene.store.Directory;import org.apache.lucene.store.RAMDirectory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.store.Lock;import org.apache.lucene.store.InputStream;import org.apache.lucene.store.OutputStream;import org.apache.lucene.search.Similarity;import org.apache.lucene.document.Document;import org.apache.lucene.analysis.Analyzer;/** An IndexWriter creates and maintains an index. The third argument to the <a href="#IndexWriter"><b>constructor</b></a> determines whether a new index is created, or whether an existing index is opened for the addition of new documents. In either case, documents are added with the <a href="#addDocument"><b>addDocument</b></a> method. When finished adding documents, <a href="#close"><b>close</b></a> should be called. If an index will not have more documents added for a while and optimal search performance is desired, then the <a href="#optimize"><b>optimize</b></a> method should be called before the index is closed. */public class IndexWriter { /** * Default value is 1000. Use <code>org.apache.lucene.writeLockTimeout</code> * system property to override. 
*/ public static long WRITE_LOCK_TIMEOUT = Integer.parseInt(System.getProperty("org.apache.lucene.writeLockTimeout", "1000")); /** * Default value is 10000. Use <code>org.apache.lucene.commitLockTimeout</code> * system property to override. */ public static long COMMIT_LOCK_TIMEOUT = Integer.parseInt(System.getProperty("org.apache.lucene.commitLockTimeout", "10000")); public static final String WRITE_LOCK_NAME = "write.lock"; public static final String COMMIT_LOCK_NAME = "commit.lock"; /** * Default value is 10. Use <code>org.apache.lucene.mergeFactor</code> * system property to override. */ public static final int DEFAULT_MERGE_FACTOR = Integer.parseInt(System.getProperty("org.apache.lucene.mergeFactor", "10")); /** * Default value is 10. Use <code>org.apache.lucene.minMergeDocs</code> * system property to override. */ public static final int DEFAULT_MIN_MERGE_DOCS = Integer.parseInt(System.getProperty("org.apache.lucene.minMergeDocs", "10")); /** * Default value is {@link Integer#MAX_VALUE}. * Use <code>org.apache.lucene.maxMergeDocs</code> system property to override. */ public static final int DEFAULT_MAX_MERGE_DOCS = Integer.parseInt(System.getProperty("org.apache.lucene.maxMergeDocs", String.valueOf(Integer.MAX_VALUE))); /** * Default value is 10000. Use <code>org.apache.lucene.maxFieldLength</code> * system property to override. */ public static final int DEFAULT_MAX_FIELD_LENGTH = Integer.parseInt(System.getProperty("org.apache.lucene.maxFieldLength", "10000")); private Directory directory; // where this index resides private Analyzer analyzer; // how to analyze text private Similarity similarity = Similarity.getDefault(); // how to normalize private SegmentInfos segmentInfos = new SegmentInfos(); // the segments private final Directory ramDirectory = new RAMDirectory(); // for temp segs private Lock writeLock; /** Use compound file setting. Defaults to true, minimizing the number of * files used. 
Setting this to false may improve indexing performance, but * may also cause file handle problems. */ private boolean useCompoundFile = true; private boolean closeDir; /** Setting to turn on usage of a compound file. When on, multiple files * for each segment are merged into a single file once the segment creation * is finished. This is done regardless of what directory is in use. */ public boolean getUseCompoundFile() { return useCompoundFile; } /** Setting to turn on usage of a compound file. When on, multiple files * for each segment are merged into a single file once the segment creation * is finished. This is done regardless of what directory is in use. */ public void setUseCompoundFile(boolean value) { useCompoundFile = value; } /** Expert: Set the Similarity implementation used by this IndexWriter. * * @see Similarity#setDefault(Similarity) */ public void setSimilarity(Similarity similarity) { this.similarity = similarity; } /** Expert: Return the Similarity implementation used by this IndexWriter. * * <p>This defaults to the current value of {@link Similarity#getDefault()}. */ public Similarity getSimilarity() { return this.similarity; } /** * Constructs an IndexWriter for the index in <code>path</code>. * Text will be analyzed with <code>a</code>. If <code>create</code> * is true, then a new, empty index will be created in * <code>path</code>, replacing the index already there, if any. * * @param path the path to the index directory * @param a the analyzer to use * @param create <code>true</code> to create the index or overwrite * the existing one; <code>false</code> to append to the existing * index * @throws IOException if the directory cannot be read/written to, or * if it does not exist, and <code>create</code> is * <code>false</code> */ public IndexWriter(String path, Analyzer a, boolean create) throws IOException { this(FSDirectory.getDirectory(path, create), a, create, true); } /** * Constructs an IndexWriter for the index in <code>path</code>. 
* Text will be analyzed with <code>a</code>. If <code>create</code> * is true, then a new, empty index will be created in * <code>path</code>, replacing the index already there, if any. * * @param path the path to the index directory * @param a the analyzer to use * @param create <code>true</code> to create the index or overwrite * the existing one; <code>false</code> to append to the existing * index * @throws IOException if the directory cannot be read/written to, or * if it does not exist, and <code>create</code> is * <code>false</code> */ public IndexWriter(File path, Analyzer a, boolean create) throws IOException { this(FSDirectory.getDirectory(path, create), a, create, true); } /** * Constructs an IndexWriter for the index in <code>d</code>. * Text will be analyzed with <code>a</code>. If <code>create</code> * is true, then a new, empty index will be created in * <code>d</code>, replacing the index already there, if any. * * @param d the index directory * @param a the analyzer to use * @param create <code>true</code> to create the index or overwrite * the existing one; <code>false</code> to append to the existing * index * @throws IOException if the directory cannot be read/written to, or * if it does not exist, and <code>create</code> is * <code>false</code> */ public IndexWriter(Directory d, Analyzer a, boolean create) throws IOException { this(d, a, create, false); } private IndexWriter(Directory d, Analyzer a, final boolean create, boolean closeDir) throws IOException { this.closeDir = closeDir; directory = d; analyzer = a; Lock writeLock = directory.makeLock(IndexWriter.WRITE_LOCK_NAME); if (!writeLock.obtain(WRITE_LOCK_TIMEOUT)) // obtain write lock throw new IOException("Index locked for write: " + writeLock); this.writeLock = writeLock; // save it synchronized (directory) { // in- & inter-process sync new Lock.With(directory.makeLock(IndexWriter.COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT) { public Object doBody() throws IOException { if (create) 
segmentInfos.write(directory); else segmentInfos.read(directory); return null; } }.run(); } } /** Flushes all changes to an index and closes all associated files. */ public synchronized void close() throws IOException { flushRamSegments(); ramDirectory.close(); writeLock.release(); // release write lock writeLock = null; if(closeDir) directory.close(); } /** Release the write lock, if needed. */ protected void finalize() throws IOException { if (writeLock != null) { writeLock.release(); // release write lock writeLock = null; } } /** Returns the analyzer used by this index. */ public Analyzer getAnalyzer() { return analyzer; } /** Returns the number of documents currently in this index. */ public synchronized int docCount() { int count = 0; for (int i = 0; i < segmentInfos.size(); i++) { SegmentInfo si = segmentInfos.info(i); count += si.docCount; } return count; } /** * The maximum number of terms that will be indexed for a single field in a * document. This limits the amount of memory required for indexing, so that * collections with very large files will not crash the indexing process by * running out of memory.<p/> * Note that this effectively truncates large documents, excluding from the * index terms that occur further in the document. If you know your source * documents are large, be sure to set this value high enough to accomodate * the expected size. If you set it to Integer.MAX_VALUE, then the only limit * is your memory, but you should anticipate an OutOfMemoryError.<p/> * By default, no more than 10,000 terms will be indexed for a field. */ public int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH; /** * Adds a document to this index. If the document contains more than * {@link #maxFieldLength} terms for a given field, the remainder are * discarded.
// (removed: keyboard-shortcut legend injected by the hosting code-viewer
//  webpage — copy/search/fullscreen/theme/font-size hotkeys; not source code)