
📄 indexwriter.java

📁 Source code of a Java-based search engine
💻 JAVA
📖 Page 1 of 5
    this.closeDir = closeDir;
    directory = d;
    analyzer = a;

    if (create) {
      // Clear the write lock in case it's leftover:
      directory.clearLock(IndexWriter.WRITE_LOCK_NAME);
    }

    Lock writeLock = directory.makeLock(IndexWriter.WRITE_LOCK_NAME);
    if (!writeLock.obtain(writeLockTimeout)) // obtain write lock
      throw new IOException("Index locked for write: " + writeLock);
    this.writeLock = writeLock;                   // save it

    try {
      if (create) {
        // Try to read first.  This is to allow create
        // against an index that's currently open for
        // searching.  In this case we write the next
        // segments_N file with no segments:
        try {
          segmentInfos.read(directory);
          segmentInfos.clear();
        } catch (IOException e) {
          // Likely this means it's a fresh directory
        }
        segmentInfos.write(directory);
      } else {
        segmentInfos.read(directory);
      }

      // Create a deleter to keep track of which files can
      // be deleted:
      deleter = new IndexFileDeleter(segmentInfos, directory);
      deleter.setInfoStream(infoStream);
      deleter.findDeletableFiles();
      deleter.deleteFiles();
    } catch (IOException e) {
      this.writeLock.release();
      this.writeLock = null;
      throw e;
    }
  }

  /** Determines the largest number of documents ever merged by addDocument().
   * Small values (e.g., less than 10,000) are best for interactive indexing,
   * as this limits the length of pauses while indexing to a few seconds.
   * Larger values are best for batched indexing and speedier searches.
   *
   * <p>The default value is {@link Integer#MAX_VALUE}.
   */
  public void setMaxMergeDocs(int maxMergeDocs) {
    this.maxMergeDocs = maxMergeDocs;
  }

  /**
   * @see #setMaxMergeDocs
   */
  public int getMaxMergeDocs() {
    return maxMergeDocs;
  }

  /**
   * The maximum number of terms that will be indexed for a single field in a
   * document.  This limits the amount of memory required for indexing, so that
   * collections with very large files will not crash the indexing process by
   * running out of memory.<p/>
   * Note that this effectively truncates large documents, excluding from the
   * index terms that occur further in the document.  If you know your source
   * documents are large, be sure to set this value high enough to accommodate
   * the expected size.  If you set it to Integer.MAX_VALUE, then the only limit
   * is your memory, but you should anticipate an OutOfMemoryError.<p/>
   * By default, no more than 10,000 terms will be indexed for a field.
   */
  public void setMaxFieldLength(int maxFieldLength) {
    this.maxFieldLength = maxFieldLength;
  }

  /**
   * @see #setMaxFieldLength
   */
  public int getMaxFieldLength() {
    return maxFieldLength;
  }

  /** Determines the minimal number of documents required before the buffered
   * in-memory documents are merged and a new Segment is created.
   * Since Documents are merged in a {@link org.apache.lucene.store.RAMDirectory},
   * a large value gives faster indexing.  At the same time, mergeFactor limits
   * the number of files open in a FSDirectory.
   *
   * <p>The default value is 10.
   *
   * @throws IllegalArgumentException if maxBufferedDocs is smaller than 2
   */
  public void setMaxBufferedDocs(int maxBufferedDocs) {
    if (maxBufferedDocs < 2)
      throw new IllegalArgumentException("maxBufferedDocs must at least be 2");
    this.minMergeDocs = maxBufferedDocs;
  }

  /**
   * @see #setMaxBufferedDocs
   */
  public int getMaxBufferedDocs() {
    return minMergeDocs;
  }

  /**
   * <p>Determines the minimal number of delete terms required before the buffered
   * in-memory delete terms are applied and flushed. If there are documents
   * buffered in memory at the time, they are merged and a new segment is
   * created.</p>
   * <p>The default value is {@link #DEFAULT_MAX_BUFFERED_DELETE_TERMS}.</p>
   * @throws IllegalArgumentException if maxBufferedDeleteTerms is smaller than 1
   */
  public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
    if (maxBufferedDeleteTerms < 1)
      throw new IllegalArgumentException("maxBufferedDeleteTerms must at least be 1");
    this.maxBufferedDeleteTerms = maxBufferedDeleteTerms;
  }

  /**
   * @see #setMaxBufferedDeleteTerms
   */
  public int getMaxBufferedDeleteTerms() {
    return maxBufferedDeleteTerms;
  }

  /** Determines how often segment indices are merged by addDocument().  With
   * smaller values, less RAM is used while indexing, and searches on
   * unoptimized indices are faster, but indexing speed is slower.  With larger
   * values, more RAM is used during indexing, and while searches on unoptimized
   * indices are slower, indexing is faster.  Thus larger values (> 10) are best
   * for batch index creation, and smaller values (< 10) for indices that are
   * interactively maintained.
   *
   * <p>This must never be less than 2.  The default value is 10.
   */
  public void setMergeFactor(int mergeFactor) {
    if (mergeFactor < 2)
      throw new IllegalArgumentException("mergeFactor cannot be less than 2");
    this.mergeFactor = mergeFactor;
  }

  /**
   * @see #setMergeFactor
   */
  public int getMergeFactor() {
    return mergeFactor;
  }

  /** If non-null, information about merges and a message when
   * maxFieldLength is reached will be printed to this.
   */
  public void setInfoStream(PrintStream infoStream) {
    this.infoStream = infoStream;
  }

  /**
   * @see #setInfoStream
   */
  public PrintStream getInfoStream() {
    return infoStream;
  }

  /**
   * Sets the maximum time to wait for a write lock (in milliseconds) for this instance of IndexWriter.
   * @see #setDefaultWriteLockTimeout to change the default value for all instances of IndexWriter.
   */
  public void setWriteLockTimeout(long writeLockTimeout) {
    this.writeLockTimeout = writeLockTimeout;
  }

  /**
   * @see #setWriteLockTimeout
   */
  public long getWriteLockTimeout() {
    return writeLockTimeout;
  }

  /**
   * Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in
   * milliseconds).
   */
  public static void setDefaultWriteLockTimeout(long writeLockTimeout) {
    IndexWriter.WRITE_LOCK_TIMEOUT = writeLockTimeout;
  }

  /**
   * @see #setDefaultWriteLockTimeout
   */
  public static long getDefaultWriteLockTimeout() {
    return IndexWriter.WRITE_LOCK_TIMEOUT;
  }

  /**
   * Flushes all changes to an index and closes all
   * associated files.
   *
   * <p> If an Exception is hit during close, e.g. due to disk
   * full or some other reason, then both the on-disk index
   * and the internal state of the IndexWriter instance will
   * be consistent.
   * However, the close will not be complete even though part
   * of it (flushing buffered documents) may have succeeded,
   * so the write lock will still be held.</p>
   *
   * <p> If you can correct the underlying cause (e.g. free up
   * some disk space) then you can call close() again.
   * Failing that, if you want to force the write lock to be
   * released (dangerous, because you may then lose buffered
   * docs in the IndexWriter instance) then you can do
   * something like this:</p>
   *
   * <pre>
   * try {
   *   writer.close();
   * } finally {
   *   if (IndexReader.isLocked(directory)) {
   *     IndexReader.unlock(directory);
   *   }
   * }
   * </pre>
   *
   * <p>after which, you must be certain not to use the writer
   * instance anymore.</p>
   */
  public synchronized void close() throws IOException {
    flushRamSegments();
    ramDirectory.close();
    if (writeLock != null) {
      writeLock.release();                          // release write lock
      writeLock = null;
    }
    if (closeDir)
      directory.close();
  }

  /** Release the write lock, if needed. */
  protected void finalize() throws Throwable {
    try {
      if (writeLock != null) {
        writeLock.release();                        // release write lock
        writeLock = null;
      }
    } finally {
      super.finalize();
    }
  }

  /** Returns the Directory used by this index. */
  public Directory getDirectory() {
    return directory;
  }

  /** Returns the analyzer used by this index. */
  public Analyzer getAnalyzer() {
    return analyzer;
  }

  /** Returns the number of documents currently in this index. */
  public synchronized int docCount() {
    int count = ramSegmentInfos.size();
    for (int i = 0; i < segmentInfos.size(); i++) {
      SegmentInfo si = segmentInfos.info(i);
      count += si.docCount;
    }
    return count;
  }

  /**
   * The maximum number of terms that will be indexed for a single field in a
   * document.  This limits the amount of memory required for indexing, so that
   * collections with very large files will not crash the indexing process by
   * running out of memory.<p/>
   * Note that this effectively truncates large documents, excluding from the
   * index terms that occur further in the document.  If you know your source
   * documents are large, be sure to set this value high enough to accommodate
   * the expected size.  If you set it to Integer.MAX_VALUE, then the only limit
   * is your memory, but you should anticipate an OutOfMemoryError.<p/>
   * By default, no more than 10,000 terms will be indexed for a field.
   */
  private int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;

  /**
   * Adds a document to this index.  If the document contains more than
   * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
   * discarded.
   *
   * <p> Note that if an Exception is hit (for example disk full)
   * then the index will be consistent, but this document
   * may not have been added.  Furthermore, it's possible
   * the index will have one segment in non-compound format
   * even when using compound files (when a merge has
   * partially succeeded).</p>
   *
   * <p> This method periodically flushes pending documents
   * to the Directory (every {@link #setMaxBufferedDocs}),
   * and also periodically merges segments in the index
   * (every {@link #setMergeFactor} flushes).
   * When this occurs, the method will take more time to run
   * (possibly a long time if the index is large), and will
   * require free temporary space in the Directory to do the
   * merging.</p>
   *
   * <p>The amount of free space required when a merge is
   * triggered is up to 1X the size of all segments being
   * merged, when no readers/searchers are open against the
   * index, and up to 2X the size of all segments being
   * merged when readers/searchers are open against the
   * index (see {@link #optimize()} for details).  Most
   * merges are small (merging the smallest segments
   * together), but whenever a full merge occurs (all
   * segments in the index, which is the worst case for
   * temporary space usage) then the maximum free disk space
   * required is the same as {@link #optimize}.</p>
   */
  public void addDocument(Document doc) throws IOException {
    addDocument(doc, analyzer);
  }

  /**
   * Adds a document to this index, using the provided analyzer instead of the
   * value of {@link #getAnalyzer()}.  If the document contains more than
   * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
   * discarded.
   *
   * <p>See {@link #addDocument(Document)} for details on
   * index and IndexWriter state after an Exception, and
   * flushing/merging temporary free space requirements.</p>
   */
  public void addDocument(Document doc, Analyzer analyzer) throws IOException {
    SegmentInfo newSegmentInfo = buildSingleDocSegment(doc, analyzer);
    synchronized (this) {
      ramSegmentInfos.addElement(newSegmentInfo);
      maybeFlushRamSegments();
    }
  }

  SegmentInfo buildSingleDocSegment(Document doc, Analyzer analyzer)
      throws IOException {
    DocumentWriter dw = new DocumentWriter(ramDirectory, analyzer, this);
    dw.setInfoStream(infoStream);
    String segmentName = newRamSegmentName();
    dw.addDocument(segmentName, doc);
    return new SegmentInfo(segmentName, 1, ramDirectory, false, false);
  }

  /**
   * Deletes the document(s) containing <code>term</code>.
   * @param term the term to identify the documents to be deleted
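
(The excerpt is cut off here at the start of the deleteDocuments javadoc; pages 2 to 5 continue the file.)

To ground the configuration methods above, here is a minimal usage sketch against this generation of the Lucene API. It is illustrative only: the index path "/tmp/demo-index" and the field name "title" are assumptions, not part of this source file.

import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

public class IndexingDemo {
  public static void main(String[] args) throws IOException {
    // create=true wipes any existing index at this (hypothetical) path.
    IndexWriter writer = new IndexWriter("/tmp/demo-index", new StandardAnalyzer(), true);

    // Batch-style tuning, per the javadoc above: larger values trade RAM
    // for indexing throughput.
    writer.setMergeFactor(20);        // default 10, must be >= 2
    writer.setMaxBufferedDocs(100);   // default 10, must be >= 2
    writer.setMaxFieldLength(50000);  // default 10,000 terms per field

    Document doc = new Document();
    doc.add(new Field("title", "Hello Lucene", Field.Store.YES, Field.Index.TOKENIZED));
    writer.addDocument(doc);          // buffered in RAM until the next flush

    System.out.println("docs in index: " + writer.docCount());
    writer.close();                   // flushes buffered docs, releases the write lock
  }
}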

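The close() javadoc warns that a failed close() leaves the write lock held. Below is that recovery pattern filled out into a self-contained sketch; the closeOrUnlock helper name is ours, and as the javadoc notes, forcing the unlock can lose documents still buffered in the writer.

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;

public class SafeClose {
  /** Close the writer; if close() failed, forcibly release the stale write lock. */
  static void closeOrUnlock(IndexWriter writer, Directory directory) throws IOException {
    try {
      writer.close();
    } finally {
      // Only unlock if the lock is actually still held (i.e. close() failed).
      if (IndexReader.isLocked(directory)) {
        IndexReader.unlock(directory);
      }
    }
  }
}

After forcing the unlock this way, the writer instance must not be used again.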
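Finally, note the addDocument(Document, Analyzer) overload shown above: buildSingleDocSegment creates a fresh DocumentWriter per document, so the analyzer argument affects only that one document's segment. A short hypothetical sketch (the "body" field and sample text are ours):

import java.io.IOException;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

public class PerDocumentAnalyzer {
  public static void main(String[] args) throws IOException {
    IndexWriter writer = new IndexWriter("/tmp/demo-index", new StandardAnalyzer(), true);

    Document doc = new Document();
    doc.add(new Field("body", "foo_bar baz_qux", Field.Store.NO, Field.Index.TOKENIZED));

    // Use whitespace-only tokenization for this one document, overriding
    // the writer's default analyzer:
    writer.addDocument(doc, new WhitespaceAnalyzer());

    writer.close();
  }
}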