📄 indexwriter.java
字号:
package org.apache.lucene.index;/* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2001 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache" and "Apache Software Foundation" and * "Apache Lucene" must not be used to endorse or promote products * derived from this software without prior written permission. For * written permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache", * "Apache Lucene", nor may "Apache" appear in their name, without * prior written permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation. For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. */import java.io.IOException;import java.io.File;import java.io.PrintStream;import java.util.Vector;import org.apache.lucene.store.Directory;import org.apache.lucene.store.RAMDirectory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.store.Lock;import org.apache.lucene.store.InputStream;import org.apache.lucene.store.OutputStream;import org.apache.lucene.document.Document;import org.apache.lucene.analysis.Analyzer;/** An IndexWriter creates and maintains an index. The third argument to the <a href="#IndexWriter"><b>constructor</b></a> determines whether a new index is created, or whether an existing index is opened for the addition of new documents. In either case, documents are added with the <a href="#addDocument"><b>addDocument</b></a> method. When finished adding documents, <a href="#close"><b>close</b></a> should be called. If an index will not have more documents added for a while and optimal search performance is desired, then the <a href="#optimize"><b>optimize</b></a> method should be called before the index is closed. 
*/public final class IndexWriter { private Directory directory; // where this index resides private Analyzer analyzer; // how to analyze text private SegmentInfos segmentInfos = new SegmentInfos(); // the segments private final Directory ramDirectory = new RAMDirectory(); // for temp segs private Lock writeLock; /** Constructs an IndexWriter for the index in <code>path</code>. Text will be analyzed with <code>a</code>. If <code>create</code> is true, then a new, empty index will be created in <code>path</code>, replacing the index already there, if any. */ public IndexWriter(String path, Analyzer a, boolean create) throws IOException { this(FSDirectory.getDirectory(path, create), a, create); } /** Constructs an IndexWriter for the index in <code>path</code>. Text will be analyzed with <code>a</code>. If <code>create</code> is true, then a new, empty index will be created in <code>path</code>, replacing the index already there, if any. */ public IndexWriter(File path, Analyzer a, boolean create) throws IOException { this(FSDirectory.getDirectory(path, create), a, create); } /** Constructs an IndexWriter for the index in <code>d</code>. Text will be analyzed with <code>a</code>. If <code>create</code> is true, then a new, empty index will be created in <code>d</code>, replacing the index already there, if any. 
*/ public IndexWriter(Directory d, Analyzer a, final boolean create) throws IOException { directory = d; analyzer = a; Lock writeLock = directory.makeLock("write.lock"); if (!writeLock.obtain()) // obtain write lock throw new IOException("Index locked for write: " + writeLock); this.writeLock = writeLock; // save it synchronized (directory) { // in- & inter-process sync new Lock.With(directory.makeLock("commit.lock")) { public Object doBody() throws IOException { if (create) segmentInfos.write(directory); else segmentInfos.read(directory); return null; } }.run(); } } /** Flushes all changes to an index, closes all associated files, and closes the directory that the index is stored in. */ public final synchronized void close() throws IOException { flushRamSegments(); ramDirectory.close(); writeLock.release(); // release write lock writeLock = null; directory.close(); } /** Release the write lock, if needed. */ protected final void finalize() throws IOException { if (writeLock != null) { writeLock.release(); // release write lock writeLock = null; } } /** Returns the number of documents currently in this index. */ public final synchronized int docCount() { int count = 0; for (int i = 0; i < segmentInfos.size(); i++) { SegmentInfo si = segmentInfos.info(i); count += si.docCount; } return count; } /** The maximum number of terms that will be indexed for a single field in a document. This limits the amount of memory required for indexing, so that collections with very large files will not crash the indexing process by running out of memory. <p>By default, no more than 10,000 terms will be indexed for a field. 
*/
  public int maxFieldLength = 10000;

  /** Adds a document to this index.

    <p>The document is handed to a new <code>DocumentWriter</code> built on
    <code>ramDirectory</code> under a freshly generated segment name.  Then,
    while holding this writer's monitor, a one-document
    <code>SegmentInfo</code> for that name is appended to the segment list
    and <code>maybeMergeSegments()</code> is invoked.

    @param doc the document to add
    @throws IOException if writing the document or merging segments fails
  */
  public final void addDocument(Document doc) throws IOException {
    DocumentWriter writer =
      new DocumentWriter(ramDirectory, analyzer, maxFieldLength);
    String name = newSegmentName();
    writer.addDocument(name, doc);

    synchronized (this) {
      SegmentInfo added = new SegmentInfo(name, 1, ramDirectory);
      segmentInfos.addElement(added);
      maybeMergeSegments();
    }
  }

  /** Produces the next segment name: an underscore followed by the current
    value of <code>segmentInfos.counter</code> rendered in radix
    <code>Character.MAX_RADIX</code>.  The counter is incremented as a side
    effect.
  */
  private final synchronized String newSegmentName() {
    int ordinal = segmentInfos.counter++;
    String encoded = Integer.toString(ordinal, Character.MAX_RADIX);
    return "_" + encoded;
  }

  /** Determines how often segment indexes are merged by addDocument().  With
   * smaller values, less RAM is used while indexing, and searches on
   * unoptimized indexes are faster, but indexing speed is slower.  With larger
   * values more RAM is used while indexing and searches on unoptimized indexes
   * are slower, but indexing is faster.  Thus larger values (> 10) are best
   * for batched index creation, and smaller values (< 10) for indexes that are
   * interactively maintained.
   *
   * <p>This must never be less than 2.  The default value is 10.*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -