// File: indexwriter.cs
// (source-viewer residue: "font size" control label)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using Analyzer = Lucene.Net.Analysis.Analyzer;
using Document = Lucene.Net.Documents.Document;
using Similarity = Lucene.Net.Search.Similarity;
using Directory = Lucene.Net.Store.Directory;
using FSDirectory = Lucene.Net.Store.FSDirectory;
using IndexInput = Lucene.Net.Store.IndexInput;
using IndexOutput = Lucene.Net.Store.IndexOutput;
using Lock = Lucene.Net.Store.Lock;
using RAMDirectory = Lucene.Net.Store.RAMDirectory;
namespace Lucene.Net.Index
{
/// <summary> An IndexWriter creates and maintains an index.
///
/// <p>
/// The third argument (<code>create</code>) to the <a
/// href="#IndexWriter(Lucene.Net.Store.Directory,
/// Lucene.Net.Analysis.Analyzer, boolean)"><b>constructor</b></a>
/// determines whether a new index is created, or whether an existing index is
/// opened for the addition of new documents. Note that you can open an index
/// with create=true even while readers are using the index. The old readers will
/// continue to search the "point in time" snapshot they had opened, and won't
/// see the newly created index until they re-open.
/// </p>
///
/// <p>
/// In either case, documents are added with the <a
/// href="#addDocument(Lucene.Net.Documents.Document)"><b>addDocument</b></a>
/// method. When finished adding documents, <a href="#close()"><b>close</b></a>
/// should be called.
/// </p>
///
/// <p>
/// If an index will not have more documents added for a while and optimal search
/// performance is desired, then the <a href="#optimize()"><b>optimize</b></a>
/// method should be called before the index is closed.
/// </p>
///
/// <p>
/// Opening an IndexWriter creates a lock file for the directory in use. Trying
/// to open another IndexWriter on the same directory will lead to an
/// IOException. The IOException is also thrown if an IndexReader on the same
/// directory is used to delete documents from the index.
/// </p>
///
/// <p>
/// As of <b>2.1</b>, IndexWriter can also delete documents by <see cref="Term"/> (see
/// <c>DeleteDocuments</c>) and update (delete then add) documents (see
/// <c>UpdateDocument</c>). Deletes are buffered until the number of buffered delete
/// terms reaches the limit set with <c>SetMaxBufferedDeleteTerms</c>, at which point
/// they are flushed to the index. Note that a flush occurs when there are enough buffered
/// deletes or enough added documents, whichever is sooner. When a flush occurs,
/// both pending deletes and added documents are flushed to the index.
/// </p>
/// </summary>
public class IndexWriter
{
/// <summary> Field initialisation helper called at the start of every constructor
/// visible in this file: stores the value returned by
/// <c>Similarity.GetDefault()</c> in the <c>similarity</c> field.
/// </summary>
private void InitBlock()
{
    this.similarity = Similarity.GetDefault();
}
/// <summary> Default value for the write lock timeout (1,000). Each writer copies
/// this value into its own <c>writeLockTimeout</c> field when it is constructed.
/// NOTE(review): the unit is presumably milliseconds -- confirm against the Lock implementation.
/// </summary>
/// <seealso cref="SetDefaultWriteLockTimeout"/>
public static long WRITE_LOCK_TIMEOUT = 1000;
// Per-instance write lock timeout, initialised from WRITE_LOCK_TIMEOUT.
private long writeLockTimeout = WRITE_LOCK_TIMEOUT;
/// <summary> Name used for the write lock.</summary>
public const System.String WRITE_LOCK_NAME = "write.lock";
/// <summary> Default value is 10. Change using <see cref="SetMergeFactor(int)"/>.</summary>
public const int DEFAULT_MERGE_FACTOR = 10;
/// <summary> Default value is 10. Change using <see cref="SetMaxBufferedDocs(int)"/>.</summary>
public const int DEFAULT_MAX_BUFFERED_DOCS = 10;
/// <summary> Default value is 1000. Change using
/// <see cref="SetMaxBufferedDeleteTerms(int)"/>.
/// </summary>
public const int DEFAULT_MAX_BUFFERED_DELETE_TERMS = 1000;
/// <summary> Default value is <see cref="System.Int32.MaxValue"/>. Change using
/// <see cref="SetMaxMergeDocs(int)"/>.
/// </summary>
public static readonly int DEFAULT_MAX_MERGE_DOCS = System.Int32.MaxValue;
/// <summary> Default value is 10,000. Change using <see cref="SetMaxFieldLength(int)"/>.</summary>
public const int DEFAULT_MAX_FIELD_LENGTH = 10000;
/// <summary> Default value is 128. Change using <see cref="SetTermIndexInterval(int)"/>.</summary>
public const int DEFAULT_TERM_INDEX_INTERVAL = 128;
private Directory directory; // where this index resides
private Analyzer analyzer; // how to analyze text
private Similarity similarity; // how to normalize
private bool inTransaction = false; // true iff we are in a transaction
private bool commitPending; // true if segmentInfos has changes not yet committed
private System.Collections.Hashtable protectedSegments; // segment names that should not be deleted until commit
private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fall back to if the commit fails
internal SegmentInfos segmentInfos = new SegmentInfos(); // the segments
internal SegmentInfos ramSegmentInfos = new SegmentInfos(); // the segments in ramDirectory
private RAMDirectory ramDirectory = new RAMDirectory(); // for temp segs
// NOTE(review): presumably removes index files that are no longer referenced -- confirm in IndexFileDeleter.
private IndexFileDeleter deleter;
// NOTE(review): presumably the lock obtained under WRITE_LOCK_NAME -- the acquiring code is not visible here.
private Lock writeLock;
// Interval between indexed terms; read and written by Get/SetTermIndexInterval.
private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
// The max number of delete terms that can be buffered before
// they must be flushed to disk.
private int maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS;
// This Hashtable buffers delete terms in RAM before they are applied.
// The key is the delete term; the value is the number of RAM
// segments the term applies to.
private System.Collections.Hashtable bufferedDeleteTerms = new System.Collections.Hashtable();
// Running count of buffered delete terms; starts at zero.
private int numBufferedDeleteTerms = 0;
/// <summary> Use compound file setting. Defaults to true, minimizing the number of
/// files used. Setting this to false may improve indexing performance, but
/// may also cause file handle problems.
/// </summary>
private bool useCompoundFile = true;
// NOTE(review): presumably records whether this writer should close the directory
// when it is closed; the Directory-based constructors pass false as the last
// argument to Init -- confirm against Init, which is not visible here.
private bool closeDir;
/// <summary> Reports whether this writer is configured to use the compound file
/// format. The value is simply whatever was last passed to
/// <see cref="SetUseCompoundFile(bool)"/>, or the default if it was never called;
/// it does not describe the state of an existing index.
/// </summary>
/// <returns> the current compound-file setting </returns>
/// <seealso cref="SetUseCompoundFile(bool)"/>
public virtual bool GetUseCompoundFile()
{
    return this.useCompoundFile;
}
/// <summary> Turns usage of the compound file format on or off. When on, the
/// multiple files of each segment are merged into a single file once the
/// segment has been created, regardless of what directory is in use.
/// </summary>
/// <param name="value_Renamed"> <c>true</c> to use compound files, <c>false</c> otherwise </param>
/// <seealso cref="GetUseCompoundFile()"/>
public virtual void SetUseCompoundFile(bool value_Renamed)
{
    this.useCompoundFile = value_Renamed;
}
/// <summary> Expert: replaces the <see cref="Similarity"/> implementation used by
/// this IndexWriter.
/// </summary>
/// <param name="similarity"> the Similarity implementation to use from now on </param>
/// <seealso cref="Similarity.SetDefault(Similarity)"/>
public virtual void SetSimilarity(Similarity similarity)
{
    // "this." is required: the parameter shadows the field of the same name.
    this.similarity = similarity;
}
/// <summary> Expert: returns the <see cref="Similarity"/> implementation used by
/// this IndexWriter.
///
/// <p>
/// Unless changed through <see cref="SetSimilarity(Similarity)"/>, this is the
/// value <c>Similarity.GetDefault()</c> returned when the writer was constructed.
/// </p>
/// </summary>
/// <returns> the Similarity currently held by this writer </returns>
public virtual Similarity GetSimilarity()
{
    return similarity;
}
/// <summary> Expert: sets the interval between indexed terms. Large values cause
/// less memory to be used by an IndexReader, but slow random-access to terms.
/// Small values cause more memory to be used by an IndexReader, and speed
/// random-access to terms.
///
/// <p>
/// This parameter determines the amount of computation required per query
/// term, regardless of the number of documents that contain that term. In
/// particular, it is the maximum number of other terms that must be scanned
/// before a term is located and its frequency and position information may
/// be processed. In a large index with user-entered query terms, query
/// processing time is likely to be dominated not by term lookup but rather
/// by the processing of frequency and positional data. In a small index, or
/// when many uncommon query terms are generated (e.g., by wildcard queries),
/// term lookup may become a dominant cost.
/// </p>
///
/// <p>
/// In particular, <c>numUniqueTerms/interval</c> terms are read into memory
/// by an IndexReader, and, on average, <c>interval/2</c> terms must be
/// scanned for each random term access.
/// </p>
/// </summary>
/// <param name="interval"> the new interval between indexed terms </param>
/// <seealso cref="DEFAULT_TERM_INDEX_INTERVAL"/>
public virtual void SetTermIndexInterval(int interval)
{
    termIndexInterval = interval;
}
/// <summary> Expert: returns the interval between indexed terms currently
/// configured on this writer.
/// </summary>
/// <returns> the term index interval </returns>
/// <seealso cref="SetTermIndexInterval(int)"/>
public virtual int GetTermIndexInterval()
{
    return this.termIndexInterval;
}
/// <summary> Creates an IndexWriter for the index located at <c>path</c>. Text is
/// analyzed with <c>a</c>. When <c>create</c> is true a new, empty index is
/// created in <c>path</c>, replacing the index already there, if any.
/// </summary>
/// <param name="path"> the path to the index directory </param>
/// <param name="a"> the analyzer to use </param>
/// <param name="create"> <c>true</c> to create the index or overwrite the existing
/// one; <c>false</c> to append to the existing index </param>
/// <exception cref="System.IO.IOException"> if the directory cannot be read/written
/// to, or if it does not exist and <c>create</c> is <c>false</c> </exception>
public IndexWriter(System.String path, Analyzer a, bool create)
{
    this.InitBlock();
    this.Init(path, a, create);
}
/// <summary> Creates an IndexWriter for the index located at <c>path</c>, given as
/// a <see cref="System.IO.FileInfo"/>. Text is analyzed with <c>a</c>. When
/// <c>create</c> is true a new, empty index is created in <c>path</c>,
/// replacing the index already there, if any.
/// </summary>
/// <param name="path"> the path to the index directory </param>
/// <param name="a"> the analyzer to use </param>
/// <param name="create"> <c>true</c> to create the index or overwrite the existing
/// one; <c>false</c> to append to the existing index </param>
/// <exception cref="System.IO.IOException"> if the directory cannot be read/written
/// to, or if it does not exist and <c>create</c> is <c>false</c> </exception>
public IndexWriter(System.IO.FileInfo path, Analyzer a, bool create)
{
    this.InitBlock();
    this.Init(path, a, create);
}
/// <summary> Creates an IndexWriter for the index stored in the directory <c>d</c>.
/// Text is analyzed with <c>a</c>. When <c>create</c> is true a new, empty index
/// is created in <c>d</c>, replacing the index already there, if any.
/// </summary>
/// <param name="d"> the index directory </param>
/// <param name="a"> the analyzer to use </param>
/// <param name="create"> <c>true</c> to create the index or overwrite the existing
/// one; <c>false</c> to append to the existing index </param>
/// <exception cref="System.IO.IOException"> if the directory cannot be read/written
/// to, or if it does not exist and <c>create</c> is <c>false</c> </exception>
public IndexWriter(Directory d, Analyzer a, bool create)
{
    this.InitBlock();
    // The trailing "false" is passed through to Init unchanged for caller-supplied directories.
    this.Init(d, a, create, false);
}
/// <summary> Creates an IndexWriter for the index located at <c>path</c>. The index
/// is created first if it does not already exist; otherwise the writer appends
/// to the existing index. Text is analyzed with <c>a</c>.
/// </summary>
/// <param name="path"> the path to the index directory </param>
/// <param name="a"> the analyzer to use </param>
/// <exception cref="System.IO.IOException"> if the directory cannot be created or
/// read/written to </exception>
public IndexWriter(System.String path, Analyzer a)
{
    InitBlock();
    // Create a fresh index only when none exists yet at the given path.
    bool createNew = !IndexReader.IndexExists(path);
    Init(path, a, createNew);
}
/// <summary> Creates an IndexWriter for the index located at <c>path</c>, given as
/// a <see cref="System.IO.FileInfo"/>. The index is created first if it does not
/// already exist; otherwise the writer appends to the existing index. Text is
/// analyzed with <c>a</c>.
/// </summary>
/// <param name="path"> the path to the index directory </param>
/// <param name="a"> the analyzer to use </param>
/// <exception cref="System.IO.IOException"> if the directory cannot be created or
/// read/written to </exception>
public IndexWriter(System.IO.FileInfo path, Analyzer a)
{
    InitBlock();
    // Create a fresh index only when none exists yet at the given path.
    bool createNew = !IndexReader.IndexExists(path);
    Init(path, a, createNew);
}
/// <summary> Creates an IndexWriter for the index stored in the directory <c>d</c>.
/// The index is created first if it does not already exist; otherwise the
/// writer appends to the existing index. Text is analyzed with <c>a</c>.
/// </summary>
/// <param name="d"> the index directory </param>
/// <param name="a"> the analyzer to use </param>
/// <exception cref="System.IO.IOException"> if the directory cannot be created or
/// read/written to </exception>
public IndexWriter(Directory d, Analyzer a)
{
    InitBlock();
    // Create a fresh index only when the directory does not already hold one.
    bool createNew = !IndexReader.IndexExists(d);
    Init(d, a, createNew, false);
}
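// --- Source-viewer residue (not part of indexwriter.cs) ---
// Keyboard shortcut help:
//   Copy code:            Ctrl + C
//   Search code:          Ctrl + F
//   Full-screen mode:     F11
//   Toggle theme:         Ctrl + Shift + D
//   Show shortcuts:       ?
//   Increase font size:   Ctrl + =
//   Decrease font size:   Ctrl + -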