// IndexWriter.cs
private IndexWriter(Directory d, Analyzer a, bool create, bool closeDir)
{
    InitBlock();
    Init(d, a, create, closeDir);
}

private void Init(System.String path, Analyzer a, bool create)
{
    Init(FSDirectory.GetDirectory(path), a, create, true);
}

private void Init(System.IO.FileInfo path, Analyzer a, bool create)
{
    Init(FSDirectory.GetDirectory(path), a, create, true);
}

private void Init(Directory d, Analyzer a, bool create, bool closeDir)
{
    this.closeDir = closeDir;
    directory = d;
    analyzer = a;

    if (create)
    {
        // Clear the write lock in case it's leftover:
        directory.ClearLock(IndexWriter.WRITE_LOCK_NAME);
    }

    Lock writeLock = directory.MakeLock(IndexWriter.WRITE_LOCK_NAME);
    if (!writeLock.Obtain(writeLockTimeout)) // obtain write lock
    {
        throw new System.IO.IOException("Index locked for write: " + writeLock);
    }
    this.writeLock = writeLock; // save it

    try
    {
        if (create)
        {
            // Try to read first. This is to allow create
            // against an index that's currently open for
            // searching. In this case we write the next
            // segments_N file with no segments:
            try
            {
                segmentInfos.Read(directory);
                segmentInfos.Clear();
            }
            catch (System.IO.IOException)
            {
                // Likely this means it's a fresh directory
            }
            segmentInfos.Write(directory);
        }
        else
        {
            segmentInfos.Read(directory);
        }

        // Create a deleter to keep track of which files can
        // be deleted:
        deleter = new IndexFileDeleter(segmentInfos, directory);
        deleter.SetInfoStream(infoStream);
        deleter.FindDeletableFiles();
        deleter.DeleteFiles();
    }
    catch (System.IO.IOException)
    {
        this.writeLock.Release();
        this.writeLock = null;
        throw;
    }
}
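
// A minimal usage sketch (path and analyzer are illustrative, not taken from
// this file): with create == true any stale write lock is cleared and an empty
// segments_N is written; with create == false the existing segmentInfos are read.
//
//     IndexWriter writer = new IndexWriter("/path/to/index", new StandardAnalyzer(), true);
//     try
//     {
//         // ... add documents ...
//     }
//     finally
//     {
//         writer.Close(); // also releases the write lock
//     }
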
/// <summary> Determines the largest number of documents ever merged by addDocument().
/// Small values (e.g., less than 10,000) are best for interactive indexing,
/// as this limits the length of pauses while indexing to a few seconds.
/// Larger values are best for batched indexing and speedier searches.
///
/// <p>
/// The default value is {@link Integer#MAX_VALUE}.
/// </summary>
public virtual void SetMaxMergeDocs(int maxMergeDocs)
{
    this.maxMergeDocs = maxMergeDocs;
}

/// <seealso cref="SetMaxMergeDocs">
/// </seealso>
public virtual int GetMaxMergeDocs()
{
    return maxMergeDocs;
}
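
// Tuning sketch (the value is only an example): a small cap keeps each merge,
// and therefore each indexing pause, short for interactive use; batch jobs
// normally leave the Integer.MAX_VALUE default in place.
//
//     writer.SetMaxMergeDocs(10000); // interactive indexing: bound merge size
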
/// <summary> The maximum number of terms that will be indexed for a single field in a
/// document. This limits the amount of memory required for indexing, so that
/// collections with very large files will not crash the indexing process by
/// running out of memory.<p/> Note that this effectively truncates large
/// documents, excluding from the index terms that occur further in the
/// document. If you know your source documents are large, be sure to set
/// this value high enough to accommodate the expected size. If you set it to
/// Integer.MAX_VALUE, then the only limit is your memory, but you should
/// anticipate an OutOfMemoryError.<p/> By default, no more than 10,000
/// terms will be indexed for a field.
/// </summary>
public virtual void SetMaxFieldLength(int maxFieldLength)
{
    this.maxFieldLength = maxFieldLength;
}

/// <seealso cref="SetMaxFieldLength">
/// </seealso>
public virtual int GetMaxFieldLength()
{
    return maxFieldLength;
}
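
// Tuning sketch (the value is illustrative): raise the per-field term cap when
// the source documents are known to be large, accepting the extra memory cost.
//
//     writer.SetMaxFieldLength(100000); // index up to 100,000 terms per field
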
/// <summary> Determines the minimal number of documents required before the buffered
/// in-memory documents are merged and a new Segment is created. Since
/// Documents are merged in a {@link Lucene.Net.Store.RAMDirectory},
/// a larger value gives faster indexing. At the same time, mergeFactor limits
/// the number of files open in an FSDirectory.
///
/// <p>
/// The default value is 10.
/// </p>
/// </summary>
/// <throws> ArgumentException if maxBufferedDocs is smaller than 2 </throws>
public virtual void SetMaxBufferedDocs(int maxBufferedDocs)
{
    if (maxBufferedDocs < 2)
        throw new System.ArgumentException("maxBufferedDocs must be at least 2");
    this.minMergeDocs = maxBufferedDocs;
}
/// <seealso cref="SetMaxBufferedDocs">
/// </seealso>
public virtual int GetMaxBufferedDocs()
{
    return minMergeDocs;
}
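
// Tuning sketch (the value is illustrative): buffer more documents in RAM
// before a new on-disk segment is written, trading memory for indexing speed.
//
//     writer.SetMaxBufferedDocs(1000); // flush a new segment every 1000 docs
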
/// <summary> <p>
/// Determines the minimal number of delete terms required before the
/// buffered in-memory delete terms are applied and flushed. If there are
/// documents buffered in memory at the time, they are merged and a new
/// segment is created.
/// </p>
///
/// <p>
/// The default value is {@link #DEFAULT_MAX_BUFFERED_DELETE_TERMS}.
/// </p>
/// </summary>
/// <throws> ArgumentException if maxBufferedDeleteTerms is smaller than 1 </throws>
public virtual void SetMaxBufferedDeleteTerms(int maxBufferedDeleteTerms)
{
    if (maxBufferedDeleteTerms < 1)
        throw new System.ArgumentException("maxBufferedDeleteTerms must be at least 1");
    this.maxBufferedDeleteTerms = maxBufferedDeleteTerms;
}
/// <seealso cref="SetMaxBufferedDeleteTerms">
/// </seealso>
public virtual int GetMaxBufferedDeleteTerms()
{
    return maxBufferedDeleteTerms;
}
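
// Tuning sketch (the value and term are illustrative, and delete-by-term is
// assumed to be available in this writer version): apply buffered deletes
// after every 100 delete terms.
//
//     writer.SetMaxBufferedDeleteTerms(100);
//     writer.DeleteDocuments(new Term("id", "42")); // buffered until the threshold
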
/// <summary> Determines how often segment indices are merged by addDocument(). With
/// smaller values, less RAM is used while indexing, and searches on
/// unoptimized indices are faster, but indexing speed is slower. With larger
/// values, more RAM is used during indexing, and while searches on
/// unoptimized indices are slower, indexing is faster. Thus larger values (>
/// 10) are best for batch index creation, and smaller values (< 10) for
/// indices that are interactively maintained.
///
/// <p>
/// This must never be less than 2. The default value is 10.
/// </summary>
public virtual void SetMergeFactor(int mergeFactor)
{
    if (mergeFactor < 2)
        throw new System.ArgumentException("mergeFactor cannot be less than 2");
    this.mergeFactor = mergeFactor;
}

/// <seealso cref="SetMergeFactor">
/// </seealso>
public virtual int GetMergeFactor()
{
    return mergeFactor;
}
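
// Tuning sketch (values are only examples): a higher mergeFactor favors batch
// throughput; a lower one favors search speed on an unoptimized,
// interactively maintained index.
//
//     writer.SetMergeFactor(30); // batch indexing
//     writer.SetMergeFactor(2);  // interactive indexing
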
/// <summary> If non-null, information about merges and a message when maxFieldLength
/// is reached will be printed to this stream.
/// </summary>
public virtual void SetInfoStream(System.IO.TextWriter infoStream)
{
    this.infoStream = infoStream;
}

/// <seealso cref="SetInfoStream">
/// </seealso>
public virtual System.IO.TextWriter GetInfoStream()
{
    return infoStream;
}
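
// Diagnostic sketch: route merge and maxFieldLength messages to the console.
// System.Console.Out is a TextWriter, so it can be passed directly.
//
//     writer.SetInfoStream(System.Console.Out);
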
/// <summary> Sets the maximum time to wait for a write lock (in milliseconds) for this
/// instance of IndexWriter.
/// </summary>
/// <seealso cref="SetDefaultWriteLockTimeout"> to change the default value for all
/// instances of IndexWriter.
/// </seealso>
public virtual void SetWriteLockTimeout(long writeLockTimeout)
{
    this.writeLockTimeout = writeLockTimeout;
}
/// <seealso cref="SetWriteLockTimeout">
/// </seealso>
public virtual long GetWriteLockTimeout()
{
    return writeLockTimeout;
}

/// <summary> Sets the default (for any instance of IndexWriter) maximum time to wait
/// for a write lock (in milliseconds).
/// </summary>
public static void SetDefaultWriteLockTimeout(long writeLockTimeout)
{
    IndexWriter.WRITE_LOCK_TIMEOUT = writeLockTimeout;
}

/// <seealso cref="SetDefaultWriteLockTimeout">
/// </seealso>
public static long GetDefaultWriteLockTimeout()
{
    return IndexWriter.WRITE_LOCK_TIMEOUT;
}
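
// Sketch of the two timeout scopes (values are illustrative): the static
// default applies to writers created afterwards, the instance setter only to
// one writer.
//
//     IndexWriter.SetDefaultWriteLockTimeout(2000); // all future writers: 2s
//     writer.SetWriteLockTimeout(5000);             // this writer only: 5s
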
/// <summary> Flushes all changes to an index and closes all associated files.
///
/// <p>
/// If an Exception is hit during close, e.g. due to disk full or some other
/// reason, then both the on-disk index and the internal state of the
/// IndexWriter instance will be consistent. However, the close will not be
/// complete even though part of it (flushing buffered documents) may have
/// succeeded, so the write lock will still be held.
/// </p>
///
/// <p>
/// If you can correct the underlying cause (e.g. free up some disk space) then
/// you can call Close() again. Failing that, if you want to force the write
/// lock to be released (dangerous, because you may then lose buffered docs
/// in the IndexWriter instance) then you can do something like this:
/// </p>
///
/// <pre>
/// try {
///     writer.Close();
/// } finally {
///     if (IndexReader.IsLocked(directory)) {
///         IndexReader.Unlock(directory);
///     }
/// }
/// </pre>
///
/// <p>
/// after which you must be certain not to use the writer instance anymore.
/// </p>
/// </summary>
public virtual void Close()
{
    lock (this)
    {
        FlushRamSegments();
        ramDirectory.Close();
        if (writeLock != null)
        {
            writeLock.Release(); // release write lock
            writeLock = null;
        }
        if (closeDir)
            directory.Close();
    }
}
/// <summary>Release the write lock, if needed. </summary>
~IndexWriter()
{
    if (writeLock != null)
    {
        writeLock.Release(); // release write lock
        writeLock = null;
    }
}
/// <summary>Returns the Directory used by this index. </summary>
public virtual Directory GetDirectory()
{
    return directory;
}

/// <summary>Returns the analyzer used by this index. </summary>
public virtual Analyzer GetAnalyzer()
{
    return analyzer;
}
/// <summary>Returns the number of documents currently in this index. </summary>
public virtual int DocCount()
{
    lock (this)
    {
        int count = ramSegmentInfos.Count;
        for (int i = 0; i < segmentInfos.Count; i++)
        {
            SegmentInfo si = segmentInfos.Info(i);
            count += si.docCount;
        }
        return count;
    }
}
/// <summary> The maximum number of terms that will be indexed for a single field in a
/// document. This limits the amount of memory required for indexing, so that
/// collections with very large files will not crash the indexing process by
/// running out of memory.<p/> Note that this effectively truncates large
/// documents, excluding from the index terms that occur further in the
/// document. If you know your source documents are large, be sure to set
/// this value high enough to accommodate the expected size. If you set it to
/// Integer.MAX_VALUE, then the only limit is your memory, but you should
/// anticipate an OutOfMemoryError.<p/> By default, no more than 10,000
/// terms will be indexed for a field.
///
/// </summary>
private int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;
/// <summary> Adds a document to this index. If the document contains more than
/// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder
/// are discarded.
///
/// <p>
/// Note that if an Exception is hit (for example disk full) then the index
/// will be consistent, but this document may not have been added.
/// Furthermore, it's possible the index will have one segment in
/// non-compound format even when using compound files (when a merge has
/// partially succeeded).
/// </p>
///
/// <p>
/// This method periodically flushes pending documents to the Directory
/// (every {@link #setMaxBufferedDocs}), and also periodically merges
/// segments in the index (every {@link #setMergeFactor} flushes). When this
/// occurs, the method will take more time to run (possibly a long time if
/// the index is large), and will require free temporary space in the
/// Directory to do the merging.
/// </p>
///
/// <p>
/// The amount of free space required when a merge is triggered is up to 1X
/// the size of all segments being merged, when no readers/searchers are open
/// against the index, and up to 2X the size of all segments being merged
/// when readers/searchers are open against the index (see
/// {@link #Optimize()} for details). Most merges are small (merging the
/// smallest segments together), but whenever a full merge occurs (all
/// segments in the index, which is the worst case for temporary space usage)
/// then the maximum free disk space required is the same as
/// {@link #optimize}.
/// </p>
/// </summary>
public virtual void AddDocument(Document doc)
{
    AddDocument(doc, analyzer);
}
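
// Usage sketch (the field name and flags are illustrative): build a document
// and add it with the writer's default analyzer.
//
//     Document doc = new Document();
//     doc.Add(new Field("title", "hello world", Field.Store.YES, Field.Index.TOKENIZED));
//     writer.AddDocument(doc);
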
/// <summary> Adds a document to this index, using the provided analyzer instead of the
/// value of {@link #GetAnalyzer()}. If the document contains more than
/// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder
/// are discarded.
///
/// <p>
/// See {@link #AddDocument(Document)} for details on index and IndexWriter
/// state after an Exception, and flushing/merging temporary free space
/// requirements.
/// </p>
/// </summary>
public virtual void AddDocument(Document doc, Analyzer analyzer)
{
    SegmentInfo newSegmentInfo = BuildSingleDocSegment(doc, analyzer);
    lock (this)
    {
        // Queue the new single-document segment, then flush/merge the
        // buffered RAM segments once the configured thresholds are reached:
        ramSegmentInfos.Add(newSegmentInfo);
        MaybeFlushRamSegments();
    }
}
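
// Usage sketch (the analyzer choice is illustrative): index a single document
// with a different analyzer than the writer-wide default set at construction.
//
//     writer.AddDocument(doc, new WhitespaceAnalyzer());
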