// indexwriter.cs
ramSegmentInfos.Add(newSegmentInfo);
MaybeFlushRamSegments();
}
}
internal virtual SegmentInfo BuildSingleDocSegment(Document doc, Analyzer analyzer)
{
DocumentWriter dw = new DocumentWriter(ramDirectory, analyzer, this);
dw.SetInfoStream(infoStream);
System.String segmentName = NewRamSegmentName();
dw.AddDocument(segmentName, doc);
return new SegmentInfo(segmentName, 1, ramDirectory, false, false);
}
/// <summary> Deletes the document(s) containing <code>term</code>.</summary>
/// <param name="term">the term to identify the documents to be deleted</param>
public virtual void DeleteDocuments(Term term)
{
lock (this)
{
BufferDeleteTerm(term);
MaybeFlushRamSegments();
}
}
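// Illustrative usage (not part of this file; the writer variable and the
// "id" field are hypothetical):
//
//     writer.DeleteDocuments(new Term("id", "12345"));
//     writer.Close(); // buffered deletes are applied when the writer flushes/closes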
/// <summary> Deletes the document(s) containing any of the terms. All deletes are
/// flushed at the same time.
/// </summary>
/// <param name="terms">array of terms to identify the documents to be deleted</param>
public virtual void DeleteDocuments(Term[] terms)
{
lock (this)
{
for (int i = 0; i < terms.Length; i++)
{
BufferDeleteTerm(terms[i]);
}
MaybeFlushRamSegments();
}
}
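// Illustrative usage (hypothetical names): several delete terms passed in one
// call, so they are buffered and flushed together:
//
//     Term[] obsolete = new Term[] { new Term("id", "12345"), new Term("id", "67890") };
//     writer.DeleteDocuments(obsolete);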
/// <summary> Updates a document by first deleting the document(s) containing
/// <code>term</code> and then adding the new document. The delete and the
/// subsequent add are atomic as seen by a reader on the same index (a flush
/// may happen only after the add).
/// </summary>
/// <param name="term">the term to identify the document(s) to be deleted</param>
/// <param name="doc">the document to be added</param>
public virtual void UpdateDocument(Term term, Document doc)
{
UpdateDocument(term, doc, GetAnalyzer());
}
/// <summary> Updates a document by first deleting the document(s) containing
/// <code>term</code> and then adding the new document. The delete and the
/// subsequent add are atomic as seen by a reader on the same index (a flush
/// may happen only after the add).
/// </summary>
/// <param name="term">the term to identify the document(s) to be deleted</param>
/// <param name="doc">the document to be added</param>
/// <param name="analyzer">the analyzer to use when analyzing the document</param>
public virtual void UpdateDocument(Term term, Document doc, Analyzer analyzer)
{
SegmentInfo newSegmentInfo = BuildSingleDocSegment(doc, analyzer);
lock (this)
{
BufferDeleteTerm(term);
ramSegmentInfos.Add(newSegmentInfo);
MaybeFlushRamSegments();
}
}
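// Illustrative usage (hypothetical field names, assuming the Lucene.Net 2.x
// Field/Document API): replacing an existing document keyed by its "id" term.
//
//     Document updated = new Document();
//     updated.Add(new Field("id", "12345", Field.Store.YES, Field.Index.UN_TOKENIZED));
//     updated.Add(new Field("body", "new contents", Field.Store.NO, Field.Index.TOKENIZED));
//     writer.UpdateDocument(new Term("id", "12345"), updated);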
internal System.String NewRamSegmentName()
{
lock (this)
{
return "_ram_" + System.Convert.ToString(ramSegmentInfos.counter++, 16);
}
}
// for test purpose
internal int GetSegmentCount()
{
lock (this)
{
return segmentInfos.Count;
}
}
// for test purpose
internal int GetRamSegmentCount()
{
lock (this)
{
return ramSegmentInfos.Count;
}
}
// for test purpose
internal int GetDocCount(int i)
{
lock (this)
{
if (i >= 0 && i < segmentInfos.Count)
{
return segmentInfos.Info(i).docCount;
}
else
{
return -1;
}
}
}
internal System.String NewSegmentName()
{
lock (this)
{
return "_" + System.Convert.ToString(segmentInfos.counter++, 16);
}
}
/// <summary> Determines how often segment indices are merged by addDocument(). With
/// smaller values, less RAM is used while indexing, and searches on
/// unoptimized indices are faster, but indexing speed is slower. With larger
/// values, more RAM is used during indexing, and while searches on
/// unoptimized indices are slower, indexing is faster. Thus larger values (>
/// 10) are best for batch index creation, and smaller values (< 10) for
/// indices that are interactively maintained.
///
/// <p>
/// This must never be less than 2. The default value is
/// {@link #DEFAULT_MERGE_FACTOR}.
///
/// </summary>
private int mergeFactor = DEFAULT_MERGE_FACTOR;
/// <summary> Determines the minimal number of documents required before the buffered
/// in-memory documents are merged and a new Segment is created. Since
/// Documents are merged in a {@link Lucene.Net.Store.RAMDirectory}, a
/// larger value gives faster indexing. At the same time, mergeFactor limits
/// the number of files open in a FSDirectory.
///
/// <p>
/// The default value is {@link #DEFAULT_MAX_BUFFERED_DOCS}.
///
/// </summary>
private int minMergeDocs = DEFAULT_MAX_BUFFERED_DOCS;
/// <summary> Determines the largest number of documents ever merged by addDocument().
/// Small values (e.g., less than 10,000) are best for interactive indexing,
/// as this limits the length of pauses while indexing to a few seconds.
/// Larger values are best for batched indexing and speedier searches.
///
/// <p>
/// The default value is {@link #DEFAULT_MAX_MERGE_DOCS}.
///
/// </summary>
private int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
/// <summary> If non-null, information about merges will be printed to this.
///
/// </summary>
private System.IO.TextWriter infoStream = null;
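// Illustrative tuning sketch (hypothetical values; the setter names assume the
// public IndexWriter setters that back these fields):
//
//     writer.SetMergeFactor(30);                 // batch indexing: merge less often
//     writer.SetMaxBufferedDocs(1000);           // buffer more docs in RAM before a flush
//     writer.SetMaxMergeDocs(100000);            // cap how many docs a merged segment may hold
//     writer.SetInfoStream(System.Console.Out);  // print merge diagnostics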
/// <summary> Merges all segments together into a single segment, optimizing an index
/// for search.
///
/// <p>
/// Note that this requires substantial temporary free space in the Directory
/// (see <a target="_top"
/// href="http://issues.apache.org/jira/browse/LUCENE-764">LUCENE-764</a>
/// for details):
/// </p>
///
/// <ul>
/// <li>
///
/// <p>
/// If no readers/searchers are open against the index, then free space
/// required is up to 1X the total size of the starting index. For example,
/// if the starting index is 10 GB, then you must have up to 10 GB of free
/// space before calling optimize.
/// </p>
///
/// <li>
///
/// <p>
/// If readers/searchers are using the index, then free space required is up
/// to 2X the size of the starting index. This is because in addition to the
/// 1X used by optimize, the original 1X of the starting index is still
/// consuming space in the Directory as the readers are holding the segments
/// files open. Even on Unix, where it will appear as if the files are gone
/// ("ls" won't list them), they still consume storage due to "delete on last
/// close" semantics.
/// </p>
///
/// <p>
/// Furthermore, if some but not all readers re-open while the optimize is
/// underway, this will cause > 2X temporary space to be consumed as those
/// new readers will then hold open the partially optimized segments at that
/// time. It is best not to re-open readers while optimize is running.
/// </p>
///
/// </ul>
///
/// <p>
/// The actual temporary usage could be much less than these figures (it
/// depends on many factors).
/// </p>
///
/// <p>
/// Once the optimize completes, the total size of the index will be less
/// than the size of the starting index. It could be quite a bit smaller (if
/// there were many pending deletes) or just slightly smaller.
/// </p>
///
/// <p>
/// If an Exception is hit during optimize(), for example due to disk full,
/// the index will not be corrupt and no documents will have been lost.
/// However, it may have been partially optimized (some segments were merged
/// but not all), and it's possible that one of the segments in the index
/// will be in non-compound format even when using compound file format. This
/// will occur when the Exception is hit during conversion of the segment
/// into compound format.
/// </p>
/// </summary>
public virtual void Optimize()
{
lock (this)
{
FlushRamSegments();
while (segmentInfos.Count > 1
    || (segmentInfos.Count == 1
        && (SegmentReader.HasDeletions(segmentInfos.Info(0))
            || SegmentReader.HasSeparateNorms(segmentInfos.Info(0))
            || segmentInfos.Info(0).dir != directory
            || (useCompoundFile && !SegmentReader.UsesCompoundFile(segmentInfos.Info(0))))))
{
int minSegment = segmentInfos.Count - mergeFactor;
MergeSegments(segmentInfos, minSegment < 0 ? 0 : minSegment, segmentInfos.Count);
}
}
}
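// Illustrative usage (hypothetical writer/doc variables): a full optimize after
// batch indexing, keeping in mind the temporary disk-space needs described above.
//
//     writer.AddDocument(doc);   // ... many adds ...
//     writer.Optimize();         // merge everything down to a single segment
//     writer.Close();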
/*
* Begin a transaction. During a transaction, any segment merges that happen
* (or ram segments flushed) will not write a new segments file and will not
* remove any files that were present at the start of the transaction. You
* must make a matched (try/finally) call to commitTransaction() or
* rollbackTransaction() to finish the transaction.
*/
private void StartTransaction()
{
if (inTransaction)
{
throw new System.IO.IOException("transaction is already in process");
}
rollbackSegmentInfos = (SegmentInfos) segmentInfos.Clone();
protectedSegments = new System.Collections.Hashtable();
for (int i = 0; i < segmentInfos.Count; i++)
{
SegmentInfo si = (SegmentInfo) segmentInfos[i];
protectedSegments.Add(si.name, si.name);
}
inTransaction = true;
}
/*
* Rolls back the transaction and restores state to where we were at the
* start.
*/
private void RollbackTransaction()
{
// Keep the same segmentInfos instance but replace all
// of its SegmentInfo instances. This is so the next
// attempt to commit using this instance of IndexWriter
// will always write to a new generation ("write once").
segmentInfos.Clear();
segmentInfos.AddRange(rollbackSegmentInfos);
// Ask deleter to locate unreferenced files & remove
// them:
deleter.ClearPendingFiles();
deleter.FindDeletableFiles();
deleter.DeleteFiles();
ClearTransaction();
}
/*
* Commits the transaction. This will write the new segments file and remove
* any pending deletions we have accumulated during the transaction.
*/
private void CommitTransaction()
{
if (commitPending)
{
bool success = false;
try
{
// If we hit e.g. disk full during this write we have
// to roll back:
segmentInfos.Write(directory); // commit changes
success = true;
}
finally
{
if (!success)
{
RollbackTransaction();
}
}
deleter.CommitPendingFiles();
commitPending = false;
}
ClearTransaction();
}
/*
* Should only be called by rollbackTransaction & commitTransaction
*/
private void ClearTransaction()
{
protectedSegments = null;
rollbackSegmentInfos = null;
inTransaction = false;
}
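// Sketch of the matched-call pattern the transaction comment above describes
// (simplified; illustrative only):
//
//     StartTransaction();
//     bool success = false;
//     try
//     {
//         // ... merge segments or copy external segments ...
//         success = true;
//     }
//     finally
//     {
//         if (success)
//             CommitTransaction();
//         else
//             RollbackTransaction();
//     }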
/// <summary> Merges all segments from an array of indexes into this index.
///
/// <p>
/// This may be used to parallelize batch indexing. A large document
/// collection can be broken into sub-collections. Each sub-collection can be
/// indexed in parallel, on a different thread, process or machine. The
/// complete index can then be created by merging sub-collection indexes with
/// this method.
///
/// <p>
/// After this completes, the index is optimized.
///
/// <p>
/// This method is transactional in how Exceptions are handled: it does not
/// commit a new segments_N file until all indexes are added. This means if
/// an Exception occurs (for example disk full), then either no indexes will
/// have been added or they all will have been.
/// </p>
///
/// <p>
/// If an Exception is hit, it's still possible that all indexes were
/// successfully added. This happens when the Exception is hit when trying to
/// build a CFS file. In this case, one segment in the index will be in
/// non-CFS format, even when using compound file format.
/// </p>
///
/// <p>
/// Also note that on an Exception, the index may still have been partially
/// or fully optimized even though none of the input indexes were added.
/// </p>
///
/// <p>
/// Note that this requires temporary free space in the Directory up to 2X
/// the sum of all input indexes (including the starting index). If
/// readers/searchers are open against the starting index, then temporary
/// free space required will be higher by the size of the starting index (see
/// {@link #Optimize()} for details).
/// </p>
///
/// <p>
/// Once this completes, the final size of the index will be less than the
/// sum of all input index sizes (including the starting index). It could be
/// quite a bit smaller (if there were many pending deletes) or just slightly
/// smaller.
/// </p>
///
/// <p>
/// See <a target="_top"
/// href="http://issues.apache.org/jira/browse/LUCENE-702">LUCENE-702</a>
/// for details.
/// </p>
/// </summary>
public virtual void AddIndexes(Directory[] dirs)
{
lock (this)
{
Optimize(); // start with zero or 1 seg
int start = segmentInfos.Count;
bool success = false;
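// Illustrative usage of AddIndexes (hypothetical paths, assuming the Lucene.Net 2.x
// FSDirectory.GetDirectory API): merging independently built sub-indexes into this index.
//
//     Directory[] parts = new Directory[] {
//         FSDirectory.GetDirectory("/tmp/part0", false),
//         FSDirectory.GetDirectory("/tmp/part1", false)
//     };
//     writer.AddIndexes(parts);
//     writer.Close();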