// IndexWriter.cs (excerpt) — NOTE(review): the original file header was lost
// in extraction; the two lines here were website scraping residue.
StartTransaction();
try
{
for (int i = 0; i < dirs.Length; i++)
{
SegmentInfos sis = new SegmentInfos(); // read infos from dir
sis.Read(dirs[i]);
for (int j = 0; j < sis.Count; j++)
{
segmentInfos.Add(sis.Info(j)); // add each info
}
}
// merge newly added segments in log(n) passes
while (segmentInfos.Count > start + mergeFactor)
{
for (int base_Renamed = start; base_Renamed < segmentInfos.Count; base_Renamed++)
{
int end = System.Math.Min(segmentInfos.Count, base_Renamed + mergeFactor);
if (end - base_Renamed > 1)
{
MergeSegments(segmentInfos, base_Renamed, end);
}
}
}
success = true;
}
finally
{
if (success)
{
CommitTransaction();
}
else
{
RollbackTransaction();
}
}
Optimize(); // final cleanup
}
}
/// <summary> Merges all segments from an array of indexes into this index.
/// <p>
/// This is similar to AddIndexes(Directory[]). However, no Optimize() is
/// called either at the beginning or at the end. Instead, merges are carried
/// out as necessary.
/// </p>
/// <p>
/// This requires this index not be among those to be added, and the upper
/// bound* of those segment doc counts not exceed maxMergeDocs.
/// </p>
/// <p>
/// See {@link #AddIndexes(Directory[])} for details on transactional
/// semantics, temporary free space required in the Directory, and non-CFS
/// segments on an Exception.
/// </p>
/// </summary>
/// <param name="dirs">directories whose segments are appended to this index;
/// must not include this writer's own directory</param>
/// <exception cref="System.ArgumentException">if one of the directories is
/// this index itself, or a source segment's doc count would push the merge
/// upper bound past maxMergeDocs</exception>
public virtual void AddIndexesNoOptimize(Directory[] dirs)
{
    lock (this)
    {
        // Adding indexes can be viewed as adding a sequence of segments S to
        // a sequence of segments T. Segments in T follow the invariants but
        // segments in S may not since they could come from multiple indexes.
        // Here is the merge algorithm for AddIndexesNoOptimize():
        //
        // 1 Flush ram segments.
        // 2 Consider a combined sequence with segments from T followed
        //   by segments from S (same as current AddIndexes(Directory[])).
        // 3 Assume the highest level for segments in S is h. Call
        //   MaybeMergeSegments(), but instead of starting w/ lowerBound = -1
        //   and upperBound = maxBufferedDocs, start w/ lowerBound = -1 and
        //   upperBound = upperBound of level h. After this, the invariants
        //   are guaranteed except for the last < M segments whose levels <= h.
        // 4 If the invariants hold for the last < M segments whose levels <= h,
        //   if some of those < M segments are from S (not merged in step 3),
        //   properly copy them over*, otherwise done.
        //   Otherwise, simply merge those segments. If the merge results in
        //   a segment of level <= h, done. Otherwise, it's of level h+1 and call
        //   MaybeMergeSegments() starting w/ upperBound = upperBound of level
        //   h+1.
        //
        // * Ideally, we want to simply copy a segment. However, directory does
        //   not support copy yet. In addition, source may use compound file or
        //   not and target may use compound file or not. So we use
        //   MergeSegments() to copy a segment, which may cause doc count to
        //   change because deleted docs are garbage collected.

        // 1 flush ram segments
        FlushRamSegments();

        // 2 copy segment infos and find the highest level from dirs
        int start = segmentInfos.Count;
        int startUpperBound = minMergeDocs;

        bool success = false;

        StartTransaction();

        try
        {
            try
            {
                for (int i = 0; i < dirs.Length; i++)
                {
                    if (directory == dirs[i])
                    {
                        // cannot add this index: segments may be deleted in
                        // merge before added
                        throw new System.ArgumentException("Cannot add this index to itself");
                    }

                    SegmentInfos sis = new SegmentInfos(); // read infos from dir
                    sis.Read(dirs[i]);
                    for (int j = 0; j < sis.Count; j++)
                    {
                        SegmentInfo info = sis.Info(j);
                        segmentInfos.Add(info); // add each info

                        // Raise the working upper bound until it covers the
                        // largest incoming segment: this identifies level h.
                        while (startUpperBound < info.docCount)
                        {
                            startUpperBound *= mergeFactor; // find the highest level from dirs
                            if (startUpperBound > maxMergeDocs)
                            {
                                // upper bound cannot exceed maxMergeDocs
                                throw new System.ArgumentException("Upper bound cannot exceed maxMergeDocs");
                            }
                        }
                    }
                }
            }
            catch (System.ArgumentException)
            {
                // Undo the partial copy of incoming segment infos before
                // propagating the failure.
                for (int i = segmentInfos.Count - 1; i >= start; i--)
                {
                    segmentInfos.RemoveAt(i);
                }
                // Bare "throw;" (not "throw e;") preserves the original
                // stack trace of the rethrown exception.
                throw;
            }

            // 3 maybe merge segments starting from the highest level from dirs
            MaybeMergeSegments(startUpperBound);

            // get the tail segments whose levels <= h
            int segmentCount = segmentInfos.Count;
            int numTailSegments = 0;
            while (numTailSegments < segmentCount && startUpperBound >= segmentInfos.Info(segmentCount - 1 - numTailSegments).docCount)
            {
                numTailSegments++;
            }
            if (numTailSegments == 0)
            {
                success = true;
                return;
            }

            // 4 make sure invariants hold for the tail segments whose levels <= h
            if (CheckNonDecreasingLevels(segmentCount - numTailSegments))
            {
                // identify the segments from S to be copied (not merged in 3)
                int numSegmentsToCopy = 0;
                while (numSegmentsToCopy < segmentCount && directory != segmentInfos.Info(segmentCount - 1 - numSegmentsToCopy).dir)
                {
                    numSegmentsToCopy++;
                }
                if (numSegmentsToCopy == 0)
                {
                    success = true;
                    return;
                }

                // copy those segments from S (see * above: copy is done via a
                // single-segment merge because Directory has no copy support)
                for (int i = segmentCount - numSegmentsToCopy; i < segmentCount; i++)
                {
                    MergeSegments(segmentInfos, i, i + 1);
                }
                if (CheckNonDecreasingLevels(segmentCount - numSegmentsToCopy))
                {
                    success = true;
                    return;
                }
            }

            // invariants do not hold, simply merge those segments
            MergeSegments(segmentInfos, segmentCount - numTailSegments, segmentCount);

            // maybe merge segments again if the result reached level h+1
            if (segmentInfos.Info(segmentInfos.Count - 1).docCount > startUpperBound)
            {
                MaybeMergeSegments(startUpperBound * mergeFactor);
            }

            success = true;
        }
        finally
        {
            // Transactional semantics: commit only on full success,
            // otherwise roll back every change made above.
            if (success)
            {
                CommitTransaction();
            }
            else
            {
                RollbackTransaction();
            }
        }
    }
}
/// <summary> Merges the provided indexes into this index.
/// <p>
/// After this completes, the index is optimized.
/// </p>
/// <p>
/// The provided IndexReaders are not closed.
/// </p>
///
/// <p>
/// See {@link #AddIndexes(Directory[])} for details on transactional
/// semantics, temporary free space required in the Directory, and non-CFS
/// segments on an Exception.
/// </p>
/// </summary>
/// <param name="readers">readers over the indexes to merge in; left open
/// by this method</param>
public virtual void AddIndexes(IndexReader[] readers)
{
    lock (this)
    {
        Optimize(); // start with zero or 1 seg

        System.String mergedName = NewSegmentName();
        SegmentMerger merger = new SegmentMerger(this, mergedName);

        System.Collections.ArrayList segmentsToDelete = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
        IndexReader sReader = null;
        if (segmentInfos.Count == 1)
        {
            // add existing index, if any, so it is folded into the merge
            sReader = SegmentReader.Get(segmentInfos.Info(0));
            merger.Add(sReader);
            segmentsToDelete.Add(sReader); // queue segment for deletion
        }

        // add new indexes
        for (int i = 0; i < readers.Length; i++)
            merger.Add(readers[i]);

        SegmentInfo info;

        // remember the current segments_N file so it can be deleted
        // after the new generation is committed
        System.String segmentsInfosFileName = segmentInfos.GetCurrentSegmentFileName();

        bool success = false;

        StartTransaction();

        try
        {
            int docCount = merger.Merge(); // merge 'em

            // pop old infos & add new: the merged segment replaces
            // everything that was in the index
            segmentInfos.RemoveRange(0, segmentInfos.Count);
            info = new SegmentInfo(mergedName, docCount, directory, false, true);
            segmentInfos.Add(info);
            commitPending = true;

            if (sReader != null)
                sReader.Close();

            success = true;
        }
        finally
        {
            // commit on success, otherwise roll back all changes
            if (!success)
            {
                RollbackTransaction();
            }
            else
            {
                CommitTransaction();
            }
        }

        deleter.DeleteFile(segmentsInfosFileName); // delete old segments_N file
        deleter.DeleteSegments(segmentsToDelete); // delete now-unused segments

        if (useCompoundFile)
        {
            // second transaction: convert the merged segment to the
            // compound file format
            success = false;

            segmentsInfosFileName = segmentInfos.GetCurrentSegmentFileName();
            System.Collections.ArrayList filesToDelete;

            StartTransaction();

            try
            {
                filesToDelete = merger.CreateCompoundFile(mergedName + ".cfs");

                info.SetUseCompoundFile(true);
                commitPending = true;
                success = true;
            }
            finally
            {
                if (!success)
                {
                    RollbackTransaction();
                }
                else
                {
                    CommitTransaction();
                }
            }

            deleter.DeleteFile(segmentsInfosFileName); // delete old segments_N file
            deleter.DeleteFiles(filesToDelete); // delete now unused files of segment
        }
    }
}
// Overview of merge policy:
//
// A flush is triggered either by close() or by the number of ram segments
// reaching maxBufferedDocs. After a disk segment is created by the flush,
// further merges may be triggered.
//
// LowerBound and upperBound set the limits on the doc count of a segment
// which may be merged. Initially, lowerBound is set to 0 and upperBound
// to maxBufferedDocs. Starting from the rightmost* segment whose doc count
// > lowerBound and <= upperBound, count the number of consecutive segments
// whose doc count <= upperBound.
//
// Case 1: number of worthy segments < mergeFactor, no merge, done.
// Case 2: number of worthy segments == mergeFactor, merge these segments.
// If the doc count of the merged segment <= upperBound, done.
// Otherwise, set lowerBound to upperBound, and multiply upperBound
// by mergeFactor, go through the process again.
// Case 3: number of worthy segments > mergeFactor (in the case mergeFactor
// M changes), merge the leftmost* M segments. If the doc count of
// the merged segment <= upperBound, consider the merged segment for
// further merges on this same level. Merge the now leftmost* M
// segments, and so on, until number of worthy segments < mergeFactor.
// If the doc count of all the merged segments <= upperBound, done.
// Otherwise, set lowerBound to upperBound, and multiply upperBound
// by mergeFactor, go through the process again.
// Note that case 2 can be considered as a special case of case 3.
//
// This merge policy guarantees two invariants if M does not change and
// segment doc count is not reaching maxMergeDocs:
// B for maxBufferedDocs, f(n) defined as ceil(log_M(ceil(n/B)))
// 1: If i (left*) and i+1 (right*) are two consecutive segments of doc
// counts x and y, then f(x) >= f(y).
// 2: The number of committed segments on the same level (f(n)) <= M.
/// <summary>
/// Hook invoked after pending added and deleted documents have been
/// flushed to the Directory, but before the change is committed
/// (before the new segments_N file is written).
/// The default implementation does nothing; subclasses may override.
/// </summary>
internal virtual void DoAfterFlush()
{
}
/// <summary>
/// Flushes buffered (RAM-resident) changes to the Directory when either
/// the number of buffered documents or the number of buffered delete
/// terms has reached its configured threshold; otherwise does nothing.
/// </summary>
protected internal void MaybeFlushRamSegments()
{
    bool docLimitReached = ramSegmentInfos.Count >= minMergeDocs;
    bool deleteLimitReached = numBufferedDeleteTerms >= maxBufferedDeleteTerms;

    if (docLimitReached || deleteLimitReached)
    {
        FlushRamSegments();
    }
}
/// <summary> Expert: Flushes all RAM-resident segments (buffered
/// documents) to the Directory, then may trigger further segment merges.
/// </summary>
private void FlushRamSegments()
{
    lock (this)
    {
        // Nothing buffered (no docs and no delete terms) -> nothing to do.
        if (ramSegmentInfos.Count == 0 && bufferedDeleteTerms.Count == 0)
        {
            return;
        }

        // Move all buffered segments onto disk as one merged segment,
        // then let the merge policy run starting from the lowest level.
        MergeSegments(ramSegmentInfos, 0, ramSegmentInfos.Count);
        MaybeMergeSegments(minMergeDocs);
    }
}
/// <summary> Flush all in-memory buffered updates (adds and deletes) to the Directory.
///
/// </summary>
/// <throws> IOException </throws>
public void Flush()
{
lock (this)
{
// NOTE(review): the body of Flush() is missing from this extraction — it was
// replaced by website UI text (keyboard-shortcut help) during scraping.
// Restore the method body (and the file's remaining content) from the
// original source before compiling.