📄 segmentmerger.cs
字号:
continue;
termVectorsWriter.AddAllDocVectors(reader.GetTermFreqVectors(docNum));
}
}
}
finally
{
termVectorsWriter.Close();
}
}
// Output for the merged segment's ".frq" file; created and closed by MergeTerms.
private IndexOutput freqOutput = null;
// Output for the merged segment's ".prx" file; created and closed by MergeTerms.
private IndexOutput proxOutput = null;
// Writer that receives one dictionary entry per merged term (see MergeTermInfo);
// created and closed by MergeTerms.
private TermInfosWriter termInfosWriter = null;
// Copied from termInfosWriter.skipInterval in MergeTerms; AppendPostings buffers
// a skip entry each time a term's document count reaches a multiple of this value.
private int skipInterval;
// Holds one SegmentMergeInfo per reader that still has terms left; filled and
// drained by MergeTermInfos, closed by MergeTerms.
private SegmentMergeQueue queue = null;
/// <summary>Creates the ".frq" and ".prx" outputs, the term infos writer and the
/// merge queue for the new segment, merges all terms via <code>MergeTermInfos</code>,
/// and finally closes everything that was opened.
///
/// Each resource is closed inside its own try/finally so that an exception
/// thrown by one <code>Close()</code> call cannot prevent the remaining
/// resources from being closed. The close order is unchanged: freq output,
/// prox output, term infos writer, queue.
/// </summary>
private void MergeTerms()
{
    try
    {
        freqOutput = directory.CreateOutput(segment + ".frq");
        proxOutput = directory.CreateOutput(segment + ".prx");
        termInfosWriter = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
        skipInterval = termInfosWriter.skipInterval;
        queue = new SegmentMergeQueue(readers.Count);
        MergeTermInfos();
    }
    finally
    {
        // Any of these may still be null if construction above failed part-way.
        try
        {
            if (freqOutput != null)
                freqOutput.Close();
        }
        finally
        {
            try
            {
                if (proxOutput != null)
                    proxOutput.Close();
            }
            finally
            {
                try
                {
                    if (termInfosWriter != null)
                        termInfosWriter.Close();
                }
                finally
                {
                    if (queue != null)
                        queue.Close();
                }
            }
        }
    }
}
/// <summary>Merges the term enumerations of all readers. Every reader that has at
/// least one term is placed on <code>queue</code>; the loop then repeatedly pops
/// all entries positioned on the same term, merges that term with
/// <code>MergeTermInfo</code>, and advances each popped entry, re-queuing it or
/// closing it once its terms are exhausted.
/// </summary>
private void MergeTermInfos()
{
    // Seed the queue with one entry per reader that has any terms at all.
    int docBase = 0;
    for (int r = 0; r < readers.Count; r++)
    {
        IndexReader segmentReader = (IndexReader) readers[r];
        SegmentMergeInfo entry = new SegmentMergeInfo(docBase, segmentReader.Terms(), segmentReader);
        docBase += segmentReader.NumDocs();
        if (!entry.Next())
        {
            entry.Close();
        }
        else
        {
            queue.Put(entry);
        }
    }

    SegmentMergeInfo[] sameTerm = new SegmentMergeInfo[readers.Count];
    while (queue.Size() > 0)
    {
        // Pop the first entry, then every further entry positioned on an equal term.
        sameTerm[0] = (SegmentMergeInfo) queue.Pop();
        int count = 1;
        Term current = sameTerm[0].term;
        for (SegmentMergeInfo head = (SegmentMergeInfo) queue.Top();
             head != null && current.CompareTo(head.term) == 0;
             head = (SegmentMergeInfo) queue.Top())
        {
            sameTerm[count++] = (SegmentMergeInfo) queue.Pop();
        }

        // Write the merged postings and dictionary entry for this term.
        MergeTermInfo(sameTerm, count);

        // Advance the popped entries, last popped first.
        for (int m = count - 1; m >= 0; m--)
        {
            SegmentMergeInfo popped = sameTerm[m];
            if (!popped.Next())
            {
                popped.Close(); // this segment has no more terms
            }
            else
            {
                queue.Put(popped); // back onto the queue at its next term
            }
        }
    }
}
// Single TermInfo instance that MergeTermInfo re-populates via Set() for every
// term, instead of allocating a new object per term.
private TermInfo termInfo = new TermInfo(); // minimize consing
/// <summary>Merges a single term that occurs in one or more segments. The first
/// <code>n</code> cells of <code>smis</code> hold the segments currently
/// positioned on that term. The postings and skip data are appended to the
/// output files; a dictionary entry is added only when at least one document
/// was written for the term.
///
/// </summary>
/// <param name="smis">array of segments
/// </param>
/// <param name="n">number of cells in the array actually occupied
/// </param>
private void MergeTermInfo(SegmentMergeInfo[] smis, int n)
{
    // Remember where this term's data starts in each file before writing it.
    long freqStart = freqOutput.GetFilePointer();
    long proxStart = proxOutput.GetFilePointer();

    int docCount = AppendPostings(smis, n);
    long skipStart = WriteSkip();

    if (docCount <= 0)
    {
        return; // no documents for this term: no dictionary entry
    }

    // The skip position is recorded as an offset from the start of the term's freq data.
    int skipOffset = (int) (skipStart - freqStart);
    termInfo.Set(docCount, freqStart, proxStart, skipOffset);
    termInfosWriter.Add(smis[0].term, termInfo);
}
/// <summary>Writes the merged postings of one term, taken from every segment in
/// <code>smis</code> that is positioned on it, to <code>freqOutput</code>
/// (document deltas and frequencies) and <code>proxOutput</code> (position deltas).
/// A skip entry is buffered each time the running document count reaches a
/// multiple of <code>skipInterval</code>.
///
/// </summary>
/// <param name="smis">array of segments
/// </param>
/// <param name="n">number of cells in the array actually occupied
/// </param>
/// <returns> number of documents across all segments where this term was found
/// </returns>
private int AppendPostings(SegmentMergeInfo[] smis, int n)
{
    int previousDoc = 0;
    int docCount = 0;
    ResetSkip();

    for (int s = 0; s < n; s++)
    {
        SegmentMergeInfo segmentInfo = smis[s];
        TermPositions positions = segmentInfo.GetPositions();
        int docBase = segmentInfo.base_Renamed;
        int[] remap = segmentInfo.GetDocMap();
        positions.Seek(segmentInfo.termEnum);

        while (positions.Next())
        {
            // Map around deletions (when a doc map exists), then shift into merged doc space.
            int segmentDoc = positions.Doc();
            int mergedDoc = (remap == null ? segmentDoc : remap[segmentDoc]) + docBase;

            bool outOfOrder = mergedDoc < 0 || (docCount > 0 && mergedDoc <= previousDoc);
            if (outOfOrder)
            {
                throw new System.SystemException("docs out of order (" + mergedDoc + " <= " + previousDoc + " )");
            }

            docCount++;
            if (docCount % skipInterval == 0)
            {
                // The skip entry records the previous document, not the current one.
                BufferSkip(previousDoc);
            }

            // Doc delta shifted left by one; the low bit flags freq == 1.
            int docCode = (mergedDoc - previousDoc) << 1;
            previousDoc = mergedDoc;

            int termFreq = positions.Freq();
            if (termFreq != 1)
            {
                freqOutput.WriteVInt(docCode);
                freqOutput.WriteVInt(termFreq);
            }
            else
            {
                freqOutput.WriteVInt(docCode | 1);
            }

            // Positions are written as deltas from the previous position in this doc.
            int previousPosition = 0;
            for (int remaining = termFreq; remaining > 0; remaining--)
            {
                int currentPosition = positions.NextPosition();
                proxOutput.WriteVInt(currentPosition - previousPosition);
                previousPosition = currentPosition;
            }
        }
    }

    return docCount;
}
// In-memory buffer that collects the skip entries of the term currently being
// written; WriteSkip copies it into freqOutput and ResetSkip clears it.
private RAMOutputStream skipBuffer = new RAMOutputStream();
// Document number, freq-file pointer and prox-file pointer recorded at the most
// recent skip entry (or at ResetSkip); BufferSkip writes deltas relative to these.
private int lastSkipDoc;
private long lastSkipFreqPointer;
private long lastSkipProxPointer;
/// <summary>Prepares skip bookkeeping for a new term: clears the skip buffer and
/// re-bases the "last skip" document and file pointers on document 0 and the
/// current positions of the freq and prox outputs.
/// </summary>
private void ResetSkip()
{
    skipBuffer.Reset();

    long freqStart = freqOutput.GetFilePointer();
    long proxStart = proxOutput.GetFilePointer();

    lastSkipDoc = 0;
    lastSkipFreqPointer = freqStart;
    lastSkipProxPointer = proxStart;
}
/// <summary>Appends one skip entry to the skip buffer. The entry consists of three
/// VInts: the document delta and the freq/prox file pointer deltas, each
/// relative to the values stored at the previous skip entry (or at
/// <code>ResetSkip</code>). The stored values are then updated.
/// </summary>
/// <param name="doc">document number to record in the skip entry
/// </param>
private void BufferSkip(int doc)
{
    long freqNow = freqOutput.GetFilePointer();
    long proxNow = proxOutput.GetFilePointer();

    int docDelta = doc - lastSkipDoc;
    int freqDelta = (int) (freqNow - lastSkipFreqPointer);
    int proxDelta = (int) (proxNow - lastSkipProxPointer);

    skipBuffer.WriteVInt(docDelta);
    skipBuffer.WriteVInt(freqDelta);
    skipBuffer.WriteVInt(proxDelta);

    // The next entry is encoded relative to this one.
    lastSkipDoc = doc;
    lastSkipFreqPointer = freqNow;
    lastSkipProxPointer = proxNow;
}
/// <summary>Copies the buffered skip entries into <code>freqOutput</code>.
/// </summary>
/// <returns> the position in <code>freqOutput</code> at which the skip data begins
/// </returns>
private long WriteSkip()
{
    long skipStart = freqOutput.GetFilePointer();
    skipBuffer.WriteTo(freqOutput);
    return skipStart;
}
/// <summary>Writes the norms of every indexed field that does not omit norms into a
/// single norms file for the merged segment. The file is created, and the
/// <code>NORMS_HEADER</code> written, only when the first such field is found.
/// For each field the norms of all readers are appended in reader order,
/// leaving out the bytes of deleted documents.
/// </summary>
private void MergeNorms()
{
    IndexOutput normsOut = null;
    byte[] scratch = null;
    try
    {
        for (int f = 0; f < fieldInfos.Size(); f++)
        {
            FieldInfo info = fieldInfos.FieldInfo(f);
            if (!info.isIndexed || info.omitNorms)
            {
                continue; // this field contributes no norms
            }

            if (normsOut == null)
            {
                // First field with norms: create the file and write its header.
                normsOut = directory.CreateOutput(segment + "." + IndexFileNames.NORMS_EXTENSION);
                normsOut.WriteBytes(NORMS_HEADER, NORMS_HEADER.Length);
            }

            for (int r = 0; r < readers.Count; r++)
            {
                IndexReader segmentReader = (IndexReader) readers[r];
                int docLimit = segmentReader.MaxDoc();

                // Replace the shared buffer when it is too small for this segment.
                if (scratch == null || scratch.Length < docLimit)
                {
                    scratch = new byte[docLimit];
                }

                segmentReader.Norms(info.name, scratch, 0);

                if (segmentReader.HasDeletions())
                {
                    // Deleted docs present: copy only the bytes of live documents.
                    for (int d = 0; d < docLimit; d++)
                    {
                        if (!segmentReader.IsDeleted(d))
                        {
                            normsOut.WriteByte(scratch[d]);
                        }
                    }
                }
                else
                {
                    // No deletions: the whole range can be written in one call.
                    normsOut.WriteBytes(scratch, docLimit);
                }
            }
        }
    }
    finally
    {
        if (normsOut != null)
        {
            normsOut.Close();
        }
    }
}
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -