📄 segmentmerger.cs

📁 Lucene.Net 版本源码测试通过
💻 CS
📖 第 1 页 / 共 2 页
字号:
上一页 12
							continue;
						termVectorsWriter.AddAllDocVectors(reader.GetTermFreqVectors(docNum));
					}
				}
			}
			finally
			{
				termVectorsWriter.Close();
			}
		}
		
		private IndexOutput freqOutput = null;
		private IndexOutput proxOutput = null;
		private TermInfosWriter termInfosWriter = null;
		private int skipInterval;
		private SegmentMergeQueue queue = null;
		
		private void  MergeTerms()
		{
			try
			{
				freqOutput = directory.CreateOutput(segment + ".frq");
				proxOutput = directory.CreateOutput(segment + ".prx");
				termInfosWriter = new TermInfosWriter(directory, segment, fieldInfos, termIndexInterval);
				skipInterval = termInfosWriter.skipInterval;
				queue = new SegmentMergeQueue(readers.Count);
				
				MergeTermInfos();
			}
			finally
			{
				if (freqOutput != null)
					freqOutput.Close();
				if (proxOutput != null)
					proxOutput.Close();
				if (termInfosWriter != null)
					termInfosWriter.Close();
				if (queue != null)
					queue.Close();
			}
		}
		
		private void  MergeTermInfos()
		{
			int base_Renamed = 0;
			for (int i = 0; i < readers.Count; i++)
			{
				IndexReader reader = (IndexReader) readers[i];
				TermEnum termEnum = reader.Terms();
				SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader);
				base_Renamed += reader.NumDocs();
				if (smi.Next())
					queue.Put(smi);
				// initialize queue
				else
					smi.Close();
			}
			
			SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];
			
			while (queue.Size() > 0)
			{
				int matchSize = 0; // pop matching terms
				match[matchSize++] = (SegmentMergeInfo) queue.Pop();
				Term term = match[0].term;
				SegmentMergeInfo top = (SegmentMergeInfo) queue.Top();
				
				while (top != null && term.CompareTo(top.term) == 0)
				{
					match[matchSize++] = (SegmentMergeInfo) queue.Pop();
					top = (SegmentMergeInfo) queue.Top();
				}
				
				MergeTermInfo(match, matchSize); // add new TermInfo
				
				while (matchSize > 0)
				{
					SegmentMergeInfo smi = match[--matchSize];
					if (smi.Next())
						queue.Put(smi);
					// restore queue
					else
						smi.Close(); // done with a segment
				}
			}
		}
		
		private TermInfo termInfo = new TermInfo(); // minimize consing
		
		/// <summary>Merge one term found in one or more segments. The array <code>smis</code>
		/// contains segments that are positioned at the same term. <code>N</code>
		/// is the number of cells in the array actually occupied.
		/// 
		/// </summary>
		/// <param name="smis">array of segments
		/// </param>
		/// <param name="n">number of cells in the array actually occupied
		/// </param>
		private void  MergeTermInfo(SegmentMergeInfo[] smis, int n)
		{
			long freqPointer = freqOutput.GetFilePointer();
			long proxPointer = proxOutput.GetFilePointer();
			
			int df = AppendPostings(smis, n); // append posting data
			
			long skipPointer = WriteSkip();
			
			if (df > 0)
			{
				// add an entry to the dictionary with pointers to prox and freq files
				termInfo.Set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer));
				termInfosWriter.Add(smis[0].term, termInfo);
			}
		}
		
		/// <summary>Process postings from multiple segments all positioned on the
		/// same term. Writes out merged entries into freqOutput and
		/// the proxOutput streams.
		/// 
		/// </summary>
		/// <param name="smis">array of segments
		/// </param>
		/// <param name="n">number of cells in the array actually occupied
		/// </param>
		/// <returns> number of documents across all segments where this term was found
		/// </returns>
		private int AppendPostings(SegmentMergeInfo[] smis, int n)
		{
			int lastDoc = 0;
			int df = 0; // number of docs w/ term
			ResetSkip();
			for (int i = 0; i < n; i++)
			{
				SegmentMergeInfo smi = smis[i];
				TermPositions postings = smi.GetPositions();
				int base_Renamed = smi.base_Renamed;
				int[] docMap = smi.GetDocMap();
				postings.Seek(smi.termEnum);
				while (postings.Next())
				{
					int doc = postings.Doc();
					if (docMap != null)
						doc = docMap[doc]; // map around deletions
					doc += base_Renamed; // convert to merged space
					
					if (doc < 0 || (df > 0 && doc <= lastDoc))
						throw new System.SystemException("docs out of order (" + doc + " <= " + lastDoc + " )");
					
					df++;
					
					if ((df % skipInterval) == 0)
					{
						BufferSkip(lastDoc);
					}
					
					int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1
					lastDoc = doc;
					
					int freq = postings.Freq();
					if (freq == 1)
					{
						freqOutput.WriteVInt(docCode | 1); // write doc & freq=1
					}
					else
					{
						freqOutput.WriteVInt(docCode); // write doc
						freqOutput.WriteVInt(freq); // write frequency in doc
					}
					
					int lastPosition = 0; // write position deltas
					for (int j = 0; j < freq; j++)
					{
						int position = postings.NextPosition();
						proxOutput.WriteVInt(position - lastPosition);
						lastPosition = position;
					}
				}
			}
			return df;
		}
		
		private RAMOutputStream skipBuffer = new RAMOutputStream();
		private int lastSkipDoc;
		private long lastSkipFreqPointer;
		private long lastSkipProxPointer;
		
		private void  ResetSkip()
		{
			skipBuffer.Reset();
			lastSkipDoc = 0;
			lastSkipFreqPointer = freqOutput.GetFilePointer();
			lastSkipProxPointer = proxOutput.GetFilePointer();
		}
		
		private void  BufferSkip(int doc)
		{
			long freqPointer = freqOutput.GetFilePointer();
			long proxPointer = proxOutput.GetFilePointer();
			
			skipBuffer.WriteVInt(doc - lastSkipDoc);
			skipBuffer.WriteVInt((int) (freqPointer - lastSkipFreqPointer));
			skipBuffer.WriteVInt((int) (proxPointer - lastSkipProxPointer));
			
			lastSkipDoc = doc;
			lastSkipFreqPointer = freqPointer;
			lastSkipProxPointer = proxPointer;
		}
		
		private long WriteSkip()
		{
			long skipPointer = freqOutput.GetFilePointer();
			skipBuffer.WriteTo(freqOutput);
			return skipPointer;
		}
		
		private void  MergeNorms()
		{
			byte[] normBuffer = null;
			IndexOutput output = null;
			try
			{
				for (int i = 0; i < fieldInfos.Size(); i++)
				{
					FieldInfo fi = fieldInfos.FieldInfo(i);
					if (fi.isIndexed && !fi.omitNorms)
					{
						if (output == null)
						{
							output = directory.CreateOutput(segment + "." + IndexFileNames.NORMS_EXTENSION);
							output.WriteBytes(NORMS_HEADER, NORMS_HEADER.Length);
						}
						for (int j = 0; j < readers.Count; j++)
						{
							IndexReader reader = (IndexReader) readers[j];
							int maxDoc = reader.MaxDoc();
							if (normBuffer == null || normBuffer.Length < maxDoc)
							{
								// the buffer is too small for the current segment
								normBuffer = new byte[maxDoc];
							}
							reader.Norms(fi.name, normBuffer, 0);
							if (!reader.HasDeletions())
							{
								//optimized case for segments without deleted docs
								output.WriteBytes(normBuffer, maxDoc);
							}
							else
							{
								// this segment has deleted docs, so we have to
								// check for every doc if it is deleted or not
								for (int k = 0; k < maxDoc; k++)
								{
									if (!reader.IsDeleted(k))
									{
										output.WriteByte(normBuffer[k]);
									}
								}
							}
						}
					}
				}
			}
			finally
			{
				if (output != null)
				{
					output.Close();
				}
			}
		}
	}
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -