// indexwriter.cs
ramSegmentInfos.Add(newSegmentInfo);
MaybeFlushRamSegments();
}
}
internal virtual SegmentInfo BuildSingleDocSegment(Document doc, Analyzer analyzer)
{
DocumentWriter dw = new DocumentWriter(ramDirectory, analyzer, this);
dw.SetInfoStream(infoStream);
System.String segmentName = NewRamSegmentName();
dw.AddDocument(segmentName, doc);
return new SegmentInfo(segmentName, 1, ramDirectory, false, false);
}
/// <summary> Deletes the document(s) containing <code>term</code>.</summary>
/// <param name="term">the term to identify the documents to be deleted</param>
public virtual void DeleteDocuments(Term term)
{
lock (this)
{
BufferDeleteTerm(term);
MaybeFlushRamSegments();
}
}
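// Illustrative usage (not part of this file; the writer variable and the
// "id" field are hypothetical):
//
//     writer.DeleteDocuments(new Term("id", "12345"));
//     writer.Close(); // buffered deletes are applied when the writer flushes/closes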
/// <summary> Deletes the document(s) containing any of the terms. All deletes are
/// flushed at the same time.
/// </summary>
/// <param name="terms">array of terms to identify the documents to be deleted</param>
public virtual void DeleteDocuments(Term[] terms)
{
lock (this)
{
for (int i = 0; i < terms.Length; i++)
{
BufferDeleteTerm(terms[i]);
}
MaybeFlushRamSegments();
}
}
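// Illustrative usage (hypothetical names): several delete terms passed in one
// call, so they are buffered and flushed together:
//
//     Term[] obsolete = new Term[] { new Term("id", "12345"), new Term("id", "67890") };
//     writer.DeleteDocuments(obsolete);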
/// <summary> Updates a document by first deleting the document(s) containing
/// <code>term</code> and then adding the new document. The delete and the
/// subsequent add are atomic as seen by a reader on the same index (a flush
/// may happen only after the add).
/// </summary>
/// <param name="term">the term to identify the document(s) to be deleted</param>
/// <param name="doc">the document to be added</param>
public virtual void UpdateDocument(Term term, Document doc)
{
UpdateDocument(term, doc, GetAnalyzer());
}
/// <summary> Updates a document by first deleting the document(s) containing
/// <code>term</code> and then adding the new document. The delete and the
/// subsequent add are atomic as seen by a reader on the same index (a flush
/// may happen only after the add).
/// </summary>
/// <param name="term">the term to identify the document(s) to be deleted</param>
/// <param name="doc">the document to be added</param>
/// <param name="analyzer">the analyzer to use when analyzing the document</param>
public virtual void UpdateDocument(Term term, Document doc, Analyzer analyzer)
{
SegmentInfo newSegmentInfo = BuildSingleDocSegment(doc, analyzer);
lock (this)
{
BufferDeleteTerm(term);
ramSegmentInfos.Add(newSegmentInfo);
MaybeFlushRamSegments();
}
}
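// Illustrative usage (hypothetical field names, assuming the Lucene.Net 2.x
// Field/Document API): replacing an existing document keyed by its "id" term.
//
//     Document updated = new Document();
//     updated.Add(new Field("id", "12345", Field.Store.YES, Field.Index.UN_TOKENIZED));
//     updated.Add(new Field("body", "new contents", Field.Store.NO, Field.Index.TOKENIZED));
//     writer.UpdateDocument(new Term("id", "12345"), updated);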
internal System.String NewRamSegmentName()
{
lock (this)
{
return "_ram_" + System.Convert.ToString(ramSegmentInfos.counter++, 16);
}
}
// for test purpose
internal int GetSegmentCount()
{
lock (this)
{
return segmentInfos.Count;
}
}
// for test purpose
internal int GetRamSegmentCount()
{
lock (this)
{
return ramSegmentInfos.Count;
}
}
// for test purpose
internal int GetDocCount(int i)
{
lock (this)
{
if (i >= 0 && i < segmentInfos.Count)
{
return segmentInfos.Info(i).docCount;
}
else
{
return -1;
}
}
}
internal System.String NewSegmentName()
{
lock (this)
{
return "_" + System.Convert.ToString(segmentInfos.counter++, 16);
}
}
/// <summary> Determines how often segment indices are merged by addDocument(). With
/// smaller values, less RAM is used while indexing, and searches on
/// unoptimized indices are faster, but indexing speed is slower. With larger
/// values, more RAM is used during indexing, and while searches on
/// unoptimized indices are slower, indexing is faster. Thus larger values (>
/// 10) are best for batch index creation, and smaller values (< 10) for
/// indices that are interactively maintained.
///
/// <p>
/// This must never be less than 2. The default value is
/// {@link #DEFAULT_MERGE_FACTOR}.
///
/// </summary>
private int mergeFactor = DEFAULT_MERGE_FACTOR;
/// <summary> Determines the minimal number of documents required before the buffered
/// in-memory documents are merged and a new Segment is created. Since
/// Documents are merged in a {@link Lucene.Net.Store.RAMDirectory}, a
/// larger value gives faster indexing. At the same time, mergeFactor limits
/// the number of files open in a FSDirectory.
///
/// <p>
/// The default value is {@link #DEFAULT_MAX_BUFFERED_DOCS}.
///
/// </summary>
private int minMergeDocs = DEFAULT_MAX_BUFFERED_DOCS;
/// <summary> Determines the largest number of documents ever merged by addDocument().
/// Small values (e.g., less than 10,000) are best for interactive indexing,
/// as this limits the length of pauses while indexing to a few seconds.
/// Larger values are best for batched indexing and speedier searches.
///
/// <p>
/// The default value is {@link #DEFAULT_MAX_MERGE_DOCS}.
///
/// </summary>
private int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
/// <summary> If non-null, information about merges will be printed to this.
///
/// </summary>
private System.IO.TextWriter infoStream = null;
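// Illustrative tuning sketch (hypothetical values; the setter names assume the
// public IndexWriter setters that back these fields):
//
//     writer.SetMergeFactor(30);                 // batch indexing: merge less often
//     writer.SetMaxBufferedDocs(1000);           // buffer more docs in RAM before a flush
//     writer.SetMaxMergeDocs(100000);            // cap how many docs a merged segment may hold
//     writer.SetInfoStream(System.Console.Out);  // print merge diagnostics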
/// <summary> Merges all segments together into a single segment, optimizing an index
/// for search.
///
/// <p>
/// Note that this requires substantial temporary free space in the Directory
/// (see <a target="_top"
/// href="http://issues.apache.org/jira/browse/LUCENE-764">LUCENE-764</a>
/// for details):
/// </p>
///
/// <ul>
/// <li>
///
/// <p>
/// If no readers/searchers are open against the index, then free space
/// required is up to 1X the total size of the starting index. For example,
/// if the starting index is 10 GB, then you must have up to 10 GB of free
/// space before calling optimize.
/// </p>
///
/// <li>
///
/// <p>
/// If readers/searchers are using the index, then free space required is up
/// to 2X the size of the starting index. This is because in addition to the
/// 1X used by optimize, the original 1X of the starting index is still
/// consuming space in the Directory as the readers are holding the segments
/// files open. Even on Unix, where it will appear as if the files are gone
/// ("ls" won't list them), they still consume storage due to "delete on last
/// close" semantics.
/// </p>
///
/// <p>
/// Furthermore, if some but not all readers re-open while the optimize is
/// underway, this will cause > 2X temporary space to be consumed as those
/// new readers will then hold open the partially optimized segments at that
/// time. It is best not to re-open readers while optimize is running.
/// </p>
///
/// </ul>
///
/// <p>
/// The actual temporary usage could be much less than these figures (it
/// depends on many factors).
/// </p>
///
/// <p>
/// Once the optimize completes, the total size of the index will be less
/// than the size of the starting index. It could be quite a bit smaller (if
/// there were many pending deletes) or just slightly smaller.
/// </p>
///
/// <p>
/// If an Exception is hit during optimize(), for example due to disk full,
/// the index will not be corrupt and no documents will have been lost.
/// However, it may have been partially optimized (some segments were merged
/// but not all), and it's possible that one of the segments in the index
/// will be in non-compound format even when using compound file format. This
/// will occur when the Exception is hit during conversion of the segment
/// into compound format.
/// </p>
/// </summary>
public virtual void Optimize()
{
lock (this)
{
FlushRamSegments();
while (segmentInfos.Count > 1
    || (segmentInfos.Count == 1
        && (SegmentReader.HasDeletions(segmentInfos.Info(0))
            || SegmentReader.HasSeparateNorms(segmentInfos.Info(0))
            || segmentInfos.Info(0).dir != directory
            || (useCompoundFile && !SegmentReader.UsesCompoundFile(segmentInfos.Info(0))))))
{
int minSegment = segmentInfos.Count - mergeFactor;
MergeSegments(segmentInfos, minSegment < 0 ? 0 : minSegment, segmentInfos.Count);
}
}
}
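// Illustrative usage (hypothetical writer/doc variables): a full optimize after
// batch indexing, keeping in mind the temporary disk-space needs described above.
//
//     writer.AddDocument(doc);   // ... many adds ...
//     writer.Optimize();         // merge everything down to a single segment
//     writer.Close();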
/*
* Begin a transaction. During a transaction, any segment merges that happen
* (or ram segments flushed) will not write a new segments file and will not
* remove any files that were present at the start of the transaction. You
* must make a matched (try/finally) call to commitTransaction() or
* rollbackTransaction() to finish the transaction.
*/
private void StartTransaction()
{
if (inTransaction)
{
throw new System.IO.IOException("transaction is already in process");
}
rollbackSegmentInfos = (SegmentInfos) segmentInfos.Clone();
protectedSegments = new System.Collections.Hashtable();
for (int i = 0; i < segmentInfos.Count; i++)
{
SegmentInfo si = (SegmentInfo) segmentInfos[i];
protectedSegments.Add(si.name, si.name);
}
inTransaction = true;
}
/*
* Rolls back the transaction and restores state to where we were at the
* start.
*/
private void RollbackTransaction()
{
// Keep the same segmentInfos instance but replace all
// of its SegmentInfo instances. This is so the next
// attempt to commit using this instance of IndexWriter
// will always write to a new generation ("write once").
segmentInfos.Clear();
segmentInfos.AddRange(rollbackSegmentInfos);
// Ask deleter to locate unreferenced files & remove
// them:
deleter.ClearPendingFiles();
deleter.FindDeletableFiles();
deleter.DeleteFiles();
ClearTransaction();
}
/*
* Commits the transaction. This will write the new segments file and remove
* any pending deletions we have accumulated during the transaction.
*/
private void CommitTransaction()
{
if (commitPending)
{
bool success = false;
try
{
// If we hit e.g. disk full during this write we have
// to roll back:
segmentInfos.Write(directory); // commit changes
success = true;
}
finally
{
if (!success)
{
RollbackTransaction();
}
}
deleter.CommitPendingFiles();
commitPending = false;
}
ClearTransaction();
}
/*
* Should only be called by rollbackTransaction & commitTransaction
*/
private void ClearTransaction()
{
protectedSegments = null;
rollbackSegmentInfos = null;
inTransaction = false;
}
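// Sketch of the matched-call pattern the transaction comment above describes
// (simplified; illustrative only):
//
//     StartTransaction();
//     bool success = false;
//     try
//     {
//         // ... merge segments or copy external segments ...
//         success = true;
//     }
//     finally
//     {
//         if (success)
//             CommitTransaction();
//         else
//             RollbackTransaction();
//     }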
/// <summary> Merges all segments from an array of indexes into this index.
///
/// <p>
/// This may be used to parallelize batch indexing. A large document
/// collection can be broken into sub-collections. Each sub-collection can be
/// indexed in parallel, on a different thread, process or machine. The
/// complete index can then be created by merging sub-collection indexes with
/// this method.
///
/// <p>
/// After this completes, the index is optimized.
///
/// <p>
/// This method is transactional in how Exceptions are handled: it does not
/// commit a new segments_N file until all indexes are added. This means if
/// an Exception occurs (for example disk full), then either no indexes will
/// have been added or they all will have been.
/// </p>
///
/// <p>
/// If an Exception is hit, it's still possible that all indexes were
/// successfully added. This happens when the Exception is hit when trying to
/// build a CFS file. In this case, one segment in the index will be in
/// non-CFS format, even when using compound file format.
/// </p>
///
/// <p>
/// Also note that on an Exception, the index may still have been partially
/// or fully optimized even though none of the input indexes were added.
/// </p>
///
/// <p>
/// Note that this requires temporary free space in the Directory up to 2X
/// the sum of all input indexes (including the starting index). If
/// readers/searchers are open against the starting index, then temporary
/// free space required will be higher by the size of the starting index (see
/// {@link #Optimize()} for details).
/// </p>
///
/// <p>
/// Once this completes, the final size of the index will be less than the
/// sum of all input index sizes (including the starting index). It could be
/// quite a bit smaller (if there were many pending deletes) or just slightly
/// smaller.
/// </p>
///
/// <p>
/// See <a target="_top"
/// href="http://issues.apache.org/jira/browse/LUCENE-702">LUCENE-702</a>
/// for details.
/// </p>
/// </summary>
public virtual void AddIndexes(Directory[] dirs)
{
lock (this)
{
Optimize(); // start with zero or 1 seg
int start = segmentInfos.Count;
bool success = false;
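// Illustrative usage of AddIndexes (hypothetical paths, assuming the Lucene.Net 2.x
// FSDirectory.GetDirectory API): merging independently built sub-indexes into this index.
//
//     Directory[] parts = new Directory[] {
//         FSDirectory.GetDirectory("/tmp/part0", false),
//         FSDirectory.GetDirectory("/tmp/part1", false)
//     };
//     writer.AddIndexes(parts);
//     writer.Close();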