// IndexWriter.cs
private IndexWriter(Directory d, Analyzer a, bool create, bool closeDir)
{
    InitBlock();
    Init(d, a, create, closeDir);
}

private void Init(System.String path, Analyzer a, bool create)
{
    Init(FSDirectory.GetDirectory(path), a, create, true);
}

private void Init(System.IO.FileInfo path, Analyzer a, bool create)
{
    Init(FSDirectory.GetDirectory(path), a, create, true);
}

private void Init(Directory d, Analyzer a, bool create, bool closeDir)
{
    this.closeDir = closeDir;
    directory = d;
    analyzer = a;

    if (create)
    {
        // Clear the write lock in case it's leftover:
        directory.ClearLock(IndexWriter.WRITE_LOCK_NAME);
    }

    Lock writeLock = directory.MakeLock(IndexWriter.WRITE_LOCK_NAME);
    if (!writeLock.Obtain(writeLockTimeout)) // obtain write lock
    {
        throw new System.IO.IOException("Index locked for write: " + writeLock);
    }
    this.writeLock = writeLock; // save it

    try
    {
        if (create)
        {
            // Try to read first. This is to allow create
            // against an index that's currently open for
            // searching. In this case we write the next
            // segments_N file with no segments:
            try
            {
                segmentInfos.Read(directory);
                segmentInfos.Clear();
            }
            catch (System.IO.IOException)
            {
                // Likely this means it's a fresh directory
            }
            segmentInfos.Write(directory);
        }
        else
        {
            segmentInfos.Read(directory);
        }

        // Create a deleter to keep track of which files can
        // be deleted:
        deleter = new IndexFileDeleter(segmentInfos, directory);
        deleter.SetInfoStream(infoStream);
        deleter.FindDeletableFiles();
        deleter.DeleteFiles();
    }
    catch (System.IO.IOException)
    {
        this.writeLock.Release();
        this.writeLock = null;
        throw;
    }
}
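
// A minimal usage sketch (path and analyzer are illustrative, not taken from
// this file): with create == true any stale write lock is cleared and an empty
// segments_N is written; with create == false the existing segmentInfos are read.
//
//     IndexWriter writer = new IndexWriter("/path/to/index", new StandardAnalyzer(), true);
//     try
//     {
//         // ... add documents ...
//     }
//     finally
//     {
//         writer.Close(); // also releases the write lock
//     }
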
/// <summary> Determines the largest number of documents ever merged by addDocument().
/// Small values (e.g., less than 10,000) are best for interactive indexing,
/// as this limits the length of pauses while indexing to a few seconds.
/// Larger values are best for batched indexing and speedier searches.
///
/// <p>
/// The default value is {@link Integer#MAX_VALUE}.
/// </summary>
public virtual void SetMaxMergeDocs(int maxMergeDocs)
{
    this.maxMergeDocs = maxMergeDocs;
}

/// <seealso cref="SetMaxMergeDocs">
/// </seealso>
public virtual int GetMaxMergeDocs()
{
    return maxMergeDocs;
}
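
// Tuning sketch (the value is only an example): a small cap keeps each merge,
// and therefore each indexing pause, short for interactive use; batch jobs
// normally leave the Integer.MAX_VALUE default in place.
//
//     writer.SetMaxMergeDocs(10000); // interactive indexing: bound merge size
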
/// <summary> The maximum number of terms that will be indexed for a single field in a
/// document. This limits the amount of memory required for indexing, so that
/// collections with very large files will not crash the indexing process by
/// running out of memory.<p/> Note that this effectively truncates large
/// documents, excluding from the index terms that occur further in the
/// document. If you know your source documents are large, be sure to set
/// this value high enough to accommodate the expected size. If you set it to
/// Integer.MAX_VALUE, then the only limit is your memory, but you should
/// anticipate an OutOfMemoryError.<p/> By default, no more than 10,000
/// terms will be indexed for a field.
/// </summary>
public virtual void SetMaxFieldLength(int maxFieldLength)
{
    this.maxFieldLength = maxFieldLength;
}

/// <seealso cref="SetMaxFieldLength">
/// </seealso>
public virtual int GetMaxFieldLength()
{
    return maxFieldLength;
}
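
// Tuning sketch (the value is illustrative): raise the per-field term cap when
// the source documents are known to be large, accepting the extra memory cost.
//
//     writer.SetMaxFieldLength(100000); // index up to 100,000 terms per field
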
/// <summary> Determines the minimal number of documents required before the buffered
/// in-memory documents are merged and a new Segment is created. Since
/// Documents are merged in a {@link Lucene.Net.Store.RAMDirectory},
/// a larger value gives faster indexing. At the same time, mergeFactor limits
/// the number of files open in an FSDirectory.
///
/// <p>
/// The default value is 10.
/// </p>
/// </summary>
/// <throws> ArgumentException if maxBufferedDocs is smaller than 2 </throws>
public virtual void SetMaxBufferedDocs(int maxBufferedDocs)
{
    if (maxBufferedDocs < 2)
        throw new System.ArgumentException("maxBufferedDocs must be at least 2");
    this.minMergeDocs = maxBufferedDocs;
}
/// <seealso cref="SetMaxBufferedDocs">
/// </seealso>
public virtual int GetMaxBufferedDocs()
{
    return minMergeDocs;
}
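
// Tuning sketch (the value is illustrative): buffer more documents in RAM
// before a new on-disk segment is written, trading memory for indexing speed.
//
//     writer.SetMaxBufferedDocs(1000); // flush a new segment every 1000 docs
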
/// <summary> <p>
/// Determines the minimal number of delete terms required before the
/// buffered in-memory delete terms are applied and flushed. If there are
/// documents buffered in memory at the time, they are merged and a new
/// segment is created.
/// </p>
///
/// <p>
/// The default value is {@link #DEFAULT_MAX_BUFFERED_DELETE_TERMS}.
/// </p>
/// </summary>
/// <throws> ArgumentException if maxBufferedDeleteTerms is smaller than 1 </throws>
public virtual void SetMaxBufferedDeleteTerms(int maxBufferedDeleteTerms)
{
    if (maxBufferedDeleteTerms < 1)
        throw new System.ArgumentException("maxBufferedDeleteTerms must be at least 1");
    this.maxBufferedDeleteTerms = maxBufferedDeleteTerms;
}
/// <seealso cref="SetMaxBufferedDeleteTerms">
/// </seealso>
public virtual int GetMaxBufferedDeleteTerms()
{
    return maxBufferedDeleteTerms;
}
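
// Tuning sketch (the value and term are illustrative, and delete-by-term is
// assumed to be available in this writer version): apply buffered deletes
// after every 100 delete terms.
//
//     writer.SetMaxBufferedDeleteTerms(100);
//     writer.DeleteDocuments(new Term("id", "42")); // buffered until the threshold
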
/// <summary> Determines how often segment indices are merged by addDocument(). With
/// smaller values, less RAM is used while indexing, and searches on
/// unoptimized indices are faster, but indexing speed is slower. With larger
/// values, more RAM is used during indexing, and while searches on
/// unoptimized indices are slower, indexing is faster. Thus larger values (>
/// 10) are best for batch index creation, and smaller values (< 10) for
/// indices that are interactively maintained.
///
/// <p>
/// This must never be less than 2. The default value is 10.
/// </summary>
public virtual void SetMergeFactor(int mergeFactor)
{
    if (mergeFactor < 2)
        throw new System.ArgumentException("mergeFactor cannot be less than 2");
    this.mergeFactor = mergeFactor;
}

/// <seealso cref="SetMergeFactor">
/// </seealso>
public virtual int GetMergeFactor()
{
    return mergeFactor;
}
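
// Tuning sketch (values are only examples): a higher mergeFactor favors batch
// throughput; a lower one favors search speed on an unoptimized,
// interactively maintained index.
//
//     writer.SetMergeFactor(30); // batch indexing
//     writer.SetMergeFactor(2);  // interactive indexing
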
/// <summary> If non-null, information about merges and a message when maxFieldLength
/// is reached will be printed to this stream.
/// </summary>
public virtual void SetInfoStream(System.IO.TextWriter infoStream)
{
    this.infoStream = infoStream;
}

/// <seealso cref="SetInfoStream">
/// </seealso>
public virtual System.IO.TextWriter GetInfoStream()
{
    return infoStream;
}
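
// Diagnostic sketch: route merge and maxFieldLength messages to the console.
// System.Console.Out is a TextWriter, so it can be passed directly.
//
//     writer.SetInfoStream(System.Console.Out);
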
/// <summary> Sets the maximum time to wait for a write lock (in milliseconds) for this
/// instance of IndexWriter.
/// </summary>
/// <seealso cref="SetDefaultWriteLockTimeout"> to change the default value for all
/// instances of IndexWriter.
/// </seealso>
public virtual void SetWriteLockTimeout(long writeLockTimeout)
{
    this.writeLockTimeout = writeLockTimeout;
}
/// <seealso cref="SetWriteLockTimeout">
/// </seealso>
public virtual long GetWriteLockTimeout()
{
    return writeLockTimeout;
}

/// <summary> Sets the default (for any instance of IndexWriter) maximum time to wait
/// for a write lock (in milliseconds).
/// </summary>
public static void SetDefaultWriteLockTimeout(long writeLockTimeout)
{
    IndexWriter.WRITE_LOCK_TIMEOUT = writeLockTimeout;
}

/// <seealso cref="SetDefaultWriteLockTimeout">
/// </seealso>
public static long GetDefaultWriteLockTimeout()
{
    return IndexWriter.WRITE_LOCK_TIMEOUT;
}
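
// Sketch of the two timeout scopes (values are illustrative): the static
// default applies to writers created afterwards, the instance setter only to
// one writer.
//
//     IndexWriter.SetDefaultWriteLockTimeout(2000); // all future writers: 2s
//     writer.SetWriteLockTimeout(5000);             // this writer only: 5s
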
/// <summary> Flushes all changes to an index and closes all associated files.
///
/// <p>
/// If an Exception is hit during close, e.g. due to disk full or some other
/// reason, then both the on-disk index and the internal state of the
/// IndexWriter instance will be consistent. However, the close will not be
/// complete even though part of it (flushing buffered documents) may have
/// succeeded, so the write lock will still be held.
/// </p>
///
/// <p>
/// If you can correct the underlying cause (e.g. free up some disk space) then
/// you can call Close() again. Failing that, if you want to force the write
/// lock to be released (dangerous, because you may then lose buffered docs
/// in the IndexWriter instance) then you can do something like this:
/// </p>
///
/// <pre>
/// try {
///     writer.Close();
/// } finally {
///     if (IndexReader.IsLocked(directory)) {
///         IndexReader.Unlock(directory);
///     }
/// }
/// </pre>
///
/// <p>
/// after which you must be certain not to use the writer instance anymore.
/// </p>
/// </summary>
public virtual void Close()
{
    lock (this)
    {
        FlushRamSegments();
        ramDirectory.Close();
        if (writeLock != null)
        {
            writeLock.Release(); // release write lock
            writeLock = null;
        }
        if (closeDir)
            directory.Close();
    }
}
/// <summary>Release the write lock, if needed. </summary>
~IndexWriter()
{
    if (writeLock != null)
    {
        writeLock.Release(); // release write lock
        writeLock = null;
    }
}
/// <summary>Returns the Directory used by this index. </summary>
public virtual Directory GetDirectory()
{
    return directory;
}

/// <summary>Returns the analyzer used by this index. </summary>
public virtual Analyzer GetAnalyzer()
{
    return analyzer;
}
/// <summary>Returns the number of documents currently in this index. </summary>
public virtual int DocCount()
{
    lock (this)
    {
        int count = ramSegmentInfos.Count;
        for (int i = 0; i < segmentInfos.Count; i++)
        {
            SegmentInfo si = segmentInfos.Info(i);
            count += si.docCount;
        }
        return count;
    }
}
/// <summary> The maximum number of terms that will be indexed for a single field in a
/// document. This limits the amount of memory required for indexing, so that
/// collections with very large files will not crash the indexing process by
/// running out of memory.<p/> Note that this effectively truncates large
/// documents, excluding from the index terms that occur further in the
/// document. If you know your source documents are large, be sure to set
/// this value high enough to accommodate the expected size. If you set it to
/// Integer.MAX_VALUE, then the only limit is your memory, but you should
/// anticipate an OutOfMemoryError.<p/> By default, no more than 10,000
/// terms will be indexed for a field.
///
/// </summary>
private int maxFieldLength = DEFAULT_MAX_FIELD_LENGTH;
/// <summary> Adds a document to this index. If the document contains more than
/// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder
/// are discarded.
///
/// <p>
/// Note that if an Exception is hit (for example disk full) then the index
/// will be consistent, but this document may not have been added.
/// Furthermore, it's possible the index will have one segment in
/// non-compound format even when using compound files (when a merge has
/// partially succeeded).
/// </p>
///
/// <p>
/// This method periodically flushes pending documents to the Directory
/// (every {@link #setMaxBufferedDocs}), and also periodically merges
/// segments in the index (every {@link #setMergeFactor} flushes). When this
/// occurs, the method will take more time to run (possibly a long time if
/// the index is large), and will require free temporary space in the
/// Directory to do the merging.
/// </p>
///
/// <p>
/// The amount of free space required when a merge is triggered is up to 1X
/// the size of all segments being merged, when no readers/searchers are open
/// against the index, and up to 2X the size of all segments being merged
/// when readers/searchers are open against the index (see
/// {@link #Optimize()} for details). Most merges are small (merging the
/// smallest segments together), but whenever a full merge occurs (all
/// segments in the index, which is the worst case for temporary space usage)
/// then the maximum free disk space required is the same as
/// {@link #optimize}.
/// </p>
/// </summary>
public virtual void AddDocument(Document doc)
{
    AddDocument(doc, analyzer);
}
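
// Usage sketch (the field name and flags are illustrative): build a document
// and add it with the writer's default analyzer.
//
//     Document doc = new Document();
//     doc.Add(new Field("title", "hello world", Field.Store.YES, Field.Index.TOKENIZED));
//     writer.AddDocument(doc);
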
/// <summary> Adds a document to this index, using the provided analyzer instead of the
/// value of {@link #GetAnalyzer()}. If the document contains more than
/// {@link #SetMaxFieldLength(int)} terms for a given field, the remainder
/// are discarded.
///
/// <p>
/// See {@link #AddDocument(Document)} for details on index and IndexWriter
/// state after an Exception, and flushing/merging temporary free space
/// requirements.
/// </p>
/// </summary>
public virtual void AddDocument(Document doc, Analyzer analyzer)
{
    SegmentInfo newSegmentInfo = BuildSingleDocSegment(doc, analyzer);
    lock (this)
    {
        // Queue the new single-document segment, then flush/merge the
        // buffered RAM segments once the configured thresholds are reached:
        ramSegmentInfos.Add(newSegmentInfo);
        MaybeFlushRamSegments();
    }
}
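
// Usage sketch (the analyzer choice is illustrative): index a single document
// with a different analyzer than the writer-wide default set at construction.
//
//     writer.AddDocument(doc, new WhitespaceAnalyzer());
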