/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using Lucene.Net.Documents;
using Directory = Lucene.Net.Store.Directory;
using IndexInput = Lucene.Net.Store.IndexInput;
namespace Lucene.Net.Index
{
    /// <summary> Class responsible for access to stored document fields.
    /// <p/>
    /// It uses &lt;segment&gt;.fdt and &lt;segment&gt;.fdx files.
    /// </summary>
    /// <version> $Id: FieldsReader.java 507009 2007-02-13 14:06:52Z gsingers $
    /// </version>
    public sealed class FieldsReader
    {
        private FieldInfos fieldInfos;
        
        // The main fieldsStream, used only for cloning.
        private IndexInput cloneableFieldsStream;
        
        // This is a clone of cloneableFieldsStream used for reading documents.
        // It should not be cloned outside of a synchronized context.
        private IndexInput fieldsStream;
        
        private IndexInput indexStream;
        private int size;
        
        // Thread-local slot holding the per-thread fields stream used by lazy fields.
        private System.LocalDataStoreSlot fieldsStreamTL = System.Threading.Thread.AllocateDataSlot();
        
        public FieldsReader(Directory d, System.String segment, FieldInfos fn)
        {
            fieldInfos = fn;
            cloneableFieldsStream = d.OpenInput(segment + ".fdt");
            fieldsStream = (IndexInput) cloneableFieldsStream.Clone();
            indexStream = d.OpenInput(segment + ".fdx");
            size = (int) (indexStream.Length() / 8);
        }
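        
        // A minimal usage sketch (hypothetical index path and segment name "_0"; in practice a
        // SegmentReader builds the FieldInfos and constructs this reader itself):
        //
        //   Directory dir = FSDirectory.GetDirectory("/path/to/index", false);
        //   FieldInfos infos = new FieldInfos(dir, "_0.fnm");
        //   FieldsReader reader = new FieldsReader(dir, "_0", infos);
        //   Document doc = reader.Doc(0, null);   // a null FieldSelector loads every stored field
        //   reader.Close();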
        
        /// <summary> Closes the underlying {@link Lucene.Net.Store.IndexInput} streams, including any ones associated with a
        /// lazy implementation of a Field. This means that the Field values will not be accessible afterwards.
        /// </summary>
        /// <throws> IOException </throws>
        public void Close()
        {
            fieldsStream.Close();
            cloneableFieldsStream.Close();
            indexStream.Close();
            
            // Only the lazy-field stream belonging to the calling thread is stored in the
            // thread-local slot; clones handed out to other threads are not closed here.
            IndexInput localFieldsStream = (IndexInput) System.Threading.Thread.GetData(fieldsStreamTL);
            if (localFieldsStream != null)
            {
                localFieldsStream.Close();
                System.Threading.Thread.SetData(fieldsStreamTL, null);
            }
        }
        
        public int Size()
        {
            return size;
        }
        
        public Document Doc(int n, FieldSelector fieldSelector)
        {
            indexStream.Seek(n * 8L);
            long position = indexStream.ReadLong();
            fieldsStream.Seek(position);
            
            Document doc = new Document();
            int numFields = fieldsStream.ReadVInt();
            for (int i = 0; i < numFields; i++)
            {
                int fieldNumber = fieldsStream.ReadVInt();
                FieldInfo fi = fieldInfos.FieldInfo(fieldNumber);
                FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.Accept(fi.name);
                
                byte bits = fieldsStream.ReadByte();
                bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
                bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
                bool binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
                
                //TODO: Find an alternative approach here if this list continues to grow beyond the
                //list of 5 or 6 currently here. See Lucene 762 for discussion
                if (acceptField.Equals(FieldSelectorResult.LOAD))
                {
                    AddField(doc, fi, binary, compressed, tokenize);
                }
                else if (acceptField.Equals(FieldSelectorResult.LOAD_FOR_MERGE))
                {
                    AddFieldForMerge(doc, fi, binary, compressed, tokenize);
                }
                else if (acceptField.Equals(FieldSelectorResult.LOAD_AND_BREAK))
                {
                    AddField(doc, fi, binary, compressed, tokenize);
                    break; //Get out of this loop
                }
                else if (acceptField.Equals(FieldSelectorResult.LAZY_LOAD))
                {
                    AddFieldLazy(doc, fi, binary, compressed, tokenize);
                }
                else if (acceptField.Equals(FieldSelectorResult.SIZE))
                {
                    SkipField(binary, compressed, AddFieldSize(doc, fi, binary, compressed));
                }
                else if (acceptField.Equals(FieldSelectorResult.SIZE_AND_BREAK))
                {
                    AddFieldSize(doc, fi, binary, compressed);
                    break;
                }
                else
                {
                    SkipField(binary, compressed);
                }
            }
            return doc;
        }
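        
        // Stored-fields layout read by Doc() above: the .fdx file holds one 8-byte pointer per
        // document (hence the Seek(n * 8L) and the size = Length() / 8 in the constructor), and at
        // that pointer the .fdt file stores a VInt field count followed by one entry per field:
        // a VInt field number, a flags byte (tokenized/binary/compressed), then the field value.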
        
        /// <summary> Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
        /// This will have the most payoff on large fields.
        /// </summary>
        private void SkipField(bool binary, bool compressed)
        {
            SkipField(binary, compressed, fieldsStream.ReadVInt());
        }
        
        private void SkipField(bool binary, bool compressed, int toRead)
        {
            if (binary || compressed)
            {
                long pointer = fieldsStream.GetFilePointer();
                fieldsStream.Seek(pointer + toRead);
            }
            else
            {
                //We need to skip chars. This will slow us down, but is still better than reading the data.
                fieldsStream.SkipChars(toRead);
            }
        }
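        
        // Note on the two skip paths above: binary and compressed values record their length as a
        // byte count, so the reader can Seek straight past them; plain text values record a
        // character count, so SkipChars must decode past the variable-width chars one at a time.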
        
        private void AddFieldLazy(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
        {
            if (binary)
            {
                int toRead = fieldsStream.ReadVInt();
                long pointer = fieldsStream.GetFilePointer();
                if (compressed)
                {
                    //was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS));
                    doc.Add(new LazyField(this, fi.name, Field.Store.COMPRESS, toRead, pointer));
                }
                else
                {
                    //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
                    doc.Add(new LazyField(this, fi.name, Field.Store.YES, toRead, pointer));
                }
                //Need to move the pointer ahead by toRead positions
                fieldsStream.Seek(pointer + toRead);
            }
            else
            {
                Field.Store store = Field.Store.YES;
                Field.Index index = GetIndexType(fi, tokenize);
                Field.TermVector termVector = GetTermVectorType(fi);
                
                Fieldable f;
                if (compressed)
                {
                    store = Field.Store.COMPRESS;
                    int toRead = fieldsStream.ReadVInt();
                    long pointer = fieldsStream.GetFilePointer();
                    f = new LazyField(this, fi.name, store, toRead, pointer);
                    //skip over the part that we aren't loading
                    fieldsStream.Seek(pointer + toRead);
                    f.SetOmitNorms(fi.omitNorms);
                }
                else
                {
                    int length = fieldsStream.ReadVInt();
                    long pointer = fieldsStream.GetFilePointer();
                    //Skip ahead of where we are by the length of what is stored
                    fieldsStream.SkipChars(length);
                    f = new LazyField(this, fi.name, store, index, termVector, length, pointer);
                    f.SetOmitNorms(fi.omitNorms);
                }
                doc.Add(f);
            }
        }
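        
        // Neither branch above reads the field value: a LazyField only remembers (pointer, length)
        // into the fields file and the stream is seeked past the data, so the bytes are fetched on
        // demand later via a per-thread clone of cloneableFieldsStream (which is what fieldsStreamTL
        // and the extra cleanup in Close() are for).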
        
        // In merge mode we don't uncompress the data of a compressed field.
        private void AddFieldForMerge(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
        {
            System.Object data;
            
            if (binary || compressed)
            {
                int toRead = fieldsStream.ReadVInt();
                byte[] b = new byte[toRead];
                fieldsStream.ReadBytes(b, 0, b.Length);
                data = b;
            }
            else
            {
                data = fieldsStream.ReadString();
            }
            
            doc.Add(new FieldForMerge(data, fi, binary, compressed, tokenize));
        }
        
        private void AddField(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
        {
            //we have a binary stored field, and it may be compressed
            if (binary)
            {
                int toRead = fieldsStream.ReadVInt();
                byte[] b = new byte[toRead];
                fieldsStream.ReadBytes(b, 0, b.Length);
                if (compressed)
                    doc.Add(new Field(fi.name, Uncompress(b), Field.Store.COMPRESS));
                else
                    doc.Add(new Field(fi.name, b, Field.Store.YES));
            }
            else
            {
                Field.Store store = Field.Store.YES;
                Field.Index index = GetIndexType(fi, tokenize);
                Field.TermVector termVector = GetTermVectorType(fi);
                
                Fieldable f;
                if (compressed)
                {
                    store = Field.Store.COMPRESS;
                    int toRead = fieldsStream.ReadVInt();
                    byte[] b = new byte[toRead];
                    fieldsStream.ReadBytes(b, 0, b.Length);
                    f = new Field(fi.name, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, termVector);
                    f.SetOmitNorms(fi.omitNorms);
                }
                else
                {
                    f = new Field(fi.name, fieldsStream.ReadString(), store, index, termVector);
                    f.SetOmitNorms(fi.omitNorms);
                }
                doc.Add(f);
            }
        }