📄 FieldsReader.java
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.document.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.Reader;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;

/**
 * Class responsible for access to stored document fields.
 * <p/>
 * It uses &lt;segment&gt;.fdt and &lt;segment&gt;.fdx files.
 *
 * @version $Id: FieldsReader.java 507009 2007-02-13 14:06:52Z gsingers $
 */
final class FieldsReader {
  private final FieldInfos fieldInfos;

  // The main fieldStream, used only for cloning.
  private final IndexInput cloneableFieldsStream;

  // This is a clone of cloneableFieldsStream used for reading documents.
  // It should not be cloned outside of a synchronized context.
  private final IndexInput fieldsStream;

  private final IndexInput indexStream;
  private int size;

  private ThreadLocal fieldsStreamTL = new ThreadLocal();

  FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
    fieldInfos = fn;

    cloneableFieldsStream = d.openInput(segment + ".fdt");
    fieldsStream = (IndexInput) cloneableFieldsStream.clone();
    indexStream = d.openInput(segment + ".fdx");
    size = (int) (indexStream.length() / 8);
  }

  /**
   * Closes the underlying {@link org.apache.lucene.store.IndexInput} streams, including any ones associated with a
   * lazy implementation of a Field.  This means that the Fields values will not be accessible.
   *
   * @throws IOException
   */
  final void close() throws IOException {
    fieldsStream.close();
    cloneableFieldsStream.close();
    indexStream.close();
    IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
    if (localFieldsStream != null) {
      localFieldsStream.close();
      fieldsStreamTL.set(null);
    }
  }

  final int size() {
    return size;
  }

  final Document doc(int n, FieldSelector fieldSelector) throws IOException {
    indexStream.seek(n * 8L);
    long position = indexStream.readLong();
    fieldsStream.seek(position);

    Document doc = new Document();
    int numFields = fieldsStream.readVInt();
    for (int i = 0; i < numFields; i++) {
      int fieldNumber = fieldsStream.readVInt();
      FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
      FieldSelectorResult acceptField = fieldSelector == null ?
              FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);

      byte bits = fieldsStream.readByte();
      boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
      boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
      boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
      //TODO: Find an alternative approach here if this list continues to grow beyond the
      //list of 5 or 6 currently here.  See Lucene 762 for discussion
      if (acceptField.equals(FieldSelectorResult.LOAD)) {
        addField(doc, fi, binary, compressed, tokenize);
      }
      else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE)) {
        addFieldForMerge(doc, fi, binary, compressed, tokenize);
      }
      else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)) {
        addField(doc, fi, binary, compressed, tokenize);
        break; //Get out of this loop
      }
      else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
        addFieldLazy(doc, fi, binary, compressed, tokenize);
      }
      else if (acceptField.equals(FieldSelectorResult.SIZE)) {
        skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed));
      }
      else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)) {
        addFieldSize(doc, fi, binary, compressed);
        break;
      }
      else {
        skipField(binary, compressed);
      }
    }

    return doc;
  }

  /**
   * Skip the field.  We still have to read some of the information about the field, but can skip past the actual content.
   * This will have the most payoff on large fields.
   */
  private void skipField(boolean binary, boolean compressed) throws IOException {
    skipField(binary, compressed, fieldsStream.readVInt());
  }

  private void skipField(boolean binary, boolean compressed, int toRead) throws IOException {
    if (binary || compressed) {
      long pointer = fieldsStream.getFilePointer();
      fieldsStream.seek(pointer + toRead);
    } else {
      //We need to skip chars.  This will slow us down, but still better
      fieldsStream.skipChars(toRead);
    }
  }

  private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
    if (binary == true) {
      int toRead = fieldsStream.readVInt();
      long pointer = fieldsStream.getFilePointer();
      if (compressed) {
        //was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS));
        doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer));
      } else {
        //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
        doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer));
      }
      //Need to move the pointer ahead by toRead positions
      fieldsStream.seek(pointer + toRead);
    } else {
      Field.Store store = Field.Store.YES;
      Field.Index index = getIndexType(fi, tokenize);
      Field.TermVector termVector = getTermVectorType(fi);

      Fieldable f;
      if (compressed) {
        store = Field.Store.COMPRESS;
        int toRead = fieldsStream.readVInt();
        long pointer = fieldsStream.getFilePointer();
        f = new LazyField(fi.name, store, toRead, pointer);
        //skip over the part that we aren't loading
        fieldsStream.seek(pointer + toRead);
        f.setOmitNorms(fi.omitNorms);
      } else {
        int length = fieldsStream.readVInt();
        long pointer = fieldsStream.getFilePointer();
        //Skip ahead of where we are by the length of what is stored
        fieldsStream.skipChars(length);
        f = new LazyField(fi.name, store, index, termVector, length, pointer);
        f.setOmitNorms(fi.omitNorms);
      }
      doc.add(f);
    }
  }

  // in merge mode we don't uncompress the data of a compressed field
  private void addFieldForMerge(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
    Object data;

    if (binary || compressed) {
      int toRead = fieldsStream.readVInt();
      final byte[] b = new byte[toRead];
      fieldsStream.readBytes(b, 0, b.length);
      data = b;
    } else {
      data = fieldsStream.readString();
    }

    doc.add(new FieldForMerge(data, fi, binary, compressed, tokenize));
  }

  private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {

    //we have a binary stored field, and it may be compressed
    if (binary) {
      int toRead = fieldsStream.readVInt();
      final byte[] b = new byte[toRead];
      fieldsStream.readBytes(b, 0, b.length);
      if (compressed)
        doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
      else
        doc.add(new Field(fi.name, b, Field.Store.YES));
    } else {
      Field.Store store = Field.Store.YES;
      Field.Index index = getIndexType(fi, tokenize);
      Field.TermVector termVector = getTermVectorType(fi);

      Fieldable f;
      if (compressed) {
        store = Field.Store.COMPRESS;
        int toRead = fieldsStream.readVInt();

        final byte[] b = new byte[toRead];
        fieldsStream.readBytes(b, 0, b.length);
        f = new Field(fi.name,      // field name
                new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
                store,
                index,
                termVector);
        f.setOmitNorms(fi.omitNorms);
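The listing above breaks off inside addField(), but the on-disk addressing the class relies on is already visible: each document has one 8-byte entry in &lt;segment&gt;.fdx holding a long file pointer into &lt;segment&gt;.fdt, so document n's stored fields start at the long found at offset n*8, and size() is simply the .fdx file length divided by 8. The sketch below is illustrative only and is not part of the original file: FieldsReader and FieldInfos are package-private, so it would have to be compiled inside org.apache.lucene.index, and the index path, the segment name "_0", and the class name FieldsReaderExample are assumptions made for the example.

// Minimal usage sketch (hypothetical, not from the Lucene sources).
package org.apache.lucene.index;

import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

class FieldsReaderExample {                                            // hypothetical helper class
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.getDirectory("/path/to/index", false); // assumed index location
    String segment = "_0";                                             // assumed segment name
    FieldInfos infos = new FieldInfos(dir, segment + ".fnm");          // per-segment field metadata
    FieldsReader reader = new FieldsReader(dir, segment, infos);
    try {
      // size() = length of <segment>.fdx / 8, i.e. one long pointer per document;
      // this loop walks every slot and does not check for deleted documents.
      for (int n = 0; n < reader.size(); n++) {
        Document doc = reader.doc(n, null); // a null selector loads every stored field
        System.out.println(doc);
      }
    } finally {
      reader.close();
      dir.close();
    }
  }
}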