📄 fieldsreader.java
字号:
data = b;
    } else {
      data = fieldsStream.readString();
    }
    doc.add(new FieldForMerge(data, fi, binary, compressed, tokenize));
  }

  /**
   * Reads the next stored field value from {@code fieldsStream} and adds it to
   * {@code doc} as an eagerly-loaded {@link Field}.
   *
   * @param doc        document being populated
   * @param fi         field-infos entry describing this field (name, flags)
   * @param binary     true if the stored value is binary bytes
   * @param compressed true if the stored value is compressed
   * @param tokenize   true if the field was tokenized when indexed
   * @throws CorruptIndexException if a compressed value cannot be decompressed
   * @throws IOException           on any underlying stream error
   */
  private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws CorruptIndexException, IOException {

    // we have a binary stored field, and it may be compressed
    if (binary) {
      // Binary values are stored as a VInt length followed by that many bytes.
      int toRead = fieldsStream.readVInt();
      final byte[] b = new byte[toRead];
      fieldsStream.readBytes(b, 0, b.length);
      if (compressed)
        doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
      else
        doc.add(new Field(fi.name, b, Field.Store.YES));
    } else {
      Field.Store store = Field.Store.YES;
      Field.Index index = getIndexType(fi, tokenize);
      Field.TermVector termVector = getTermVectorType(fi);

      Fieldable f;
      if (compressed) {
        store = Field.Store.COMPRESS;
        // Compressed text is stored like binary: VInt length + raw bytes.
        int toRead = fieldsStream.readVInt();

        final byte[] b = new byte[toRead];
        fieldsStream.readBytes(b, 0, b.length);
        f = new Field(fi.name,      // field name
                new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
                store,
                index,
                termVector);
        f.setOmitNorms(fi.omitNorms);
      } else {
        f = new Field(fi.name,     // name
                fieldsStream.readString(), // read value
                store,
                index,
                termVector);
        f.setOmitNorms(fi.omitNorms);
      }
      doc.add(f);
    }
  }

  // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
  // Read just the size -- caller must skip the field content to continue reading fields
  // Return the size in bytes or chars, depending on field type
  private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed) throws IOException {
    // Binary/compressed values are stored as bytes; plain text is stored as
    // chars, so its byte size is 2 * the stored char count.
    int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2*size;
    byte[] sizebytes = new byte[4];
    sizebytes[0] = (byte) (bytesize>>>24);  // big-endian: high order byte first
    sizebytes[1] = (byte) (bytesize>>>16);
    sizebytes[2] = (byte) (bytesize>>> 8);
    sizebytes[3] = (byte)  bytesize      ;
    doc.add(new Field(fi.name, sizebytes, Field.Store.YES));
    return size;
  }

  /** Maps the term-vector flags recorded in {@code fi} to the matching
   *  {@link Field.TermVector} constant. */
  private Field.TermVector getTermVectorType(FieldInfo fi) {
    Field.TermVector termVector = null;
    if (fi.storeTermVector) {
      if (fi.storeOffsetWithTermVector) {
        if (fi.storePositionWithTermVector) {
          termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
        } else {
          termVector = Field.TermVector.WITH_OFFSETS;
        }
      } else if (fi.storePositionWithTermVector) {
        termVector = Field.TermVector.WITH_POSITIONS;
      } else {
        termVector = Field.TermVector.YES;
      }
    } else {
      termVector = Field.TermVector.NO;
    }
    return termVector;
  }

  /** Maps {@code fi}'s indexed flag plus the per-field tokenize bit to a
   *  {@link Field.Index} constant. */
  private Field.Index getIndexType(FieldInfo fi, boolean tokenize) {
    Field.Index index;
    if (fi.isIndexed && tokenize)
      index = Field.Index.ANALYZED;
    else if (fi.isIndexed && !tokenize)
      index = Field.Index.NOT_ANALYZED;
    else
      index = Field.Index.NO;
    return index;
  }

  /**
   * A Lazy implementation of Fieldable that defers loading of fields until asked for, instead of when the Document is
   * loaded.
   */
  private class LazyField extends AbstractField implements Fieldable {
    // Number of items to read for this value: bytes for binary/compressed/new
    // UTF-8 format, chars for the old text format (see stringValue()).
    private int toRead;
    // Absolute position of this field's value within the fields stream.
    private long pointer;

    public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary) {
      super(name, store, Field.Index.NO, Field.TermVector.NO);
      this.toRead = toRead;
      this.pointer = pointer;
      this.isBinary = isBinary;
      lazy = true;
    }

    public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary) {
      super(name, store, index, termVector);
      this.toRead = toRead;
      this.pointer = pointer;
      this.isBinary = isBinary;
      lazy = true;
    }

    // Returns a thread-private clone of the fields stream so lazy loads from
    // different threads do not disturb each other's file position.
    private IndexInput getFieldStream() {
      IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
      if (localFieldsStream == null) {
        localFieldsStream = (IndexInput) cloneableFieldsStream.clone();
        fieldsStreamTL.set(localFieldsStream);
      }
      return localFieldsStream;
    }

    /** The value of the field in Binary, or null.  If null, the Reader value,
     * String value, or TokenStream value is used. Exactly one of stringValue(),
     * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
    public byte[] binaryValue() {
      return getBinaryValue(null);
    }

    /** The value of the field as a Reader, or null.  If null, the String value,
     * binary value, or TokenStream value is used.  Exactly one of stringValue(),
     * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
    public Reader readerValue() {
      ensureOpen();
      return null;
    }

    /** The value of the field as a TokenStream, or null.  If null, the Reader value,
     * String value, or binary value is used. Exactly one of stringValue(),
     * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
    public TokenStream tokenStreamValue() {
      ensureOpen();
      return null;
    }

    /** The value of the field as a String, or null.  If null, the Reader value,
     * binary value, or TokenStream value is used.  Exactly one of stringValue(),
     * readerValue(), binaryValue(), and tokenStreamValue() must be set.
     */
    public String stringValue() {
      ensureOpen();
      if (isBinary)
        return null;
      else {
        if (fieldsData == null) {
          // First access: seek to the value and materialize it, then cache it
          // in fieldsData for subsequent calls.
          IndexInput localFieldsStream = getFieldStream();
          try {
            localFieldsStream.seek(pointer);
            if (isCompressed) {
              final byte[] b = new byte[toRead];
              localFieldsStream.readBytes(b, 0, b.length);
              fieldsData = new String(uncompress(b), "UTF-8");
            } else {
              if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
                // New format: toRead is a byte count of UTF-8 data.
                byte[] bytes = new byte[toRead];
                localFieldsStream.readBytes(bytes, 0, toRead);
                fieldsData = new String(bytes, "UTF-8");
              } else {
                //read in chars b/c we already know the length we need to read
                char[] chars = new char[toRead];
                localFieldsStream.readChars(chars, 0, toRead);
                fieldsData = new String(chars);
              }
            }
          } catch (IOException e) {
            throw new FieldReaderException(e);
          }
        }
        return (String) fieldsData;
      }
    }

    public long getPointer() {
      ensureOpen();
      return pointer;
    }

    public void setPointer(long pointer) {
      ensureOpen();
      this.pointer = pointer;
    }

    public int getToRead() {
      ensureOpen();
      return toRead;
    }

    public void setToRead(int toRead) {
      ensureOpen();
      this.toRead = toRead;
    }

    public byte[] getBinaryValue(byte[] result) {
      ensureOpen();

      if (isBinary) {
        if (fieldsData == null) {
          // Allocate new buffer if result is null or too small
          final byte[] b;
          if (result == null || result.length < toRead)
            b = new byte[toRead];
          else
            b = result;

          IndexInput localFieldsStream = getFieldStream();

          // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
          // since they are already handling this exception when getting the document
          try {
            localFieldsStream.seek(pointer);
            localFieldsStream.readBytes(b, 0, toRead);
            if (isCompressed == true) {
              fieldsData = uncompress(b);
            } else {
              fieldsData = b;
            }
          } catch (IOException e) {
            throw new FieldReaderException(e);
          }

          binaryOffset = 0;
          binaryLength = toRead;
        }

        return (byte[]) fieldsData;
      } else
        return null;
    }
  }

  /**
   * Decompresses a stored field value with {@link Inflater}.
   *
   * @param input the raw compressed bytes as read from the fields stream
   * @return a newly allocated array holding the decompressed bytes
   * @throws CorruptIndexException if the bytes are not valid compressed data
   */
  private final byte[] uncompress(final byte[] input) throws CorruptIndexException,
IOException {

    // Create an expandable byte array to hold the decompressed data
    ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);

    Inflater decompressor = new Inflater();

    try {
      decompressor.setInput(input);

      // Decompress the data
      byte[] buf = new byte[1024];
      while (!decompressor.finished()) {
        try {
          int count = decompressor.inflate(buf);
          bos.write(buf, 0, count);
        } catch (DataFormatException e) {
          // this will happen if the field is not compressed
          CorruptIndexException newException = new CorruptIndexException("field data are in wrong format: " + e.toString());
          newException.initCause(e);  // preserve the original cause for callers
          throw newException;
        }
      }
    } finally {
      // Always release the native zlib resources held by the Inflater.
      decompressor.end();
    }

    // Get the decompressed data
    return bos.toByteArray();
  }

  // Instances of this class hold field properties and data
  // for merge
  final static class FieldForMerge extends AbstractField {

    public String stringValue() {
      return (String) this.fieldsData;
    }

    public Reader readerValue() {
      // not needed for merge
      return null;
    }

    public byte[] binaryValue() {
      return (byte[]) this.fieldsData;
    }

    public TokenStream tokenStreamValue() {
      // not needed for merge
      return null;
    }

    // Copies the already-read value plus all flags from the FieldInfo so the
    // merge path can re-write the field without re-parsing it.
    public FieldForMerge(Object value, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) {
      this.isStored = true;
      this.fieldsData = value;
      this.isCompressed = compressed;
      this.isBinary = binary;
      this.isTokenized = tokenize;
      // intern() lets later name comparisons use identity, matching FieldInfos.
      this.name = fi.name.intern();
      this.isIndexed = fi.isIndexed;
      this.omitNorms = fi.omitNorms;
      this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector;
      this.storePositionWithTermVector = fi.storePositionWithTermVector;
      this.storeTermVector = fi.storeTermVector;
    }
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -