📄 fieldsreader.java
字号:
data = b; } else { data = fieldsStream.readString(); } doc.add(new FieldForMerge(data, fi, binary, compressed, tokenize)); }

  /**
   * Reads the value of the next stored field from <code>fieldsStream</code> and adds it
   * to <code>doc</code> as a fully materialized {@link Field}.
   *
   * @param doc        document that receives the field
   * @param fi         field metadata (name, indexing and term vector flags, omitNorms)
   * @param binary     true if the stored value is a byte array
   * @param compressed true if the stored value must be inflated before use
   * @param tokenize   true if the field is flagged as tokenized
   * @throws CorruptIndexException if compressed data cannot be inflated
   * @throws IOException if reading from the fields stream fails
   */
  private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize)
      throws CorruptIndexException, IOException {

    // We have a binary stored field, and it may be compressed.
    if (binary) {
      final int length = fieldsStream.readVInt();
      final byte[] bytes = new byte[length];
      fieldsStream.readBytes(bytes, 0, bytes.length);
      if (compressed) {
        doc.add(new Field(fi.name, uncompress(bytes), Field.Store.COMPRESS));
      } else {
        doc.add(new Field(fi.name, bytes, Field.Store.YES));
      }
      return;
    }

    // Text field: the value is either a compressed UTF-8 byte sequence or a plain string.
    final Field.Index index = getIndexType(fi, tokenize);
    final Field.TermVector termVector = getTermVectorType(fi);
    final Fieldable f;
    if (compressed) {
      final int length = fieldsStream.readVInt();
      final byte[] bytes = new byte[length];
      fieldsStream.readBytes(bytes, 0, bytes.length);
      f = new Field(fi.name,                                // field name
                    new String(uncompress(bytes), "UTF-8"), // uncompress the value and add as string
                    Field.Store.COMPRESS,
                    index,
                    termVector);
    } else {
      f = new Field(fi.name,                   // name
                    fieldsStream.readString(), // read value
                    Field.Store.YES,
                    index,
                    termVector);
    }
    f.setOmitNorms(fi.omitNorms);
    doc.add(f);
  }

  /**
   * Adds the size of the next field to <code>doc</code> as a 4-byte binary field holding
   * the integer byte size, high order byte first (a char counts as 2 bytes).
   * Only the size is read; the caller must skip the field content to continue reading
   * fields.
   *
   * @param doc        document that receives the size field
   * @param fi         field metadata; only the name is used here
   * @param binary     true if the stored value is a byte array
   * @param compressed true if the stored value is compressed
   * @return the size as read from the stream: in bytes for binary or compressed fields,
   *         in chars otherwise
   * @throws IOException if reading from the fields stream fails
   */
  private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed) throws IOException {
    final int size = fieldsStream.readVInt();
    final int bytesize = (binary || compressed) ? size : 2 * size;

    final byte[] sizebytes = new byte[] {
      (byte) (bytesize >>> 24),
      (byte) (bytesize >>> 16),
      (byte) (bytesize >>> 8),
      (byte) bytesize
    };
    doc.add(new Field(fi.name, sizebytes, Field.Store.YES));
    return size;
  }

  /**
   * Maps the term vector flags of <code>fi</code> to the matching
   * {@link Field.TermVector} constant.
   */
  private Field.TermVector getTermVectorType(FieldInfo fi) {
    if (!fi.storeTermVector) {
      return Field.TermVector.NO;
    }
    if (fi.storeOffsetWithTermVector) {
      return fi.storePositionWithTermVector
          ? Field.TermVector.WITH_POSITIONS_OFFSETS
          : Field.TermVector.WITH_OFFSETS;
    }
    if (fi.storePositionWithTermVector) {
      return Field.TermVector.WITH_POSITIONS;
    }
    return Field.TermVector.YES;
  }

  /**
   * Maps the indexed flag of <code>fi</code> and the <code>tokenize</code> flag to the
   * matching {@link Field.Index} constant.
   */
  private Field.Index getIndexType(FieldInfo fi, boolean tokenize) {
    if (!fi.isIndexed) {
      return Field.Index.NO;
    }
    return tokenize ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED;
  }

  /**
   * A lazy implementation of Fieldable that defers loading of fields until asked for,
   * instead of when the Document is loaded.
   */
  private class LazyField extends AbstractField implements Fieldable {
    /** Number of bytes (binary/compressed values) or chars (plain strings) to read. */
    private int toRead;
    /** Position in the fields stream at which the value starts. */
    private long pointer;

    public LazyField(String name, Field.Store store, int toRead, long pointer) {
      super(name, store, Field.Index.NO, Field.TermVector.NO);
      this.toRead = toRead;
      this.pointer = pointer;
      lazy = true;
    }

    public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer) {
      super(name, store, index, termVector);
      this.toRead = toRead;
      this.pointer = pointer;
      lazy = true;
    }

    /**
     * Returns the IndexInput held in <code>fieldsStreamTL</code>, creating it on first
     * use by cloning <code>cloneableFieldsStream</code>.
     */
    private IndexInput getFieldStream() {
      IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
      if (localFieldsStream == null) {
        localFieldsStream = (IndexInput) cloneableFieldsStream.clone();
        fieldsStreamTL.set(localFieldsStream);
      }
      return localFieldsStream;
    }

    /** The value of the field in Binary, or null.  If null, the Reader value,
     * String value, or TokenStream value is used. Exactly one of stringValue(),
     * readerValue(), binaryValue(), and tokenStreamValue() must be set.
     * The value is loaded from the fields stream on first access. */
    public byte[] binaryValue() {
      ensureOpen();
      if (fieldsData == null) {
        final byte[] b = new byte[toRead];
        IndexInput localFieldsStream = getFieldStream();
        // Rethrow the IOException unchecked: IndexReader.document throws IOException
        // anyway, so callers are already prepared for failures when loading a document.
        try {
          localFieldsStream.seek(pointer);
          localFieldsStream.readBytes(b, 0, b.length);
          if (isCompressed) {
            fieldsData = uncompress(b);
          } else {
            fieldsData = b;
          }
        } catch (IOException e) {
          throw new FieldReaderException(e);
        }
      }
      return fieldsData instanceof byte[] ? (byte[]) fieldsData : null;
    }

    /** The value of the field as a Reader, or null.  If null, the String value,
     * binary value, or TokenStream value is used.  Exactly one of stringValue(),
     * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
    public Reader readerValue() {
      ensureOpen();
      return fieldsData instanceof Reader ? (Reader) fieldsData : null;
    }

    /** The value of the field as a TokenStream, or null.  If null, the Reader value,
     * String value, or binary value is used. Exactly one of stringValue(),
     * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
    public TokenStream tokenStreamValue() {
      ensureOpen();
      return fieldsData instanceof TokenStream ? (TokenStream) fieldsData : null;
    }

    /** The value of the field as a String, or null.  If null, the Reader value,
     * binary value, or TokenStream value is used.  Exactly one of stringValue(),
     * readerValue(), binaryValue(), and tokenStreamValue() must be set.
     * The value is loaded from the fields stream on first access. */
    public String stringValue() {
      ensureOpen();
      if (fieldsData == null) {
        IndexInput localFieldsStream = getFieldStream();
        try {
          localFieldsStream.seek(pointer);
          if (isCompressed) {
            final byte[] b = new byte[toRead];
            localFieldsStream.readBytes(b, 0, b.length);
            fieldsData = new String(uncompress(b), "UTF-8");
          } else {
            // read in chars b/c we already know the length we need to read
            char[] chars = new char[toRead];
            localFieldsStream.readChars(chars, 0, toRead);
            fieldsData = new String(chars);
          }
        } catch (IOException e) {
          throw new FieldReaderException(e);
        }
      }
      return fieldsData instanceof String ? (String) fieldsData : null;
    }

    public long getPointer() {
      ensureOpen();
      return pointer;
    }

    public void setPointer(long pointer) {
      ensureOpen();
      this.pointer = pointer;
    }

    public int getToRead() {
      ensureOpen();
      return toRead;
    }

    public void setToRead(int toRead) {
      ensureOpen();
      this.toRead = toRead;
    }
  }

  /**
   * Inflates <code>input</code> and returns the decompressed bytes.
   *
   * @param input the compressed field data
   * @return the decompressed data
   * @throws CorruptIndexException if the data is not valid compressed data, ends before
   *         the end of the compressed stream, or requires a preset dictionary
   * @throws IOException declared for callers; CorruptIndexException is the only
   *         exception thrown explicitly here
   */
  private final byte[] uncompress(final byte[] input)
      throws CorruptIndexException, IOException {

    final Inflater decompressor = new Inflater();
    try {
      decompressor.setInput(input);

      // Create an expandable byte array to hold the decompressed data
      final ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);

      // Decompress the data
      final byte[] buf = new byte[1024];
      while (!decompressor.finished()) {
        final int count;
        try {
          count = decompressor.inflate(buf);
        } catch (DataFormatException e) {
          // this will happen if the field is not compressed
          CorruptIndexException newException = new CorruptIndexException("field data are in wrong format: " + e.toString());
          newException.initCause(e);
          throw newException;
        }
        // inflate() returning 0 without finishing means no progress is possible: the
        // input is exhausted (truncated or empty) or a preset dictionary is required.
        // Without this check the loop would never terminate.
        if (count == 0 && (decompressor.needsInput() || decompressor.needsDictionary())) {
          throw new CorruptIndexException("field data are in wrong format: compressed data is truncated or requires a preset dictionary");
        }
        bos.write(buf, 0, count);
      }

      // Get the decompressed data
      return bos.toByteArray();
    } finally {
      // Always release the inflater's native resources, including on failure.
      decompressor.end();
    }
  }

  /**
   * Instances of this class hold field properties and data for merge.
   * The value is kept exactly as passed in; Reader and TokenStream views are not
   * provided.
   */
  final static class FieldForMerge extends AbstractField {

    /** Returns the field data cast to String. */
    public String stringValue() {
      return (String) this.fieldsData;
    }

    public Reader readerValue() {
      // not needed for merge
      return null;
    }

    /** Returns the field data cast to byte[]. */
    public byte[] binaryValue() {
      return (byte[]) this.fieldsData;
    }

    public TokenStream tokenStreamValue() {
      // not needed for merge
      return null;
    }

    /**
     * Creates a stored field carrying <code>value</code>, with its remaining properties
     * copied from <code>fi</code> and the given flags.
     */
    public FieldForMerge(Object value, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) {
      this.isStored = true;
      this.fieldsData = value;
      this.isCompressed = compressed;
      this.isBinary = binary;
      this.isTokenized = tokenize;

      this.name = fi.name.intern();
      this.isIndexed = fi.isIndexed;
      this.omitNorms = fi.omitNorms;
      this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector;
      this.storePositionWithTermVector = fi.storePositionWithTermVector;
      this.storeTermVector = fi.storeTermVector;
    }
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -