📄 fieldsreader.java
字号:
data = b;
    } else {
      data = fieldsStream.readString();
    }
    doc.add(new FieldForMerge(data, fi, binary, compressed, tokenize));
  }

  /**
   * Reads the next stored field value from {@code fieldsStream} and adds it to
   * {@code doc} as an eagerly-loaded {@link Field}.
   *
   * @param doc        document being populated
   * @param fi         field-infos entry describing this field (name, flags)
   * @param binary     true if the stored value is binary bytes
   * @param compressed true if the stored value is compressed
   * @param tokenize   true if the field was tokenized when indexed
   * @throws CorruptIndexException if a compressed value cannot be decompressed
   * @throws IOException           on any underlying stream error
   */
  private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws CorruptIndexException, IOException {

    // we have a binary stored field, and it may be compressed
    if (binary) {
      // Binary values are stored as a VInt length followed by that many bytes.
      int toRead = fieldsStream.readVInt();
      final byte[] b = new byte[toRead];
      fieldsStream.readBytes(b, 0, b.length);
      if (compressed)
        doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
      else
        doc.add(new Field(fi.name, b, Field.Store.YES));
    } else {
      Field.Store store = Field.Store.YES;
      Field.Index index = getIndexType(fi, tokenize);
      Field.TermVector termVector = getTermVectorType(fi);

      Fieldable f;
      if (compressed) {
        store = Field.Store.COMPRESS;
        // Compressed text is stored like binary: VInt length + raw bytes.
        int toRead = fieldsStream.readVInt();

        final byte[] b = new byte[toRead];
        fieldsStream.readBytes(b, 0, b.length);
        f = new Field(fi.name,      // field name
                new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
                store,
                index,
                termVector);
        f.setOmitNorms(fi.omitNorms);
      } else {
        f = new Field(fi.name,     // name
                fieldsStream.readString(), // read value
                store,
                index,
                termVector);
        f.setOmitNorms(fi.omitNorms);
      }
      doc.add(f);
    }
  }

  // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
  // Read just the size -- caller must skip the field content to continue reading fields
  // Return the size in bytes or chars, depending on field type
  private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed) throws IOException {
    // Binary/compressed values are stored as bytes; plain text is stored as
    // chars, so its byte size is 2 * the stored char count.
    int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2*size;
    byte[] sizebytes = new byte[4];
    sizebytes[0] = (byte) (bytesize>>>24);  // big-endian: high order byte first
    sizebytes[1] = (byte) (bytesize>>>16);
    sizebytes[2] = (byte) (bytesize>>> 8);
    sizebytes[3] = (byte)  bytesize      ;
    doc.add(new Field(fi.name, sizebytes, Field.Store.YES));
    return size;
  }

  /** Maps the term-vector flags recorded in {@code fi} to the matching
   *  {@link Field.TermVector} constant. */
  private Field.TermVector getTermVectorType(FieldInfo fi) {
    Field.TermVector termVector = null;
    if (fi.storeTermVector) {
      if (fi.storeOffsetWithTermVector) {
        if (fi.storePositionWithTermVector) {
          termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
        } else {
          termVector = Field.TermVector.WITH_OFFSETS;
        }
      } else if (fi.storePositionWithTermVector) {
        termVector = Field.TermVector.WITH_POSITIONS;
      } else {
        termVector = Field.TermVector.YES;
      }
    } else {
      termVector = Field.TermVector.NO;
    }
    return termVector;
  }

  /** Maps {@code fi}'s indexed flag plus the per-field tokenize bit to a
   *  {@link Field.Index} constant. */
  private Field.Index getIndexType(FieldInfo fi, boolean tokenize) {
    Field.Index index;
    if (fi.isIndexed && tokenize)
      index = Field.Index.ANALYZED;
    else if (fi.isIndexed && !tokenize)
      index = Field.Index.NOT_ANALYZED;
    else
      index = Field.Index.NO;
    return index;
  }

  /**
   * A Lazy implementation of Fieldable that defers loading of fields until asked for, instead of when the Document is
   * loaded.
   */
  private class LazyField extends AbstractField implements Fieldable {
    // Number of items to read for this value: bytes for binary/compressed/new
    // UTF-8 format, chars for the old text format (see stringValue()).
    private int toRead;
    // Absolute position of this field's value within the fields stream.
    private long pointer;

    public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary) {
      super(name, store, Field.Index.NO, Field.TermVector.NO);
      this.toRead = toRead;
      this.pointer = pointer;
      this.isBinary = isBinary;
      lazy = true;
    }

    public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary) {
      super(name, store, index, termVector);
      this.toRead = toRead;
      this.pointer = pointer;
      this.isBinary = isBinary;
      lazy = true;
    }

    // Returns a thread-private clone of the fields stream so lazy loads from
    // different threads do not disturb each other's file position.
    private IndexInput getFieldStream() {
      IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
      if (localFieldsStream == null) {
        localFieldsStream = (IndexInput) cloneableFieldsStream.clone();
        fieldsStreamTL.set(localFieldsStream);
      }
      return localFieldsStream;
    }

    /** The value of the field in Binary, or null.  If null, the Reader value,
     * String value, or TokenStream value is used. Exactly one of stringValue(),
     * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
    public byte[] binaryValue() {
      return getBinaryValue(null);
    }

    /** The value of the field as a Reader, or null.  If null, the String value,
     * binary value, or TokenStream value is used.  Exactly one of stringValue(),
     * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
    public Reader readerValue() {
      ensureOpen();
      return null;
    }

    /** The value of the field as a TokenStream, or null.  If null, the Reader value,
     * String value, or binary value is used. Exactly one of stringValue(),
     * readerValue(), binaryValue(), and tokenStreamValue() must be set. */
    public TokenStream tokenStreamValue() {
      ensureOpen();
      return null;
    }

    /** The value of the field as a String, or null.  If null, the Reader value,
     * binary value, or TokenStream value is used.  Exactly one of stringValue(),
     * readerValue(), binaryValue(), and tokenStreamValue() must be set.
     */
    public String stringValue() {
      ensureOpen();
      if (isBinary)
        return null;
      else {
        if (fieldsData == null) {
          // First access: seek to the value and materialize it, then cache it
          // in fieldsData for subsequent calls.
          IndexInput localFieldsStream = getFieldStream();
          try {
            localFieldsStream.seek(pointer);
            if (isCompressed) {
              final byte[] b = new byte[toRead];
              localFieldsStream.readBytes(b, 0, b.length);
              fieldsData = new String(uncompress(b), "UTF-8");
            } else {
              if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
                // New format: toRead is a byte count of UTF-8 data.
                byte[] bytes = new byte[toRead];
                localFieldsStream.readBytes(bytes, 0, toRead);
                fieldsData = new String(bytes, "UTF-8");
              } else {
                //read in chars b/c we already know the length we need to read
                char[] chars = new char[toRead];
                localFieldsStream.readChars(chars, 0, toRead);
                fieldsData = new String(chars);
              }
            }
          } catch (IOException e) {
            throw new FieldReaderException(e);
          }
        }
        return (String) fieldsData;
      }
    }

    public long getPointer() {
      ensureOpen();
      return pointer;
    }

    public void setPointer(long pointer) {
      ensureOpen();
      this.pointer = pointer;
    }

    public int getToRead() {
      ensureOpen();
      return toRead;
    }

    public void setToRead(int toRead) {
      ensureOpen();
      this.toRead = toRead;
    }

    public byte[] getBinaryValue(byte[] result) {
      ensureOpen();

      if (isBinary) {
        if (fieldsData == null) {
          // Allocate new buffer if result is null or too small
          final byte[] b;
          if (result == null || result.length < toRead)
            b = new byte[toRead];
          else
            b = result;

          IndexInput localFieldsStream = getFieldStream();

          // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
          // since they are already handling this exception when getting the document
          try {
            localFieldsStream.seek(pointer);
            localFieldsStream.readBytes(b, 0, toRead);
            if (isCompressed == true) {
              fieldsData = uncompress(b);
            } else {
              fieldsData = b;
            }
          } catch (IOException e) {
            throw new FieldReaderException(e);
          }

          binaryOffset = 0;
          binaryLength = toRead;
        }

        return (byte[]) fieldsData;
      } else
        return null;
    }
  }

  /**
   * Decompresses a stored field value with {@link Inflater}.
   *
   * @param input the raw compressed bytes as read from the fields stream
   * @return a newly allocated array holding the decompressed bytes
   * @throws CorruptIndexException if the bytes are not valid compressed data
   */
  private final byte[] uncompress(final byte[] input) throws CorruptIndexException,
IOException {

    // Create an expandable byte array to hold the decompressed data
    ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);

    Inflater decompressor = new Inflater();

    try {
      decompressor.setInput(input);

      // Decompress the data
      byte[] buf = new byte[1024];
      while (!decompressor.finished()) {
        try {
          int count = decompressor.inflate(buf);
          bos.write(buf, 0, count);
        } catch (DataFormatException e) {
          // this will happen if the field is not compressed
          CorruptIndexException newException = new CorruptIndexException("field data are in wrong format: " + e.toString());
          newException.initCause(e);  // preserve the original cause for callers
          throw newException;
        }
      }
    } finally {
      // Always release the native zlib resources held by the Inflater.
      decompressor.end();
    }

    // Get the decompressed data
    return bos.toByteArray();
  }

  // Instances of this class hold field properties and data
  // for merge
  final static class FieldForMerge extends AbstractField {

    public String stringValue() {
      return (String) this.fieldsData;
    }

    public Reader readerValue() {
      // not needed for merge
      return null;
    }

    public byte[] binaryValue() {
      return (byte[]) this.fieldsData;
    }

    public TokenStream tokenStreamValue() {
      // not needed for merge
      return null;
    }

    // Copies the already-read value plus all flags from the FieldInfo so the
    // merge path can re-write the field without re-parsing it.
    public FieldForMerge(Object value, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) {
      this.isStored = true;
      this.fieldsData = value;
      this.isCompressed = compressed;
      this.isBinary = binary;
      this.isTokenized = tokenize;
      // intern() lets later name comparisons use identity, matching FieldInfos.
      this.name = fi.name.intern();
      this.isIndexed = fi.isIndexed;
      this.omitNorms = fi.omitNorms;
      this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector;
      this.storePositionWithTermVector = fi.storePositionWithTermVector;
      this.storeTermVector = fi.storeTermVector;
    }
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -