📄 fieldsreader.java

📁 Lucene a java open-source SearchEngine Framework
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
      data = b;    } else {      data = fieldsStream.readString();    }          doc.add(new FieldForMerge(data, fi, binary, compressed, tokenize));  }    private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws CorruptIndexException, IOException {    //we have a binary stored field, and it may be compressed    if (binary) {      int toRead = fieldsStream.readVInt();      final byte[] b = new byte[toRead];      fieldsStream.readBytes(b, 0, b.length);      if (compressed)        doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));      else        doc.add(new Field(fi.name, b, Field.Store.YES));    } else {      Field.Store store = Field.Store.YES;      Field.Index index = getIndexType(fi, tokenize);      Field.TermVector termVector = getTermVectorType(fi);      Fieldable f;      if (compressed) {        store = Field.Store.COMPRESS;        int toRead = fieldsStream.readVInt();        final byte[] b = new byte[toRead];        fieldsStream.readBytes(b, 0, b.length);        f = new Field(fi.name,      // field name                new String(uncompress(b), "UTF-8"), // uncompress the value and add as string                store,                index,                termVector);        f.setOmitNorms(fi.omitNorms);      } else {        f = new Field(fi.name,     // name                fieldsStream.readString(), // read value                store,                index,                termVector);        f.setOmitNorms(fi.omitNorms);      }      doc.add(f);    }  }    // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)  // Read just the size -- caller must skip the field content to continue reading fields  // Return the size in bytes or chars, depending on field type  private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed) throws IOException {    int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2*size;    byte[] sizebytes = new byte[4];    sizebytes[0] = (byte) (bytesize>>>24);    sizebytes[1] = (byte) (bytesize>>>16);    sizebytes[2] = (byte) (bytesize>>> 8);    sizebytes[3] = (byte)  bytesize      ;    doc.add(new Field(fi.name, sizebytes, Field.Store.YES));    return size;  }  private Field.TermVector getTermVectorType(FieldInfo fi) {    Field.TermVector termVector = null;    if (fi.storeTermVector) {      if (fi.storeOffsetWithTermVector) {        if (fi.storePositionWithTermVector) {          termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;        } else {          termVector = Field.TermVector.WITH_OFFSETS;        }      } else if (fi.storePositionWithTermVector) {        termVector = Field.TermVector.WITH_POSITIONS;      } else {        termVector = Field.TermVector.YES;      }    } else {      termVector = Field.TermVector.NO;    }    return termVector;  }  private Field.Index getIndexType(FieldInfo fi, boolean tokenize) {    Field.Index index;    if (fi.isIndexed && tokenize)      index = Field.Index.TOKENIZED;    else if (fi.isIndexed && !tokenize)      index = Field.Index.UN_TOKENIZED;    else      index = Field.Index.NO;    return index;  }  /**   * A Lazy implementation of Fieldable that differs loading of fields until asked for, instead of when the Document is   * loaded.   */  private class LazyField extends AbstractField implements Fieldable {    private int toRead;    private long pointer;    public LazyField(String name, Field.Store store, int toRead, long pointer) {      super(name, store, Field.Index.NO, Field.TermVector.NO);      this.toRead = toRead;      this.pointer = pointer;      lazy = true;    }    public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer) {      super(name, store, index, termVector);      this.toRead = toRead;      this.pointer = pointer;      lazy = true;    }    private IndexInput getFieldStream() {      IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();      if (localFieldsStream == null) {        localFieldsStream = (IndexInput) cloneableFieldsStream.clone();        fieldsStreamTL.set(localFieldsStream);      }      return localFieldsStream;    }    /** The value of the field in Binary, or null.  If null, the Reader value,     * String value, or TokenStream value is used. Exactly one of stringValue(),      * readerValue(), binaryValue(), and tokenStreamValue() must be set. */    public byte[] binaryValue() {      ensureOpen();      if (fieldsData == null) {        final byte[] b = new byte[toRead];        IndexInput localFieldsStream = getFieldStream();        //Throw this IO Exception since IndexREader.document does so anyway, so probably not that big of a change for people        //since they are already handling this exception when getting the document        try {          localFieldsStream.seek(pointer);          localFieldsStream.readBytes(b, 0, b.length);          if (isCompressed == true) {            fieldsData = uncompress(b);          } else {            fieldsData = b;          }        } catch (IOException e) {          throw new FieldReaderException(e);        }      }      return fieldsData instanceof byte[] ? (byte[]) fieldsData : null;    }    /** The value of the field as a Reader, or null.  If null, the String value,     * binary value, or TokenStream value is used.  Exactly one of stringValue(),      * readerValue(), binaryValue(), and tokenStreamValue() must be set. */    public Reader readerValue() {      ensureOpen();      return fieldsData instanceof Reader ? (Reader) fieldsData : null;    }    /** The value of the field as a TokesStream, or null.  If null, the Reader value,     * String value, or binary value is used. Exactly one of stringValue(),      * readerValue(), binaryValue(), and tokenStreamValue() must be set. */    public TokenStream tokenStreamValue() {      ensureOpen();      return fieldsData instanceof TokenStream ? (TokenStream) fieldsData : null;    }        /** The value of the field as a String, or null.  If null, the Reader value,     * binary value, or TokenStream value is used.  Exactly one of stringValue(),      * readerValue(), binaryValue(), and tokenStreamValue() must be set. */    public String stringValue() {      ensureOpen();      if (fieldsData == null) {        IndexInput localFieldsStream = getFieldStream();        try {          localFieldsStream.seek(pointer);          if (isCompressed) {            final byte[] b = new byte[toRead];            localFieldsStream.readBytes(b, 0, b.length);            fieldsData = new String(uncompress(b), "UTF-8");          } else {            //read in chars b/c we already know the length we need to read            char[] chars = new char[toRead];            localFieldsStream.readChars(chars, 0, toRead);            fieldsData = new String(chars);          }        } catch (IOException e) {          throw new FieldReaderException(e);        }      }      return fieldsData instanceof String ? (String) fieldsData : null;    }    public long getPointer() {      ensureOpen();      return pointer;    }    public void setPointer(long pointer) {      ensureOpen();      this.pointer = pointer;    }    public int getToRead() {      ensureOpen();      return toRead;    }    public void setToRead(int toRead) {      ensureOpen();      this.toRead = toRead;    }  }  private final byte[] uncompress(final byte[] input)          throws CorruptIndexException, IOException {    Inflater decompressor = new Inflater();    decompressor.setInput(input);    // Create an expandable byte array to hold the decompressed data    ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);    // Decompress the data    byte[] buf = new byte[1024];    while (!decompressor.finished()) {      try {        int count = decompressor.inflate(buf);        bos.write(buf, 0, count);      }      catch (DataFormatException e) {        // this will happen if the field is not compressed        CorruptIndexException newException = new CorruptIndexException("field data are in wrong format: " + e.toString());        newException.initCause(e);        throw newException;      }    }      decompressor.end();        // Get the decompressed data    return bos.toByteArray();  }    // Instances of this class hold field properties and data  // for merge  final static class FieldForMerge extends AbstractField {    public String stringValue() {      return (String) this.fieldsData;    }    public Reader readerValue() {      // not needed for merge      return null;    }    public byte[] binaryValue() {      return (byte[]) this.fieldsData;    }    public TokenStream tokenStreamValue() {      // not needed for merge      return null;    }        public FieldForMerge(Object value, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) {      this.isStored = true;        this.fieldsData = value;      this.isCompressed = compressed;      this.isBinary = binary;      this.isTokenized = tokenize;      this.name = fi.name.intern();      this.isIndexed = fi.isIndexed;      this.omitNorms = fi.omitNorms;                this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector;      this.storePositionWithTermVector = fi.storePositionWithTermVector;      this.storeTermVector = fi.storeTermVector;                }       }}
上一页 12
💿 文件大小 5390 K
👤 上传用户 rickie936
📂 所属分类 Java编程
🏷️ 相关标签

#SearchEngine #open-source #Framework #Lucene
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -