📄 fieldsreader.java

📁 一套java版本的搜索引擎源码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
      } else {        f = new Field(fi.name,     // name                fieldsStream.readString(), // read value                store,                index,                termVector);        f.setOmitNorms(fi.omitNorms);      }      doc.add(f);    }  }    // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)  // Read just the size -- caller must skip the field content to continue reading fields  // Return the size in bytes or chars, depending on field type  private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed) throws IOException {    int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2*size;    byte[] sizebytes = new byte[4];    sizebytes[0] = (byte) (bytesize>>>24);    sizebytes[1] = (byte) (bytesize>>>16);    sizebytes[2] = (byte) (bytesize>>> 8);    sizebytes[3] = (byte)  bytesize      ;    doc.add(new Field(fi.name, sizebytes, Field.Store.YES));    return size;  }  private Field.TermVector getTermVectorType(FieldInfo fi) {    Field.TermVector termVector = null;    if (fi.storeTermVector) {      if (fi.storeOffsetWithTermVector) {        if (fi.storePositionWithTermVector) {          termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;        } else {          termVector = Field.TermVector.WITH_OFFSETS;        }      } else if (fi.storePositionWithTermVector) {        termVector = Field.TermVector.WITH_POSITIONS;      } else {        termVector = Field.TermVector.YES;      }    } else {      termVector = Field.TermVector.NO;    }    return termVector;  }  private Field.Index getIndexType(FieldInfo fi, boolean tokenize) {    Field.Index index;    if (fi.isIndexed && tokenize)      index = Field.Index.TOKENIZED;    else if (fi.isIndexed && !tokenize)      index = Field.Index.UN_TOKENIZED;    else      index = Field.Index.NO;    return index;  }  /**   * A Lazy implementation of Fieldable that differs loading of fields until asked for, instead of when the Document is   * loaded.   */  private class LazyField extends AbstractField implements Fieldable {    private int toRead;    private long pointer;    public LazyField(String name, Field.Store store, int toRead, long pointer) {      super(name, store, Field.Index.NO, Field.TermVector.NO);      this.toRead = toRead;      this.pointer = pointer;      lazy = true;    }    public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer) {      super(name, store, index, termVector);      this.toRead = toRead;      this.pointer = pointer;      lazy = true;    }    private IndexInput getFieldStream() {      IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();      if (localFieldsStream == null) {        localFieldsStream = (IndexInput) cloneableFieldsStream.clone();        fieldsStreamTL.set(localFieldsStream);      }      return localFieldsStream;    }    /**     * The value of the field in Binary, or null.  If null, the Reader or     * String value is used.  Exactly one of stringValue(), readerValue() and     * binaryValue() must be set.     */    public byte[] binaryValue() {      if (fieldsData == null) {        final byte[] b = new byte[toRead];        IndexInput localFieldsStream = getFieldStream();        //Throw this IO Exception since IndexREader.document does so anyway, so probably not that big of a change for people        //since they are already handling this exception when getting the document        try {          localFieldsStream.seek(pointer);          localFieldsStream.readBytes(b, 0, b.length);          if (isCompressed == true) {            fieldsData = uncompress(b);          } else {            fieldsData = b;          }        } catch (IOException e) {          throw new FieldReaderException(e);        }      }      return fieldsData instanceof byte[] ? (byte[]) fieldsData : null;    }    /**     * The value of the field as a Reader, or null.  If null, the String value     * or binary value is  used.  Exactly one of stringValue(), readerValue(),     * and binaryValue() must be set.     */    public Reader readerValue() {      return fieldsData instanceof Reader ? (Reader) fieldsData : null;    }    /**     * The value of the field as a String, or null.  If null, the Reader value     * or binary value is used.  Exactly one of stringValue(), readerValue(), and     * binaryValue() must be set.     */    public String stringValue() {      if (fieldsData == null) {        IndexInput localFieldsStream = getFieldStream();        try {          localFieldsStream.seek(pointer);          if (isCompressed) {            final byte[] b = new byte[toRead];            localFieldsStream.readBytes(b, 0, b.length);            fieldsData = new String(uncompress(b), "UTF-8");          } else {            //read in chars b/c we already know the length we need to read            char[] chars = new char[toRead];            localFieldsStream.readChars(chars, 0, toRead);            fieldsData = new String(chars);          }        } catch (IOException e) {          throw new FieldReaderException(e);        }      }      return fieldsData instanceof String ? (String) fieldsData : null;    }    public long getPointer() {      return pointer;    }    public void setPointer(long pointer) {      this.pointer = pointer;    }    public int getToRead() {      return toRead;    }    public void setToRead(int toRead) {      this.toRead = toRead;    }  }  private final byte[] uncompress(final byte[] input)          throws IOException {    Inflater decompressor = new Inflater();    decompressor.setInput(input);    // Create an expandable byte array to hold the decompressed data    ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);    // Decompress the data    byte[] buf = new byte[1024];    while (!decompressor.finished()) {      try {        int count = decompressor.inflate(buf);        bos.write(buf, 0, count);      }      catch (DataFormatException e) {        // this will happen if the field is not compressed        IOException newException = new IOException("field data are in wrong format: " + e.toString());        newException.initCause(e);        throw newException;      }    }      decompressor.end();        // Get the decompressed data    return bos.toByteArray();  }    // Instances of this class hold field properties and data  // for merge  final static class FieldForMerge extends AbstractField {    public String stringValue() {      return (String) this.fieldsData;    }    public Reader readerValue() {      // not needed for merge      return null;    }    public byte[] binaryValue() {      return (byte[]) this.fieldsData;    }        public FieldForMerge(Object value, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) {      this.isStored = true;        this.fieldsData = value;      this.isCompressed = compressed;      this.isBinary = binary;      this.isTokenized = tokenize;      this.name = fi.name.intern();      this.isIndexed = fi.isIndexed;      this.omitNorms = fi.omitNorms;                this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector;      this.storePositionWithTermVector = fi.storePositionWithTermVector;      this.storeTermVector = fi.storeTermVector;                }       }}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -