
📄 FieldsWriter.java

📁 lucene-2.4.0 is a full-text search toolkit
💻 JAVA
package org.apache.lucene.index;

/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.zip.Deflater;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.IndexInput;

final class FieldsWriter {
  static final byte FIELD_IS_TOKENIZED = 0x1;
  static final byte FIELD_IS_BINARY = 0x2;
  static final byte FIELD_IS_COMPRESSED = 0x4;

  // Original format
  static final int FORMAT = 0;

  // Changed strings to UTF8
  static final int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = 1;

  // NOTE: if you introduce a new format, make it 1 higher
  // than the current one, and always change this if you
  // switch to a new format!
  static final int FORMAT_CURRENT = FORMAT_VERSION_UTF8_LENGTH_IN_BYTES;

  private FieldInfos fieldInfos;

  private IndexOutput fieldsStream;

  private IndexOutput indexStream;

  private boolean doClose;

  FieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException {
    fieldInfos = fn;

    boolean success = false;
    final String fieldsName = segment + "." + IndexFileNames.FIELDS_EXTENSION;
    try {
      fieldsStream = d.createOutput(fieldsName);
      fieldsStream.writeInt(FORMAT_CURRENT);
      success = true;
    } finally {
      if (!success) {
        try {
          close();
        } catch (Throwable t) {
          // Suppress so we keep throwing the original exception
        }
        try {
          d.deleteFile(fieldsName);
        } catch (Throwable t) {
          // Suppress so we keep throwing the original exception
        }
      }
    }

    success = false;
    final String indexName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION;
    try {
      indexStream = d.createOutput(indexName);
      indexStream.writeInt(FORMAT_CURRENT);
      success = true;
    } finally {
      if (!success) {
        try {
          close();
        } catch (IOException ioe) {
          // Suppress so we keep throwing the original exception
        }
        try {
          d.deleteFile(fieldsName);
        } catch (Throwable t) {
          // Suppress so we keep throwing the original exception
        }
        try {
          d.deleteFile(indexName);
        } catch (Throwable t) {
          // Suppress so we keep throwing the original exception
        }
      }
    }

    doClose = true;
  }

  FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) {
    fieldInfos = fn;
    fieldsStream = fdt;
    indexStream = fdx;
    doClose = false;
  }

  void setFieldsStream(IndexOutput stream) {
    this.fieldsStream = stream;
  }

  // Writes the contents of buffer into the fields stream
  // and adds a new entry for this document into the index
  // stream.  This assumes the buffer was already written
  // in the correct fields format.
  void flushDocument(int numStoredFields, RAMOutputStream buffer) throws IOException {
    indexStream.writeLong(fieldsStream.getFilePointer());
    fieldsStream.writeVInt(numStoredFields);
    buffer.writeTo(fieldsStream);
  }

  void skipDocument() throws IOException {
    indexStream.writeLong(fieldsStream.getFilePointer());
    fieldsStream.writeVInt(0);
  }

  void flush() throws IOException {
    indexStream.flush();
    fieldsStream.flush();
  }

  final void close() throws IOException {
    if (doClose) {
      try {
        if (fieldsStream != null) {
          try {
            fieldsStream.close();
          } finally {
            fieldsStream = null;
          }
        }
      } catch (IOException ioe) {
        try {
          if (indexStream != null) {
            try {
              indexStream.close();
            } finally {
              indexStream = null;
            }
          }
        } catch (IOException ioe2) {
          // Ignore so we throw only first IOException hit
        }
        throw ioe;
      } finally {
        if (indexStream != null) {
          try {
            indexStream.close();
          } finally {
            indexStream = null;
          }
        }
      }
    }
  }

  final void writeField(FieldInfo fi, Fieldable field) throws IOException {
    // if the field is an instance of FieldsReader.FieldForMerge, we're in merge mode
    // and field.binaryValue() already returns the compressed value for a field
    // with isCompressed()==true, so we disable compression in that case
    boolean disableCompression = (field instanceof FieldsReader.FieldForMerge);
    fieldsStream.writeVInt(fi.number);
    byte bits = 0;
    if (field.isTokenized())
      bits |= FieldsWriter.FIELD_IS_TOKENIZED;
    if (field.isBinary())
      bits |= FieldsWriter.FIELD_IS_BINARY;
    if (field.isCompressed())
      bits |= FieldsWriter.FIELD_IS_COMPRESSED;

    fieldsStream.writeByte(bits);

    if (field.isCompressed()) {
      // compression is enabled for the current field
      final byte[] data;
      final int len;
      final int offset;
      if (disableCompression) {
        // optimized case for merging, the data
        // is already compressed
        data = field.getBinaryValue();
        assert data != null;
        len = field.getBinaryLength();
        offset = field.getBinaryOffset();
      } else {
        // check if it is a binary field
        if (field.isBinary()) {
          data = compress(field.getBinaryValue(), field.getBinaryOffset(), field.getBinaryLength());
        } else {
          byte[] x = field.stringValue().getBytes("UTF-8");
          data = compress(x, 0, x.length);
        }
        len = data.length;
        offset = 0;
      }

      fieldsStream.writeVInt(len);
      fieldsStream.writeBytes(data, offset, len);
    } else {
      // compression is disabled for the current field
      if (field.isBinary()) {
        final byte[] data = field.getBinaryValue();
        final int len = field.getBinaryLength();
        final int offset = field.getBinaryOffset();

        fieldsStream.writeVInt(len);
        fieldsStream.writeBytes(data, offset, len);
      } else {
        fieldsStream.writeString(field.stringValue());
      }
    }
  }

  /** Bulk write a contiguous series of documents.  The
   *  lengths array is the length (in bytes) of each raw
   *  document.  The stream IndexInput is the
   *  fieldsStream from which we should bulk-copy all
   *  bytes. */
  final void addRawDocuments(IndexInput stream, int[] lengths, int numDocs) throws IOException {
    long position = fieldsStream.getFilePointer();
    long start = position;
    for (int i = 0; i < numDocs; i++) {
      indexStream.writeLong(position);
      position += lengths[i];
    }
    fieldsStream.copyBytes(stream, position - start);
    assert fieldsStream.getFilePointer() == position;
  }

  final void addDocument(Document doc) throws IOException {
    indexStream.writeLong(fieldsStream.getFilePointer());

    int storedCount = 0;
    Iterator fieldIterator = doc.getFields().iterator();
    while (fieldIterator.hasNext()) {
      Fieldable field = (Fieldable) fieldIterator.next();
      if (field.isStored())
        storedCount++;
    }
    fieldsStream.writeVInt(storedCount);

    fieldIterator = doc.getFields().iterator();
    while (fieldIterator.hasNext()) {
      Fieldable field = (Fieldable) fieldIterator.next();
      if (field.isStored())
        writeField(fieldInfos.fieldInfo(field.name()), field);
    }
  }

  private final byte[] compress(byte[] input, int offset, int length) {
    // Create the compressor with highest level of compression
    Deflater compressor = new Deflater();
    compressor.setLevel(Deflater.BEST_COMPRESSION);

    // Give the compressor the data to compress, honoring the caller's
    // offset/length so that only the intended slice is compressed
    compressor.setInput(input, offset, length);
    compressor.finish();

    /*
     * Create an expandable byte array to hold the compressed data.
     * You cannot use an array that's the same size as the original because
     * there is no guarantee that the compressed data will be smaller than
     * the uncompressed data.
     */
    ByteArrayOutputStream bos = new ByteArrayOutputStream(length);

    try {
      // Compress the data
      byte[] buf = new byte[1024];
      while (!compressor.finished()) {
        int count = compressor.deflate(buf);
        bos.write(buf, 0, count);
      }
    } finally {
      compressor.end();
    }

    // Get the compressed data
    return bos.toByteArray();
  }
}
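
For orientation, below is a minimal sketch of how stored fields reach this writer through Lucene 2.4's public API. The field names, sample text, and the use of RAMDirectory are illustrative assumptions, not part of the listing above; IndexWriter, Document, and Field are the actual entry points that end up calling FieldsWriter.addDocument().

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.RAMDirectory;

public class StoredFieldsDemo {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    // IndexWriter drives FieldsWriter internally: each added document's
    // stored fields land in the segment's .fdt file, and one 8-byte
    // pointer per document is appended to the .fdx index file.
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(),
        true, IndexWriter.MaxFieldLength.LIMITED);

    Document doc = new Document();
    // Store.YES -> written as-is by writeField();
    // Store.COMPRESS -> written with the FIELD_IS_COMPRESSED bit set,
    // using the Deflater-based compress() shown above.
    doc.add(new Field("title", "Hello Lucene",
        Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("body", "A longer text payload worth compressing...",
        Field.Store.COMPRESS, Field.Index.ANALYZED));

    writer.addDocument(doc);   // ends up in FieldsWriter.addDocument()
    writer.close();
  }
}

Given the header written in the constructor, the .fdx file is a 4-byte format int followed by one long per document, so document n's stored fields begin at the .fdt position read from offset 4 + 8 * n in .fdx. addRawDocuments() exploits this fixed stride during merges to bulk-copy already-formatted documents without re-parsing their fields.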
