📄 DocumentsWriter.java
          return -1;
        else if (c2 < c1)
          if (0xffff == c1)
            return -1;
          else
            return 1;
        else if (0xffff == c1)
          return 0;
      }
    }

    /** Write vInt into freq stream of current Posting */
    public void writeFreqVInt(int i) {
      while ((i & ~0x7F) != 0) {
        writeFreqByte((byte) ((i & 0x7f) | 0x80));
        i >>>= 7;
      }
      writeFreqByte((byte) i);
    }

    /** Write vInt into prox stream of current Posting */
    public void writeProxVInt(int i) {
      while ((i & ~0x7F) != 0) {
        writeProxByte((byte) ((i & 0x7f) | 0x80));
        i >>>= 7;
      }
      writeProxByte((byte) i);
    }

    /** Write byte into freq stream of current Posting */
    byte[] freq;
    int freqUpto;

    public void writeFreqByte(byte b) {
      assert freq != null;
      if (freq[freqUpto] != 0) {
        freqUpto = postingsPool.allocSlice(freq, freqUpto);
        freq = postingsPool.buffer;
        p.freqUpto = postingsPool.byteOffset;
      }
      freq[freqUpto++] = b;
    }

    /** Write byte into prox stream of current Posting */
    byte[] prox;
    int proxUpto;

    public void writeProxByte(byte b) {
      assert prox != null;
      if (prox[proxUpto] != 0) {
        proxUpto = postingsPool.allocSlice(prox, proxUpto);
        prox = postingsPool.buffer;
        p.proxUpto = postingsPool.byteOffset;
        assert prox != null;
      }
      prox[proxUpto++] = b;
      assert proxUpto != prox.length;
    }

    /** Currently only used to copy a payload into the prox
     *  stream. */
    public void writeProxBytes(byte[] b, int offset, int len) {
      final int offsetEnd = offset + len;
      while (offset < offsetEnd) {
        if (prox[proxUpto] != 0) {
          // End marker
          proxUpto = postingsPool.allocSlice(prox, proxUpto);
          prox = postingsPool.buffer;
          p.proxUpto = postingsPool.byteOffset;
        }
        prox[proxUpto++] = b[offset++];
        assert proxUpto != prox.length;
      }
    }

    /** Write vInt into offsets stream of current
     *  PostingVector */
    public void writeOffsetVInt(int i) {
      while ((i & ~0x7F) != 0) {
        writeOffsetByte((byte) ((i & 0x7f) | 0x80));
        i >>>= 7;
      }
      writeOffsetByte((byte) i);
    }

    byte[] offsets;
    int offsetUpto;

    /** Write byte into offsets stream of current
     *  PostingVector */
    public void writeOffsetByte(byte b) {
      assert offsets != null;
      if (offsets[offsetUpto] != 0) {
        offsetUpto = vectorsPool.allocSlice(offsets, offsetUpto);
        offsets = vectorsPool.buffer;
        vector.offsetUpto = vectorsPool.byteOffset;
      }
      offsets[offsetUpto++] = b;
    }

    /** Write vInt into pos stream of current
     *  PostingVector */
    public void writePosVInt(int i) {
      while ((i & ~0x7F) != 0) {
        writePosByte((byte) ((i & 0x7f) | 0x80));
        i >>>= 7;
      }
      writePosByte((byte) i);
    }

    byte[] pos;
    int posUpto;

    /** Write byte into pos stream of current
     *  PostingVector */
    public void writePosByte(byte b) {
      assert pos != null;
      if (pos[posUpto] != 0) {
        posUpto = vectorsPool.allocSlice(pos, posUpto);
        pos = vectorsPool.buffer;
        vector.posUpto = vectorsPool.byteOffset;
      }
      pos[posUpto++] = b;
    }

    PostingVector[] postingsVectors = new PostingVector[1];
    int maxPostingsVectors;

    // Used to read a string value for a field
    ReusableStringReader stringReader = new ReusableStringReader();
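The write*VInt methods above all apply Lucene's standard variable-length integer encoding: seven payload bits per byte, with the high bit set on every byte except the last to mean "another byte follows". Small values such as term frequencies and position deltas therefore usually fit in a single byte. A minimal, self-contained round-trip of that encoding follows; the class and method names are illustrative only and are not part of DocumentsWriter or the Lucene API.

class VIntSketch {

  // Encode i into out starting at upto; returns the position after the last byte written.
  static int writeVInt(byte[] out, int upto, int i) {
    while ((i & ~0x7F) != 0) {                   // more than 7 significant bits remain
      out[upto++] = (byte) ((i & 0x7F) | 0x80);  // high bit: another byte follows
      i >>>= 7;
    }
    out[upto++] = (byte) i;                      // final byte, high bit clear
    return upto;
  }

  // Decode the vInt that starts at in[upto].
  static int readVInt(byte[] in, int upto) {
    byte b = in[upto++];
    int value = b & 0x7F;
    for (int shift = 7; (b & 0x80) != 0; shift += 7) {
      b = in[upto++];
      value |= (b & 0x7F) << shift;
    }
    return value;
  }

  public static void main(String[] args) {
    byte[] buf = new byte[8];
    writeVInt(buf, 0, 300);                      // 300 encodes as 0xAC 0x02
    System.out.println(readVInt(buf, 0));        // prints 300
  }
}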
    /** Holds data associated with a single field, including
     *  the Postings hash.  A document may have many
     *  occurrences for a given field name; we gather all
     *  such occurrences here (in docFields) so that we can
     *  process the entire field at once. */
    private final class FieldData implements Comparable {

      ThreadState threadState;
      FieldInfo fieldInfo;

      int fieldCount;
      Fieldable[] docFields = new Fieldable[1];

      int lastGen = -1;
      FieldData next;

      boolean doNorms;
      boolean doVectors;
      boolean doVectorPositions;
      boolean doVectorOffsets;
      boolean postingsCompacted;

      int numPostings;

      Posting[] postingsHash;
      int postingsHashSize;
      int postingsHashHalfSize;
      int postingsHashMask;

      int position;
      int length;
      int offset;
      float boost;
      int postingsVectorsUpto;

      public FieldData(FieldInfo fieldInfo) {
        this.fieldInfo = fieldInfo;
        threadState = ThreadState.this;
      }

      void resetPostingArrays() {
        if (!postingsCompacted)
          compactPostings();
        recyclePostings(this.postingsHash, numPostings);
        Arrays.fill(postingsHash, 0, postingsHash.length, null);
        postingsCompacted = false;
        numPostings = 0;
      }

      void initPostingArrays() {
        // Target hash fill factor of <= 50%
        // NOTE: must be a power of two for hash collision
        // strategy to work correctly
        postingsHashSize = 4;
        postingsHashHalfSize = 2;
        postingsHashMask = postingsHashSize - 1;
        postingsHash = new Posting[postingsHashSize];
      }

      /** So Arrays.sort can sort us. */
      public int compareTo(Object o) {
        return fieldInfo.name.compareTo(((FieldData) o).fieldInfo.name);
      }

      private void compactPostings() {
        int upto = 0;
        for (int i = 0; i < postingsHashSize; i++)
          if (postingsHash[i] != null)
            postingsHash[upto++] = postingsHash[i];

        assert upto == numPostings;
        postingsCompacted = true;
      }

      /** Collapse the hash table & sort in-place. */
      public Posting[] sortPostings() {
        compactPostings();
        doPostingSort(postingsHash, numPostings);
        return postingsHash;
      }

      /** Process all occurrences of one field in the document. */
      public void processField(Analyzer analyzer) throws IOException, AbortException {
        length = 0;
        position = 0;
        offset = 0;
        boost = docBoost;

        final int maxFieldLength = writer.getMaxFieldLength();

        final int limit = fieldCount;
        final Fieldable[] docFieldsFinal = docFields;

        boolean doWriteVectors = true;

        // Walk through all occurrences in this doc for this
        // field:
        try {
          for (int j = 0; j < limit; j++) {
            Fieldable field = docFieldsFinal[j];

            if (field.isIndexed())
              invertField(field, analyzer, maxFieldLength);

            if (field.isStored()) {
              numStoredFields++;
              boolean success = false;
              try {
                localFieldsWriter.writeField(fieldInfo, field);
                success = true;
              } finally {
                // If we hit an exception inside
                // localFieldsWriter.writeField, the
                // contents of fdtLocal can be corrupt, so
                // we must discard all stored fields for
                // this document:
                if (!success)
                  fdtLocal.reset();
              }
            }

            docFieldsFinal[j] = null;
          }
        } catch (AbortException ae) {
          doWriteVectors = false;
          throw ae;
        } finally {
          if (postingsVectorsUpto > 0) {
            try {
              if (doWriteVectors) {
                // Add term vectors for this field
                boolean success = false;
                try {
                  writeVectors(fieldInfo);
                  success = true;
                } finally {
                  if (!success) {
                    // If we hit an exception inside
                    // writeVectors, the contents of tvfLocal
                    // can be corrupt, so we must discard all
                    // term vectors for this document:
                    numVectorFields = 0;
                    tvfLocal.reset();
                  }
                }
              }
            } finally {
              if (postingsVectorsUpto > maxPostingsVectors)
                maxPostingsVectors = postingsVectorsUpto;

              postingsVectorsUpto = 0;
              vectorsPool.reset();
            }
          }
        }
      }

      int offsetEnd;
      Token localToken = new Token();
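compactPostings() and sortPostings() above use a small trick: instead of copying the open-addressed Posting hash into a separate array before sorting, the non-null entries are slid to the front of the same array and only that dense prefix is sorted in place. A stripped-down sketch of the same pattern follows, using a hypothetical String[] table in place of the Posting[] hash; none of these names come from the original source.

import java.util.Arrays;

class CompactAndSortSketch {

  // Slide the non-null entries of an open-addressed hash array to the
  // front and return how many there are; entries past that count are stale.
  static int compact(String[] hash) {
    int upto = 0;
    for (int i = 0; i < hash.length; i++)
      if (hash[i] != null)
        hash[upto++] = hash[i];
    return upto;
  }

  public static void main(String[] args) {
    String[] hash = {null, "cat", null, null, "apple", null, "bird", null};
    int count = compact(hash);
    Arrays.sort(hash, 0, count);        // sort only the dense prefix, in place
    System.out.println(Arrays.toString(Arrays.copyOf(hash, count)));
    // prints [apple, bird, cat]
  }
}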
      /* Invert one occurrence of one field in the document */
      public void invertField(Fieldable field, Analyzer analyzer, final int maxFieldLength) throws IOException, AbortException {

        if (length > 0)
          position += analyzer.getPositionIncrementGap(fieldInfo.name);

        if (!field.isTokenized()) {               // un-tokenized field
          String stringValue = field.stringValue();
          final int valueLength = stringValue.length();
          Token token = localToken;
          token.clear();
          char[] termBuffer = token.termBuffer();
          if (termBuffer.length < valueLength)
            termBuffer = token.resizeTermBuffer(valueLength);
          stringValue.getChars(0, valueLength, termBuffer, 0);
          token.setTermLength(valueLength);
          token.setStartOffset(offset);
          token.setEndOffset(offset + stringValue.length());
          addPosition(token);
          offset += stringValue.length();
          length++;
        } else {                                  // tokenized field
          final TokenStream stream;
          final TokenStream streamValue = field.tokenStreamValue();

          if (streamValue != null)
            stream = streamValue;
          else {
            // the field does not have a TokenStream,
            // so we have to obtain one from the analyzer
            final Reader reader;                  // find or make Reader
            final Reader readerValue = field.readerValue();

            if (readerValue != null)
              reader = readerValue;
            else {
              String stringValue = field.stringValue();
              if (stringValue == null)
                throw new IllegalArgumentException("field must have either TokenStream, String or Reader value");
              stringReader.init(stringValue);
              reader = stringReader;
            }

            // Tokenize field and add to postingTable
            stream = analyzer.reusableTokenStream(fieldInfo.name, reader);
          }

          // reset the TokenStream to the first token
          stream.reset();

          try {
            offsetEnd = offset - 1;
            for (;;) {
              Token token = stream.next(localToken);
              if (token == null) break;
              position += (token.getPositionIncrement() - 1);
              addPosition(token);
              if (++length >= maxFieldLength) {
                if (infoStream != null)
                  infoStream.println("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
                break;
              }
            }
            offset = offsetEnd + 1;
          } finally {
            stream.close();
          }
        }

        boost *= field.getBoost();
      }

      /** Only called when term vectors are enabled.  This
       *  is called the first time we see a given term for
       *  each document, to allocate a PostingVector
       *  instance that is used to record data needed to
       *  write the posting vectors. */
      private PostingVector addNewVector() {

        if (postingsVectorsUpto == postingsVectors.length) {
          final int newSize;
          if (postingsVectors.length < 2)
            newSize = 2;
          else
            newSize = (int) (1.5 * postingsVectors.length);
          PostingVector[] newArray = new PostingVector[newSize];
          System.arraycopy(postingsVectors, 0, newArray, 0, postingsVectors.length);
          postingsVectors = newArray;
        }

        p.vector = postingsVectors[postingsVectorsUpto];
        if (p.vector == null)
          p.vector = postingsVectors[postingsVectorsUpto] = new PostingVector();
        postingsVectorsUpto++;

        final PostingVector v = p.vector;
        v.p = p;

        final int firstSize = levelSizeArray[0];

        if (doVectorPositions) {
          final int upto = vectorsPool.newSlice(firstSize);
          v.posStart = v.posUpto = vectorsPool.byteOffset + upto;
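addNewVector() (truncated above) grows the postingsVectors array by roughly 1.5x when it fills up, and re-uses any PostingVector instances already sitting in the array rather than allocating new ones for every document. A stripped-down sketch of that grow-and-reuse pattern follows, with a hypothetical Slot type standing in for PostingVector; it is an illustration, not code from DocumentsWriter.

class GrowAndReuseSketch {

  static final class Slot { int value; }

  Slot[] slots = new Slot[1];
  int slotsUpto;

  Slot nextSlot() {
    if (slotsUpto == slots.length) {
      // grow by ~1.5x, carrying over existing (possibly null) entries
      int newSize = slots.length < 2 ? 2 : (int) (1.5 * slots.length);
      Slot[] newArray = new Slot[newSize];
      System.arraycopy(slots, 0, newArray, 0, slots.length);
      slots = newArray;
    }
    Slot s = slots[slotsUpto];
    if (s == null)
      s = slots[slotsUpto] = new Slot();  // allocate lazily, once per slot
    slotsUpto++;
    return s;                             // re-used after the next reset
  }

  public static void main(String[] args) {
    GrowAndReuseSketch sketch = new GrowAndReuseSketch();
    Slot first = sketch.nextSlot();
    sketch.nextSlot();
    sketch.nextSlot();                    // array grows 1 -> 2 -> 3 across these calls
    sketch.slotsUpto = 0;                 // "reset" between documents
    System.out.println(first == sketch.nextSlot());  // true: same instance re-used
  }
}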