⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 documentwriter.java

📁 lucene2.2.0版本
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
          ti.payloads = newPayloads;        }      }      ti.positions[freq] = position;		  // add new position      if (payload != null) {        if (ti.payloads == null) {          // lazily allocate payload array          ti.payloads = new Payload[ti.positions.length];        }        ti.payloads[freq] = payload;      }            if (offset != null) {        if (ti.offsets.length == freq){          TermVectorOffsetInfo [] newOffsets = new TermVectorOffsetInfo[freq*2];          TermVectorOffsetInfo [] offsets = ti.offsets;          System.arraycopy(offsets, 0, newOffsets, 0, freq);          ti.offsets = newOffsets;        }        ti.offsets[freq] = offset;      }      ti.freq = freq + 1;			  // update frequency    } else {					  // word not seen before      Term term = new Term(field, text, false);      postingTable.put(term, new Posting(term, position, payload, offset));    }  }  private final Posting[] sortPostingTable() {    // copy postingTable into an array    Posting[] array = new Posting[postingTable.size()];    Enumeration postings = postingTable.elements();    for (int i = 0; postings.hasMoreElements(); i++)      array[i] = (Posting) postings.nextElement();    // sort the array    quickSort(array, 0, array.length - 1);    return array;  }  private static final void quickSort(Posting[] postings, int lo, int hi) {    if (lo >= hi)      return;    int mid = (lo + hi) / 2;    if (postings[lo].term.compareTo(postings[mid].term) > 0) {      Posting tmp = postings[lo];      postings[lo] = postings[mid];      postings[mid] = tmp;    }    if (postings[mid].term.compareTo(postings[hi].term) > 0) {      Posting tmp = postings[mid];      postings[mid] = postings[hi];      postings[hi] = tmp;      if (postings[lo].term.compareTo(postings[mid].term) > 0) {        Posting tmp2 = postings[lo];        postings[lo] = postings[mid];        postings[mid] = tmp2;      }    }    int left = lo + 1;    int right = hi - 1;    if (left >= right)      return;    Term partition = postings[mid].term;    for (; ;) {      while (postings[right].term.compareTo(partition) > 0)        --right;      while (left < right && postings[left].term.compareTo(partition) <= 0)        ++left;      if (left < right) {        Posting tmp = postings[left];        postings[left] = postings[right];        postings[right] = tmp;        --right;      } else {        break;      }    }    quickSort(postings, lo, left);    quickSort(postings, left + 1, hi);  }  private final void writePostings(Posting[] postings, String segment)          throws CorruptIndexException, IOException {    IndexOutput freq = null, prox = null;    TermInfosWriter tis = null;    TermVectorsWriter termVectorWriter = null;    try {      //open files for inverse index storage      freq = directory.createOutput(segment + ".frq");      prox = directory.createOutput(segment + ".prx");      tis = new TermInfosWriter(directory, segment, fieldInfos,                                termIndexInterval);      TermInfo ti = new TermInfo();      String currentField = null;      boolean currentFieldHasPayloads = false;            for (int i = 0; i < postings.length; i++) {        Posting posting = postings[i];        // check to see if we switched to a new field        String termField = posting.term.field();        if (currentField != termField) {          // changing field - see if there is something to save          currentField = termField;          FieldInfo fi = fieldInfos.fieldInfo(currentField);          currentFieldHasPayloads = fi.storePayloads;          if (fi.storeTermVector) {            if (termVectorWriter == null) {              termVectorWriter =                new TermVectorsWriter(directory, segment, fieldInfos);              termVectorWriter.openDocument();            }            termVectorWriter.openField(currentField);          } else if (termVectorWriter != null) {            termVectorWriter.closeField();          }        }                // add an entry to the dictionary with pointers to prox and freq files        ti.set(1, freq.getFilePointer(), prox.getFilePointer(), -1);        tis.add(posting.term, ti);        // add an entry to the freq file        int postingFreq = posting.freq;        if (postingFreq == 1)				  // optimize freq=1          freq.writeVInt(1);			  // set low bit of doc num.        else {          freq.writeVInt(0);			  // the document number          freq.writeVInt(postingFreq);			  // frequency in doc        }        int lastPosition = 0;			  // write positions        int[] positions = posting.positions;        Payload[] payloads = posting.payloads;        int lastPayloadLength = -1;                        // The following encoding is being used for positions and payloads:        // Case 1: current field does not store payloads        //           Positions     -> <PositionDelta>^freq        //           PositionDelta -> VInt        //         The PositionDelta is the difference between the current        //         and the previous position        // Case 2: current field stores payloads        //           Positions     -> <PositionDelta, Payload>^freq        //           Payload       ->  <PayloadLength?, PayloadData>        //           PositionDelta -> VInt        //           PayloadLength -> VInt        //           PayloadData   -> byte^PayloadLength        //         In this case PositionDelta/2 is the difference between        //         the current and the previous position. If PositionDelta        //         is odd, then a PayloadLength encoded as VInt follows,        //         if PositionDelta is even, then it is assumed that the        //         length of the current Payload equals the length of the        //         previous Payload.                for (int j = 0; j < postingFreq; j++) {		  // use delta-encoding          int position = positions[j];          int delta = position - lastPosition;          if (currentFieldHasPayloads) {            int payloadLength = 0;            Payload payload = null;            if (payloads != null) {              payload = payloads[j];              if (payload != null) {                payloadLength = payload.length;              }            }            if (payloadLength == lastPayloadLength) {            	// the length of the current payload equals the length            	// of the previous one. So we do not have to store the length            	// again and we only shift the position delta by one bit              prox.writeVInt(delta * 2);            } else {            	// the length of the current payload is different from the            	// previous one. We shift the position delta, set the lowest            	// bit and store the current payload length as VInt.              prox.writeVInt(delta * 2 + 1);              prox.writeVInt(payloadLength);              lastPayloadLength = payloadLength;            }            if (payloadLength > 0) {            	// write current payload              prox.writeBytes(payload.data, payload.offset, payload.length);            }          } else {          	// field does not store payloads, just write position delta as VInt            prox.writeVInt(delta);          }          lastPosition = position;        }        if (termVectorWriter != null && termVectorWriter.isFieldOpen()) {            termVectorWriter.addTerm(posting.term.text(), postingFreq, posting.positions, posting.offsets);        }      }      if (termVectorWriter != null)        termVectorWriter.closeDocument();    } finally {      // make an effort to close all streams we can but remember and re-throw      // the first exception encountered in this process      IOException keep = null;      if (freq != null) try { freq.close(); } catch (IOException e) { if (keep == null) keep = e; }      if (prox != null) try { prox.close(); } catch (IOException e) { if (keep == null) keep = e; }      if (tis  != null) try {  tis.close(); } catch (IOException e) { if (keep == null) keep = e; }      if (termVectorWriter  != null) try {  termVectorWriter.close(); } catch (IOException e) { if (keep == null) keep = e; }      if (keep != null) throw (IOException) keep.fillInStackTrace();    }  }  private final void writeNorms(String segment) throws IOException {     for(int n = 0; n < fieldInfos.size(); n++){      FieldInfo fi = fieldInfos.fieldInfo(n);      if(fi.isIndexed && !fi.omitNorms){        float norm = fieldBoosts[n] * similarity.lengthNorm(fi.name, fieldLengths[n]);        IndexOutput norms = directory.createOutput(segment + ".f" + n);        try {          norms.writeByte(Similarity.encodeNorm(norm));        } finally {          norms.close();        }      }    }  }    /** If non-null, a message will be printed to this if maxFieldLength is reached.   */  void setInfoStream(PrintStream infoStream) {    this.infoStream = infoStream;  }  int getNumFields() {    return fieldInfos.size();  }}final class Posting {				  // info about a Term in a doc  Term term;					  // the Term  int freq;					  // its frequency in doc  int[] positions;				  // positions it occurs at  Payload[] payloads; // the payloads of the terms  TermVectorOffsetInfo [] offsets;    Posting(Term t, int position, Payload payload, TermVectorOffsetInfo offset) {    term = t;    freq = 1;    positions = new int[1];    positions[0] = position;        if (payload != null) {      payloads = new Payload[1];      payloads[0] = payload;    } else       payloads = null;            if(offset != null){      offsets = new TermVectorOffsetInfo[1];      offsets[0] = offset;    } else      offsets = null;  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -