📄 DocumentsWriter.java
          return -1;
        else if (c2 < c1)
          if (0xffff == c1)
            return -1;
          else
            return 1;
        else if (0xffff == c1)
          return 0;
      }
    }

    /** Write vInt into freq stream of current Posting */
    public void writeFreqVInt(int i) {
      while ((i & ~0x7F) != 0) {
        writeFreqByte((byte) ((i & 0x7f) | 0x80));
        i >>>= 7;
      }
      writeFreqByte((byte) i);
    }

    /** Write vInt into prox stream of current Posting */
    public void writeProxVInt(int i) {
      while ((i & ~0x7F) != 0) {
        writeProxByte((byte) ((i & 0x7f) | 0x80));
        i >>>= 7;
      }
      writeProxByte((byte) i);
    }

    /** Write byte into freq stream of current Posting */
    byte[] freq;
    int freqUpto;

    public void writeFreqByte(byte b) {
      assert freq != null;
      if (freq[freqUpto] != 0) {
        freqUpto = postingsPool.allocSlice(freq, freqUpto);
        freq = postingsPool.buffer;
        p.freqUpto = postingsPool.byteOffset;
      }
      freq[freqUpto++] = b;
    }

    /** Write byte into prox stream of current Posting */
    byte[] prox;
    int proxUpto;

    public void writeProxByte(byte b) {
      assert prox != null;
      if (prox[proxUpto] != 0) {
        proxUpto = postingsPool.allocSlice(prox, proxUpto);
        prox = postingsPool.buffer;
        p.proxUpto = postingsPool.byteOffset;
        assert prox != null;
      }
      prox[proxUpto++] = b;
      assert proxUpto != prox.length;
    }

    /** Currently only used to copy a payload into the prox
     *  stream. */
    public void writeProxBytes(byte[] b, int offset, int len) {
      final int offsetEnd = offset + len;
      while (offset < offsetEnd) {
        if (prox[proxUpto] != 0) {
          // End marker
          proxUpto = postingsPool.allocSlice(prox, proxUpto);
          prox = postingsPool.buffer;
          p.proxUpto = postingsPool.byteOffset;
        }
        prox[proxUpto++] = b[offset++];
        assert proxUpto != prox.length;
      }
    }

    /** Write vInt into offsets stream of current
     *  PostingVector */
    public void writeOffsetVInt(int i) {
      while ((i & ~0x7F) != 0) {
        writeOffsetByte((byte) ((i & 0x7f) | 0x80));
        i >>>= 7;
      }
      writeOffsetByte((byte) i);
    }

    byte[] offsets;
    int offsetUpto;

    /** Write byte into offsets stream of current
     *  PostingVector */
    public void writeOffsetByte(byte b) {
      assert offsets != null;
      if (offsets[offsetUpto] != 0) {
        offsetUpto = vectorsPool.allocSlice(offsets, offsetUpto);
        offsets = vectorsPool.buffer;
        vector.offsetUpto = vectorsPool.byteOffset;
      }
      offsets[offsetUpto++] = b;
    }

    /** Write vInt into pos stream of current
     *  PostingVector */
    public void writePosVInt(int i) {
      while ((i & ~0x7F) != 0) {
        writePosByte((byte) ((i & 0x7f) | 0x80));
        i >>>= 7;
      }
      writePosByte((byte) i);
    }

    byte[] pos;
    int posUpto;

    /** Write byte into pos stream of current
     *  PostingVector */
    public void writePosByte(byte b) {
      assert pos != null;
      if (pos[posUpto] != 0) {
        posUpto = vectorsPool.allocSlice(pos, posUpto);
        pos = vectorsPool.buffer;
        vector.posUpto = vectorsPool.byteOffset;
      }
      pos[posUpto++] = b;
    }

    PostingVector[] postingsVectors = new PostingVector[1];
    int maxPostingsVectors;

    // Used to read a string value for a field
    ReusableStringReader stringReader = new ReusableStringReader();
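The write*VInt methods above all apply Lucene's standard variable-length integer encoding: seven payload bits per byte, with the high bit set on every byte except the last to mean "another byte follows". Small values such as term frequencies and position deltas therefore usually fit in a single byte. A minimal, self-contained round-trip of that encoding follows; the class and method names are illustrative only and are not part of DocumentsWriter or the Lucene API.

class VIntSketch {

  // Encode i into out starting at upto; returns the position after the last byte written.
  static int writeVInt(byte[] out, int upto, int i) {
    while ((i & ~0x7F) != 0) {                   // more than 7 significant bits remain
      out[upto++] = (byte) ((i & 0x7F) | 0x80);  // high bit: another byte follows
      i >>>= 7;
    }
    out[upto++] = (byte) i;                      // final byte, high bit clear
    return upto;
  }

  // Decode the vInt that starts at in[upto].
  static int readVInt(byte[] in, int upto) {
    byte b = in[upto++];
    int value = b & 0x7F;
    for (int shift = 7; (b & 0x80) != 0; shift += 7) {
      b = in[upto++];
      value |= (b & 0x7F) << shift;
    }
    return value;
  }

  public static void main(String[] args) {
    byte[] buf = new byte[8];
    writeVInt(buf, 0, 300);                      // 300 encodes as 0xAC 0x02
    System.out.println(readVInt(buf, 0));        // prints 300
  }
}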
    /** Holds data associated with a single field, including
     *  the Postings hash.  A document may have many
     *  occurrences for a given field name; we gather all
     *  such occurrences here (in docFields) so that we can
     *  process the entire field at once. */
    private final class FieldData implements Comparable {

      ThreadState threadState;
      FieldInfo fieldInfo;

      int fieldCount;
      Fieldable[] docFields = new Fieldable[1];

      int lastGen = -1;
      FieldData next;

      boolean doNorms;
      boolean doVectors;
      boolean doVectorPositions;
      boolean doVectorOffsets;
      boolean postingsCompacted;

      int numPostings;

      Posting[] postingsHash;
      int postingsHashSize;
      int postingsHashHalfSize;
      int postingsHashMask;

      int position;
      int length;
      int offset;
      float boost;
      int postingsVectorsUpto;

      public FieldData(FieldInfo fieldInfo) {
        this.fieldInfo = fieldInfo;
        threadState = ThreadState.this;
      }

      void resetPostingArrays() {
        if (!postingsCompacted)
          compactPostings();
        recyclePostings(this.postingsHash, numPostings);
        Arrays.fill(postingsHash, 0, postingsHash.length, null);
        postingsCompacted = false;
        numPostings = 0;
      }

      void initPostingArrays() {
        // Target hash fill factor of <= 50%
        // NOTE: must be a power of two for hash collision
        // strategy to work correctly
        postingsHashSize = 4;
        postingsHashHalfSize = 2;
        postingsHashMask = postingsHashSize - 1;
        postingsHash = new Posting[postingsHashSize];
      }

      /** So Arrays.sort can sort us. */
      public int compareTo(Object o) {
        return fieldInfo.name.compareTo(((FieldData) o).fieldInfo.name);
      }

      private void compactPostings() {
        int upto = 0;
        for (int i = 0; i < postingsHashSize; i++)
          if (postingsHash[i] != null)
            postingsHash[upto++] = postingsHash[i];

        assert upto == numPostings;
        postingsCompacted = true;
      }

      /** Collapse the hash table & sort in-place. */
      public Posting[] sortPostings() {
        compactPostings();
        doPostingSort(postingsHash, numPostings);
        return postingsHash;
      }

      /** Process all occurrences of one field in the document. */
      public void processField(Analyzer analyzer) throws IOException, AbortException {
        length = 0;
        position = 0;
        offset = 0;
        boost = docBoost;

        final int maxFieldLength = writer.getMaxFieldLength();

        final int limit = fieldCount;
        final Fieldable[] docFieldsFinal = docFields;

        boolean doWriteVectors = true;

        // Walk through all occurrences in this doc for this
        // field:
        try {
          for (int j = 0; j < limit; j++) {
            Fieldable field = docFieldsFinal[j];

            if (field.isIndexed())
              invertField(field, analyzer, maxFieldLength);

            if (field.isStored()) {
              numStoredFields++;
              boolean success = false;
              try {
                localFieldsWriter.writeField(fieldInfo, field);
                success = true;
              } finally {
                // If we hit an exception inside
                // localFieldsWriter.writeField, the
                // contents of fdtLocal can be corrupt, so
                // we must discard all stored fields for
                // this document:
                if (!success)
                  fdtLocal.reset();
              }
            }

            docFieldsFinal[j] = null;
          }
        } catch (AbortException ae) {
          doWriteVectors = false;
          throw ae;
        } finally {
          if (postingsVectorsUpto > 0) {
            try {
              if (doWriteVectors) {
                // Add term vectors for this field
                boolean success = false;
                try {
                  writeVectors(fieldInfo);
                  success = true;
                } finally {
                  if (!success) {
                    // If we hit an exception inside
                    // writeVectors, the contents of tvfLocal
                    // can be corrupt, so we must discard all
                    // term vectors for this document:
                    numVectorFields = 0;
                    tvfLocal.reset();
                  }
                }
              }
            } finally {
              if (postingsVectorsUpto > maxPostingsVectors)
                maxPostingsVectors = postingsVectorsUpto;

              postingsVectorsUpto = 0;
              vectorsPool.reset();
            }
          }
        }
      }

      int offsetEnd;
      Token localToken = new Token();
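compactPostings() and sortPostings() above use a small trick: instead of copying the open-addressed Posting hash into a separate array before sorting, the non-null entries are slid to the front of the same array and only that dense prefix is sorted in place. A stripped-down sketch of the same pattern follows, using a hypothetical String[] table in place of the Posting[] hash; none of these names come from the original source.

import java.util.Arrays;

class CompactAndSortSketch {

  // Slide the non-null entries of an open-addressed hash array to the
  // front and return how many there are; entries past that count are stale.
  static int compact(String[] hash) {
    int upto = 0;
    for (int i = 0; i < hash.length; i++)
      if (hash[i] != null)
        hash[upto++] = hash[i];
    return upto;
  }

  public static void main(String[] args) {
    String[] hash = {null, "cat", null, null, "apple", null, "bird", null};
    int count = compact(hash);
    Arrays.sort(hash, 0, count);        // sort only the dense prefix, in place
    System.out.println(Arrays.toString(Arrays.copyOf(hash, count)));
    // prints [apple, bird, cat]
  }
}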
      /* Invert one occurrence of one field in the document */
      public void invertField(Fieldable field, Analyzer analyzer, final int maxFieldLength) throws IOException, AbortException {

        if (length > 0)
          position += analyzer.getPositionIncrementGap(fieldInfo.name);

        if (!field.isTokenized()) {               // un-tokenized field
          String stringValue = field.stringValue();
          final int valueLength = stringValue.length();
          Token token = localToken;
          token.clear();
          char[] termBuffer = token.termBuffer();
          if (termBuffer.length < valueLength)
            termBuffer = token.resizeTermBuffer(valueLength);
          stringValue.getChars(0, valueLength, termBuffer, 0);
          token.setTermLength(valueLength);
          token.setStartOffset(offset);
          token.setEndOffset(offset + stringValue.length());
          addPosition(token);
          offset += stringValue.length();
          length++;
        } else {                                  // tokenized field
          final TokenStream stream;
          final TokenStream streamValue = field.tokenStreamValue();

          if (streamValue != null)
            stream = streamValue;
          else {
            // the field does not have a TokenStream,
            // so we have to obtain one from the analyzer
            final Reader reader;                  // find or make Reader
            final Reader readerValue = field.readerValue();

            if (readerValue != null)
              reader = readerValue;
            else {
              String stringValue = field.stringValue();
              if (stringValue == null)
                throw new IllegalArgumentException("field must have either TokenStream, String or Reader value");
              stringReader.init(stringValue);
              reader = stringReader;
            }

            // Tokenize field and add to postingTable
            stream = analyzer.reusableTokenStream(fieldInfo.name, reader);
          }

          // reset the TokenStream to the first token
          stream.reset();

          try {
            offsetEnd = offset - 1;
            for (;;) {
              Token token = stream.next(localToken);
              if (token == null) break;
              position += (token.getPositionIncrement() - 1);
              addPosition(token);
              if (++length >= maxFieldLength) {
                if (infoStream != null)
                  infoStream.println("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
                break;
              }
            }
            offset = offsetEnd + 1;
          } finally {
            stream.close();
          }
        }

        boost *= field.getBoost();
      }

      /** Only called when term vectors are enabled.  This
       *  is called the first time we see a given term for
       *  each document, to allocate a PostingVector
       *  instance that is used to record data needed to
       *  write the posting vectors. */
      private PostingVector addNewVector() {

        if (postingsVectorsUpto == postingsVectors.length) {
          final int newSize;
          if (postingsVectors.length < 2)
            newSize = 2;
          else
            newSize = (int) (1.5 * postingsVectors.length);
          PostingVector[] newArray = new PostingVector[newSize];
          System.arraycopy(postingsVectors, 0, newArray, 0, postingsVectors.length);
          postingsVectors = newArray;
        }

        p.vector = postingsVectors[postingsVectorsUpto];
        if (p.vector == null)
          p.vector = postingsVectors[postingsVectorsUpto] = new PostingVector();
        postingsVectorsUpto++;

        final PostingVector v = p.vector;
        v.p = p;

        final int firstSize = levelSizeArray[0];

        if (doVectorPositions) {
          final int upto = vectorsPool.newSlice(firstSize);
          v.posStart = v.posUpto = vectorsPool.byteOffset + upto;
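addNewVector() (truncated above) grows the postingsVectors array by roughly 1.5x when it fills up, and re-uses any PostingVector instances already sitting in the array rather than allocating new ones for every document. A stripped-down sketch of that grow-and-reuse pattern follows, with a hypothetical Slot type standing in for PostingVector; it is an illustration, not code from DocumentsWriter.

class GrowAndReuseSketch {

  static final class Slot { int value; }

  Slot[] slots = new Slot[1];
  int slotsUpto;

  Slot nextSlot() {
    if (slotsUpto == slots.length) {
      // grow by ~1.5x, carrying over existing (possibly null) entries
      int newSize = slots.length < 2 ? 2 : (int) (1.5 * slots.length);
      Slot[] newArray = new Slot[newSize];
      System.arraycopy(slots, 0, newArray, 0, slots.length);
      slots = newArray;
    }
    Slot s = slots[slotsUpto];
    if (s == null)
      s = slots[slotsUpto] = new Slot();  // allocate lazily, once per slot
    slotsUpto++;
    return s;                             // re-used after the next reset
  }

  public static void main(String[] args) {
    GrowAndReuseSketch sketch = new GrowAndReuseSketch();
    Slot first = sketch.nextSlot();
    sketch.nextSlot();
    sketch.nextSlot();                    // array grows 1 -> 2 -> 3 across these calls
    sketch.slotsUpto = 0;                 // "reset" between documents
    System.out.println(first == sketch.nextSlot());  // true: same instance re-used
  }
}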