📄 DocumentsWriter.java
        if (tvx != null) {
          try {
            tvx.close();
          } catch (Throwable t) {
          }
          tvx = null;
        }
        if (tvd != null) {
          try {
            tvd.close();
          } catch (Throwable t) {
          }
          tvd = null;
        }
        if (tvf != null) {
          try {
            tvf.close();
          } catch (Throwable t) {
          }
          tvf = null;
        }

        // Reset fields writer
        if (fieldsWriter != null) {
          try {
            fieldsWriter.close();
          } catch (Throwable t) {
          }
          fieldsWriter = null;
        }

        // Discard pending norms:
        final int numField = fieldInfos.size();
        for (int i=0;i<numField;i++) {
          FieldInfo fi = fieldInfos.fieldInfo(i);
          if (fi.isIndexed && !fi.omitNorms) {
            BufferedNorms n = norms[i];
            if (n != null)
              try {
                n.reset();
              } catch (Throwable t) {
              }
          }
        }

        // Reset all postings data
        resetPostingsData();

      } finally {
        resumeAllThreads();
      }

      // If we have a root cause exception, re-throw it now:
      if (ae != null) {
        Throwable t = ae.getCause();
        if (t instanceof IOException)
          throw (IOException) t;
        else if (t instanceof RuntimeException)
          throw (RuntimeException) t;
        else if (t instanceof Error)
          throw (Error) t;
        else
          // Should not get here
          assert false: "unknown exception: " + t;
      }
    } finally {
      if (ae != null)
        abortCount--;
      notifyAll();
    }
  }

  /** Reset after a flush */
  private void resetPostingsData() throws IOException {
    // All ThreadStates should be idle when we are called
    assert allThreadsIdle();
    threadBindings.clear();
    segment = null;
    numDocsInRAM = 0;
    nextDocID = 0;
    nextWriteDocID = 0;
    files = null;
    balanceRAM();
    bufferIsFull = false;
    flushPending = false;
    for(int i=0;i<threadStates.length;i++) {
      threadStates[i].numThreads = 0;
      threadStates[i].resetPostings();
    }
    numBytesUsed = 0;
  }

  // Returns true if an abort is in progress
  synchronized boolean pauseAllThreads() {
    pauseThreads++;
    while(!allThreadsIdle()) {
      try {
        wait();
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
      }
    }
    return abortCount > 0;
  }

  synchronized void resumeAllThreads() {
    pauseThreads--;
    assert pauseThreads >= 0;
    if (0 == pauseThreads)
      notifyAll();
  }

  private synchronized boolean allThreadsIdle() {
    for(int i=0;i<threadStates.length;i++)
      if (!threadStates[i].isIdle)
        return false;
    return true;
  }

  private boolean hasNorms;           // Whether any norms were seen since last flush

  List newFiles;

  /** Flush all pending docs to a new segment */
  synchronized int flush(boolean closeDocStore) throws IOException {

    assert allThreadsIdle();

    if (segment == null)
      // In case we are asked to flush an empty segment
      segment = writer.newSegmentName();

    newFiles = new ArrayList();

    docStoreOffset = numDocsInStore;

    int docCount;

    assert numDocsInRAM > 0;

    if (infoStream != null)
      infoStream.println("\nflush postings as segment " + segment + " numDocs=" + numDocsInRAM);

    boolean success = false;

    try {
      if (closeDocStore) {
        assert docStoreSegment != null;
        assert docStoreSegment.equals(segment);
        newFiles.addAll(files());
        closeDocStore();
      }

      fieldInfos.write(directory, segment + ".fnm");

      docCount = numDocsInRAM;

      newFiles.addAll(writeSegment());

      success = true;
    } finally {
      if (!success)
        abort(null);
    }

    return docCount;
  }

  /** Build compound file for the segment we just flushed */
  void createCompoundFile(String segment) throws IOException {
    CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
    final int size = newFiles.size();
    for(int i=0;i<size;i++)
      cfsWriter.addFile((String) newFiles.get(i));

    // Perform the merge
    cfsWriter.close();
  }
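  // ------------------------------------------------------------------
  // Illustrative sketch only, not part of the original file: one way a
  // caller could combine setFlushPending/clearFlushPending (below),
  // pauseAllThreads/resumeAllThreads, flush and createCompoundFile so
  // that exactly one thread triggers a flush.  The real call sequence
  // lives in IndexWriter and handles segment naming, doc stores and
  // error cases in more detail; the method name here is hypothetical.
  // ------------------------------------------------------------------
  private void exampleTriggerFlush() throws IOException {
    // Only the thread that flips flushPending goes on to flush:
    if (!setFlushPending())
      return;
    boolean aborting = pauseAllThreads();   // blocks until every ThreadState is idle
    try {
      if (!aborting) {
        flush(true);                        // write the buffered docs as a new segment
        // The caller could then pack the files collected in newFiles into
        // a compound file via createCompoundFile(segmentName).
      }
    } finally {
      clearFlushPending();
      resumeAllThreads();
    }
  }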
  /** Set flushPending if it is not already set and returns
   *  whether it was set.  This is used by IndexWriter to
   *  trigger a single flush even when multiple threads are
   *  trying to do so. */
  synchronized boolean setFlushPending() {
    if (flushPending)
      return false;
    else {
      flushPending = true;
      return true;
    }
  }

  synchronized void clearFlushPending() {
    flushPending = false;
  }

  /** Per-thread state.  We keep a separate Posting hash and
   *  other state for each thread and then merge postings
   *  hashes from all threads when writing the segment. */
  private final class ThreadState {

    Posting[] postingsFreeList;           // Free Posting instances
    int postingsFreeCount;

    RAMOutputStream tvfLocal = new RAMOutputStream();    // Term vectors for one doc
    RAMOutputStream fdtLocal = new RAMOutputStream();    // Stored fields for one doc
    FieldsWriter localFieldsWriter;       // Fields for one doc

    long[] vectorFieldPointers;
    int[] vectorFieldNumbers;

    boolean isIdle = true;                // Whether we are in use
    int numThreads = 1;                   // Number of threads that use this instance

    int docID;                            // docID we are now working on
    int numStoredFields;                  // How many stored fields in current doc
    float docBoost;                       // Boost for current doc

    FieldData[] fieldDataArray;           // Fields touched by current doc
    int numFieldData;                     // How many fields in current doc
    int numVectorFields;                  // How many vector fields in current doc

    FieldData[] allFieldDataArray = new FieldData[10];   // All FieldData instances
    int numAllFieldData;
    FieldData[] fieldDataHash;            // Hash FieldData instances by field name
    int fieldDataHashMask;
    String maxTermPrefix;                 // Non-null prefix of a too-large term if this
                                          // doc has one

    boolean doFlushAfter;

    public ThreadState() {
      fieldDataArray = new FieldData[8];
      fieldDataHash = new FieldData[16];
      fieldDataHashMask = 15;
      vectorFieldPointers = new long[10];
      vectorFieldNumbers = new int[10];
      postingsFreeList = new Posting[256];
      postingsFreeCount = 0;
    }

    /** Clear the postings hash and return objects back to
     *  shared pool */
    public void resetPostings() throws IOException {
      fieldGen = 0;
      maxPostingsVectors = 0;
      doFlushAfter = false;
      if (localFieldsWriter != null) {
        localFieldsWriter.close();
        localFieldsWriter = null;
      }
      postingsPool.reset();
      charPool.reset();
      recyclePostings(postingsFreeList, postingsFreeCount);
      postingsFreeCount = 0;
      for(int i=0;i<numAllFieldData;i++) {
        FieldData fp = allFieldDataArray[i];
        fp.lastGen = -1;
        if (fp.numPostings > 0)
          fp.resetPostingArrays();
      }
    }
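    /*
     * Illustrative note, not in the original source: writeDocument() below
     * appends one entry to tvd per document.  Suppose (hypothetical
     * numbers) a document has three vector fields whose data starts at
     * offsets 0, 120 and 300 inside tvfLocal, and the shared tvf file is
     * currently at position 5000.  The tvd entry is then: the field count
     * 3, the three field numbers, the absolute tvf start 5000, and the
     * deltas 120 and 180 (300 - 120) as VLongs.  Delta-encoding keeps the
     * VLongs small; the first local pointer is always 0 because tvfLocal
     * is reset after every document.
     */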
    /** Move all per-document state that was accumulated in
     *  the ThreadState into the "real" stores. */
    public void writeDocument() throws IOException, AbortException {

      // If we hit an exception while appending to the
      // stored fields or term vectors files, we have to
      // abort all documents since we last flushed because
      // it means those files are possibly inconsistent.
      try {

        numDocsInStore++;

        // Append stored fields to the real FieldsWriter:
        fieldsWriter.flushDocument(numStoredFields, fdtLocal);
        fdtLocal.reset();

        // Append term vectors to the real outputs:
        if (tvx != null) {
          tvx.writeLong(tvd.getFilePointer());
          tvd.writeVInt(numVectorFields);
          if (numVectorFields > 0) {
            for(int i=0;i<numVectorFields;i++)
              tvd.writeVInt(vectorFieldNumbers[i]);
            assert 0 == vectorFieldPointers[0];
            tvd.writeVLong(tvf.getFilePointer());
            long lastPos = vectorFieldPointers[0];
            for(int i=1;i<numVectorFields;i++) {
              long pos = vectorFieldPointers[i];
              tvd.writeVLong(pos-lastPos);
              lastPos = pos;
            }
            tvfLocal.writeTo(tvf);
            tvfLocal.reset();
          }
        }

        // Append norms for the fields we saw:
        for(int i=0;i<numFieldData;i++) {
          FieldData fp = fieldDataArray[i];
          if (fp.doNorms) {
            BufferedNorms bn = norms[fp.fieldInfo.number];
            assert bn != null;
            assert bn.upto <= docID;
            bn.fill(docID);
            float norm = fp.boost * writer.getSimilarity().lengthNorm(fp.fieldInfo.name, fp.length);
            bn.add(norm);
          }
        }
      } catch (Throwable t) {
        // Forcefully idle this threadstate -- its state will
        // be reset by abort()
        isIdle = true;
        throw new AbortException(t, DocumentsWriter.this);
      }

      if (bufferIsFull && !flushPending) {
        flushPending = true;
        doFlushAfter = true;
      }
    }

    int fieldGen;

    /** Initializes shared state for this new document */
    void init(Document doc, int docID) throws IOException, AbortException {

      assert !isIdle;
      assert writer.testPoint("DocumentsWriter.ThreadState.init start");

      this.docID = docID;
      docBoost = doc.getBoost();
      numStoredFields = 0;
      numFieldData = 0;
      numVectorFields = 0;
      maxTermPrefix = null;

      assert 0 == fdtLocal.length();
      assert 0 == fdtLocal.getFilePointer();
      assert 0 == tvfLocal.length();
      assert 0 == tvfLocal.getFilePointer();

      final int thisFieldGen = fieldGen++;

      List docFields = doc.getFields();
      final int numDocFields = docFields.size();
      boolean docHasVectors = false;

      // Absorb any new fields first seen in this document.
      // Also absorb any changes to fields we had already
      // seen before (eg suddenly turning on norms or
      // vectors, etc.):
      for(int i=0;i<numDocFields;i++) {
        Fieldable field = (Fieldable) docFields.get(i);

        FieldInfo fi = fieldInfos.add(field.name(), field.isIndexed(), field.isTermVectorStored(),
                                      field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
                                      field.getOmitNorms(), false);
        if (fi.isIndexed && !fi.omitNorms) {
          // Maybe grow our buffered norms
          if (norms.length <= fi.number) {
            int newSize = (int) ((1+fi.number)*1.25);
            BufferedNorms[] newNorms = new BufferedNorms[newSize];
            System.arraycopy(norms, 0, newNorms, 0, norms.length);
            norms = newNorms;
          }
          if (norms[fi.number] == null)
            norms[fi.number] = new BufferedNorms();

          hasNorms = true;
        }

        // Make sure we have a FieldData allocated
        int hashPos = fi.name.hashCode() & fieldDataHashMask;
        FieldData fp = fieldDataHash[hashPos];
        while(fp != null && !fp.fieldInfo.name.equals(fi.name))
          fp = fp.next;

        if (fp == null) {

          fp = new FieldData(fi);
          fp.next = fieldDataHash[hashPos];
          fieldDataHash[hashPos] = fp;

          if (numAllFieldData == allFieldDataArray.length) {