📄 checkindex.java
字号:
int toLoseDocCount = info.docCount; SegmentReader reader = null; try { msg(" compound=" + info.getUseCompoundFile()); segInfoStat.compound = info.getUseCompoundFile(); msg(" hasProx=" + info.getHasProx()); segInfoStat.hasProx = info.getHasProx(); msg(" numFiles=" + info.files().size()); segInfoStat.numFiles = info.files().size(); msg(" size (MB)=" + nf.format(info.sizeInBytes()/(1024.*1024.))); segInfoStat.sizeMB = info.sizeInBytes()/(1024.*1024.); final int docStoreOffset = info.getDocStoreOffset(); if (docStoreOffset != -1) { msg(" docStoreOffset=" + docStoreOffset); segInfoStat.docStoreOffset = docStoreOffset; msg(" docStoreSegment=" + info.getDocStoreSegment()); segInfoStat.docStoreSegment = info.getDocStoreSegment(); msg(" docStoreIsCompoundFile=" + info.getDocStoreIsCompoundFile()); segInfoStat.docStoreCompoundFile = info.getDocStoreIsCompoundFile(); } final String delFileName = info.getDelFileName(); if (delFileName == null){ msg(" no deletions"); segInfoStat.hasDeletions = false; } else{ msg(" has deletions [delFileName=" + delFileName + "]"); segInfoStat.hasDeletions = true; segInfoStat.deletionsFileName = delFileName; } if (infoStream != null) infoStream.print(" test: open reader........."); reader = SegmentReader.get(info); final int numDocs = reader.numDocs(); toLoseDocCount = numDocs; if (reader.hasDeletions()) { if (info.docCount - numDocs != info.getDelCount()){ throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs)); } segInfoStat.numDeleted = info.docCount - numDocs; msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]"); } else { if (info.getDelCount() != 0){ throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs)); } msg("OK"); } if (infoStream != null) infoStream.print(" test: fields, norms......."); Collection fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL); Iterator it = fieldNames.iterator(); while(it.hasNext()) { final String fieldName = (String) it.next(); byte[] b = reader.norms(fieldName); if (b.length != info.docCount) throw new RuntimeException("norms for field \"" + fieldName + "\" is length " + b.length + " != maxDoc " + info.docCount); } msg("OK [" + fieldNames.size() + " fields]"); segInfoStat.numFields = fieldNames.size(); if (infoStream != null) infoStream.print(" test: terms, freq, prox..."); final TermEnum termEnum = reader.terms(); final TermPositions termPositions = reader.termPositions(); // Used only to count up # deleted docs for this // term final MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader); long termCount = 0; long totFreq = 0; long totPos = 0; while(termEnum.next()) { termCount++; final Term term = termEnum.term(); final int docFreq = termEnum.docFreq(); termPositions.seek(term); int lastDoc = -1; int freq0 = 0; totFreq += docFreq; while(termPositions.next()) { freq0++; final int doc = termPositions.doc(); final int freq = termPositions.freq(); if (doc <= lastDoc) throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc); lastDoc = doc; if (freq <= 0) throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds"); int lastPos = -1; totPos += freq; for(int j=0;j<freq;j++) { final int pos = termPositions.nextPosition(); if (pos < -1) throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds"); if (pos < lastPos) throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos); } } // Now count how many deleted docs occurred in // this term: final int delCount; if (reader.hasDeletions()) { myTermDocs.seek(term); while(myTermDocs.next()) { } delCount = myTermDocs.delCount; } else delCount = 0; if (freq0 + delCount != docFreq) throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount); } msg("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]"); if (infoStream != null) infoStream.print(" test: stored fields......."); int docCount = 0; long totFields = 0; for(int j=0;j<info.docCount;j++) if (!reader.isDeleted(j)) { docCount++; Document doc = reader.document(j); totFields += doc.getFields().size(); } if (docCount != reader.numDocs()) throw new RuntimeException("docCount=" + docCount + " but saw " + docCount + " undeleted docs"); msg("OK [" + totFields + " total field count; avg " + nf.format((((float) totFields)/docCount)) + " fields per doc]"); if (infoStream != null) infoStream.print(" test: term vectors........"); int totVectors = 0; for(int j=0;j<info.docCount;j++) if (!reader.isDeleted(j)) { TermFreqVector[] tfv = reader.getTermFreqVectors(j); if (tfv != null) totVectors += tfv.length; } msg("OK [" + totVectors + " total vector count; avg " + nf.format((((float) totVectors)/docCount)) + " term/freq vector fields per doc]"); msg(""); } catch (Throwable t) { msg("FAILED"); String comment; comment = "fixIndex() would remove reference to this segment"; msg(" WARNING: " + comment + "; full exception:"); if (infoStream != null) t.printStackTrace(infoStream); msg(""); result.totLoseDocCount += toLoseDocCount; result.numBadSegments++; continue; } finally { if (reader != null) reader.close(); } // Keeper result.newSegments.add(info.clone()); } if (0 == result.numBadSegments) { result.clean = true; msg("No problems were detected with this index.\n"); } else msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected"); return result; } /** Repairs the index using previously returned result * from {@link #checkIndex}. Note that this does not * remove any of the unreferenced files after it's done; * you must separately open an {@link IndexWriter}, which * deletes unreferenced files when it's created. * * <p><b>WARNING</b>: this writes a * new segments file into the index, effectively removing * all documents in broken segments from the index. * BE CAREFUL. * * <p><b>WARNING</b>: Make sure you only call this when the * index is not opened by any writer. */ public void fixIndex(Status result) throws IOException { if (result.partial) throw new IllegalArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)"); result.newSegments.commit(result.dir); } private static boolean assertsOn; private static boolean testAsserts() { assertsOn = true; return true; } private static boolean assertsOn() { assert testAsserts(); return assertsOn; } /** Command-line interface to check and fix an index. <p> Run it like this: <pre> java -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y] </pre> <ul> <li><code>-fix</code>: actually write a new segments_N file, removing any problematic segments <li><code>-segment X</code>: only check the specified segment(s). This can be specified multiple times, to check more than one segment, eg <code>-segment _2 -segment _a</code>. You can't use this with the -fix option. </ul> <p><b>WARNING</b>: <code>-fix</code> should only be used on an emergency basis as it will cause documents (perhaps many) to be permanently removed from the index. Always make a backup copy of your index before running this! Do not run this tool on an index that is actively being written to. You have been warned! <p> Run without -fix, this tool will open the index, report version information and report any exceptions it hits and what action it would take if -fix were specified. With -fix, this tool will remove any segments that have issues and write a new segments_N file. This means all documents contained in the affected segments will be removed. <p> This tool exits with exit code 1 if the index cannot be opened or has any corruption, else 0. */ public static void main(String[] args) throws IOException { boolean doFix = false; List onlySegments = new ArrayList(); String indexPath = null; int i = 0; while(i < args.length) { if (args[i].equals("-fix")) { doFix = true; i++; } else if (args[i].equals("-segment")) { if (i == args.length-1) { System.out.println("ERROR: missing name for -segment option"); System.exit(1); } onlySegments.add(args[i+1]); i += 2; } else { if (indexPath != null) { System.out.println("ERROR: unexpected extra argument '" + args[i] + "'"); System.exit(1); } indexPath = args[i]; i++; } } if (indexPath == null) { System.out.println("\nERROR: index path not specified"); System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" + "\n" + " -fix: actually write a new segments_N file, removing any problematic segments\n" + " -segment X: only check the specified segments. This can be specified multiple\n" + " times, to check more than one segment, eg '-segment _2 -segment _a'.\n" + " You can't use this with the -fix option\n" + "\n" + "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" + "documents (perhaps many) to be permanently removed from the index. Always make\n" + "a backup copy of your index before running this! Do not run this tool on an index\n" + "that is actively being written to. You have been warned!\n" + "\n" + "Run without -fix, this tool will open the index, report version information\n" + "and report any exceptions it hits and what action it would take if -fix were\n" + "specified. With -fix, this tool will remove any segments that have issues and\n" + "write a new segments_N file. This means all documents contained in the affected\n" + "segments will be removed.\n" + "\n" + "This tool exits with exit code 1 if the index cannot be opened or has any\n" + "corruption, else 0.\n"); System.exit(1); } if (!assertsOn()) System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled"); if (onlySegments.size() == 0) onlySegments = null; else if (doFix) { System.out.println("ERROR: cannot specify both -fix and -segment"); System.exit(1); } System.out.println("\nOpening index @ " + indexPath + "\n"); Directory dir = null; try { dir = FSDirectory.getDirectory(indexPath); } catch (Throwable t) { System.out.println("ERROR: could not open directory \"" + indexPath + "\"; exiting"); t.printStackTrace(System.out); System.exit(1); } CheckIndex checker = new CheckIndex(dir); checker.setInfoStream(System.out); Status result = checker.checkIndex(onlySegments); if (!result.clean) { if (!doFix) { System.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n"); } else { System.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n"); System.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!"); for(int s=0;s<5;s++) { try { Thread.sleep(1000); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); s--; continue; } System.out.println(" " + (5-s) + "..."); } System.out.println("Writing..."); checker.fixIndex(result); System.out.println("OK"); System.out.println("Wrote new segments file \"" + result.newSegments.getCurrentSegmentFileName() + "\""); } } System.out.println(""); final int exitCode; if (result != null && result.clean == true) exitCode = 0; else exitCode = 1; System.exit(exitCode); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -