📄 checkindex.java
字号:
package org.apache.lucene.index;/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */import org.apache.lucene.store.FSDirectory;import org.apache.lucene.store.Directory;import org.apache.lucene.store.IndexInput;import org.apache.lucene.document.Document;import java.text.NumberFormat;import java.io.PrintStream;import java.io.IOException;import java.util.Collection;import java.util.Iterator;import java.util.List;import java.util.ArrayList;import org.apache.lucene.document.Fieldable; // for javadoc/** * Basic tool and API to check the health of an index and * write a new segments file that removes reference to * problematic segments. * * <p>As this tool checks every byte in the index, on a large * index it can take quite a long time to run. * * <p><b>WARNING</b>: this tool and API is new and * experimental and is subject to suddenly change in the * next release. Please make a complete backup of your * index before using this to fix your index! */public class CheckIndex { /** Default PrintStream for all CheckIndex instances. * @deprecated Use {@link #setInfoStream} per instance, * instead. 
*/ public static PrintStream out = null; private PrintStream infoStream; private Directory dir; /** * Returned from {@link #checkIndex()} detailing the health and status of the index. * * <p><b>WARNING</b>: this API is new and experimental and is * subject to suddenly change in the next release. **/ public static class Status { /** True if no problems were found with the index. */ public boolean clean; /** True if we were unable to locate and load the segments_N file. */ public boolean missingSegments; /** True if we were unable to open the segments_N file. */ public boolean cantOpenSegments; /** True if we were unable to read the version number from segments_N file. */ public boolean missingSegmentVersion; /** Name of latest segments_N file in the index. */ public String segmentsFileName; /** Number of segments in the index. */ public int numSegments; /** String description of the version of the index. */ public String segmentFormat; /** Empty unless you passed specific segments list to check as optional 3rd argument. * @see CheckIndex#checkIndex(List) */ public List/*<String>*/ segmentsChecked = new ArrayList(); /** True if the index was created with a newer version of Lucene than the CheckIndex tool. */ public boolean toolOutOfDate; /** List of {@link SegmentInfoStatus} instances, detailing status of each segment. */ public List/*<SegmentInfoStatus*/ segmentInfos = new ArrayList(); /** Directory index is in. */ public Directory dir; /** SegmentInfos instance containing only segments that * had no problems (this is used with the {@link * CheckIndex#fix} method to repair the index. */ SegmentInfos newSegments; /** How many documents will be lost to bad segments. */ public int totLoseDocCount; /** How many bad segments were found. */ public int numBadSegments; /** True if we checked only specific segments ({@link * #checkIndex(List)}) was called with non-null * argument). */ public boolean partial; /** Holds the status of each segment in the index. 
* See {@link #segmentInfos}. * * <p><b>WARNING</b>: this API is new and experimental and is * subject to suddenly change in the next release. */ public static class SegmentInfoStatus { /** Name of the segment. */ public String name; /** Document count (does not take deletions into account). */ public int docCount; /** True if segment is compound file format. */ public boolean compound; /** Number of files referenced by this segment. */ public int numFiles; /** Net size (MB) of the files referenced by this * segment. */ public double sizeMB; /** Doc store offset, if this segment shares the doc * store files (stored fields and term vectors) with * other segments. This is -1 if it does not share. */ public int docStoreOffset = -1; /** String of the shared doc store segment, or null if * this segment does not share the doc store files. */ public String docStoreSegment; /** True if the shared doc store files are compound file * format. */ public boolean docStoreCompoundFile; /** True if this segment has pending deletions. */ public boolean hasDeletions; /** Name of the current deletions file name. */ public String deletionsFileName; /** Number of deleted documents. */ public int numDeleted; /** True if we were able to open a SegmentReader on this * segment. */ public boolean openReaderPassed; /** Number of fields in this segment. */ int numFields; /** True if at least one of the fields in this segment * does not omitTf. * @see Fieldable#setOmitTf */ public boolean hasProx; } } /** Create a new CheckIndex on the directory. */ public CheckIndex(Directory dir) { this.dir = dir; infoStream = out; } /** Set infoStream where messages should go. 
If null, no messages are printed. */
  public void setInfoStream(PrintStream out) {
    infoStream = out;
  }

  /** Prints one line to the info stream; a no-op when no stream is set. */
  private void msg(String msg) {
    if (infoStream == null) {
      return;
    }
    infoStream.println(msg);
  }

  /**
   * SegmentTermDocs that counts how many times {@link #skippingDoc()} has
   * been invoked since the most recent {@link #seek(Term)}.
   * NOTE(review): presumably the superclass invokes skippingDoc() for each
   * deleted document it passes over -- confirm against SegmentTermDocs.
   */
  private static class MySegmentTermDocs extends SegmentTermDocs {

    /** Number of skippingDoc() calls since the last seek. */
    int delCount;

    MySegmentTermDocs(SegmentReader reader) {
      super(reader);
    }

    /** Positions on the term and restarts the counter. */
    public void seek(Term term) throws IOException {
      super.seek(term);
      delCount = 0;
    }

    /** Bumps the counter by one. */
    protected void skippingDoc() throws IOException {
      delCount++;
    }
  }

  /** Returns true if index is clean, else false.
   *  Delegates to {@link #check(Directory, boolean, List)} with no segment
   *  restriction.
   *  @deprecated Please instantiate a CheckIndex and then use {@link #checkIndex()} instead */
  public static boolean check(Directory dir, boolean doFix) throws IOException {
    final List everySegment = null;
    return check(dir, doFix, everySegment);
  }

  /** Returns true if index is clean, else false.
   *  When <code>doFix</code> is true and the index is not clean,
   *  <code>fixIndex</code> is invoked with the resulting status before
   *  returning.
   *  @deprecated Please instantiate a CheckIndex and then use {@link #checkIndex(List)} instead */
  public static boolean check(Directory dir, boolean doFix, List onlySegments) throws IOException {
    final CheckIndex checker = new CheckIndex(dir);
    final Status status = checker.checkIndex(onlySegments);
    final boolean problemsFound = !status.clean;
    if (problemsFound && doFix) {
      checker.fixIndex(status);
    }
    // Read the flag again rather than caching it, exactly as before the fix call.
    return status.clean;
  }

  /** Returns a {@link Status} instance detailing
   *  the state of the index.
   *
   *  <p>As this method checks every byte in the index, on a large
   *  index it can take quite a long time to run.
   *
   *  <p><b>WARNING</b>: make sure
   *  you only call this when the index is not opened by any
   *  writer. */
  public Status checkIndex() throws IOException {
    final List everySegment = null;
    return checkIndex(everySegment);
  }

  /** Returns a {@link Status} instance detailing
   *  the state of the index.
   *
   *  <p>As this method checks every byte in the specified
   *  segments, on a large index it can take quite a long
   *  time to run.
   *
   *  <p><b>WARNING</b>: make sure
   *  you only call this when the index is not opened by any
   *  writer.
   *
   *  @param onlySegments list of specific segment names to check,
   *         or null to check every segment
*/ public Status checkIndex(List onlySegments) throws IOException { NumberFormat nf = NumberFormat.getInstance(); SegmentInfos sis = new SegmentInfos(); Status result = new Status(); result.dir = dir; try { sis.read(dir); } catch (Throwable t) { msg("ERROR: could not read any segments file in directory"); result.missingSegments = true; if (infoStream != null) t.printStackTrace(infoStream); return result; } final int numSegments = sis.size(); final String segmentsFileName = sis.getCurrentSegmentFileName(); IndexInput input = null; try { input = dir.openInput(segmentsFileName); } catch (Throwable t) { msg("ERROR: could not open segments file in directory"); if (infoStream != null) t.printStackTrace(infoStream); result.cantOpenSegments = true; return result; } int format = 0; try { format = input.readInt(); } catch (Throwable t) { msg("ERROR: could not read segment file version in directory"); if (infoStream != null) t.printStackTrace(infoStream); result.missingSegmentVersion = true; return result; } finally { if (input != null) input.close(); } String sFormat = ""; boolean skip = false; if (format == SegmentInfos.FORMAT) sFormat = "FORMAT [Lucene Pre-2.1]"; if (format == SegmentInfos.FORMAT_LOCKLESS) sFormat = "FORMAT_LOCKLESS [Lucene 2.1]"; else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE) sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]"; else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE) sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]"; else { if (format == SegmentInfos.FORMAT_CHECKSUM) sFormat = "FORMAT_CHECKSUM [Lucene 2.4]"; else if (format == SegmentInfos.FORMAT_DEL_COUNT) sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]"; else if (format == SegmentInfos.FORMAT_HAS_PROX) sFormat = "FORMAT_HAS_PROX [Lucene 2.4]"; else if (format < SegmentInfos.CURRENT_FORMAT) { sFormat = "int=" + format + " [newer version of Lucene than this tool]"; skip = true; } else { sFormat = format + " [Lucene 1.3 or prior]"; } } msg("Segments file=" + segmentsFileName + " 
numSegments=" + numSegments + " version=" + sFormat); result.segmentsFileName = segmentsFileName; result.numSegments = numSegments; result.segmentFormat = sFormat; if (onlySegments != null) { result.partial = true; if (infoStream != null) infoStream.print("\nChecking only these segments:"); Iterator it = onlySegments.iterator(); while (it.hasNext()) { if (infoStream != null) infoStream.print(" " + it.next()); } result.segmentsChecked.addAll(onlySegments); msg(":"); } if (skip) { msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting"); result.toolOutOfDate = true; return result; } result.newSegments = (SegmentInfos) sis.clone(); result.newSegments.clear(); for(int i=0;i<numSegments;i++) { final SegmentInfo info = sis.info(i); if (onlySegments != null && !onlySegments.contains(info.name)) continue; Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus(); result.segmentInfos.add(segInfoStat); msg(" " + (1+i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount); segInfoStat.name = info.name; segInfoStat.docCount = info.docCount;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -