📄 checkindex.java

📁 lucene-2.4.0 是一个全文搜索的工具包
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
package org.apache.lucene.index;/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements.  See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License.  You may obtain a copy of the License at * *     http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */import org.apache.lucene.store.FSDirectory;import org.apache.lucene.store.Directory;import org.apache.lucene.store.IndexInput;import org.apache.lucene.document.Document;import java.text.NumberFormat;import java.io.PrintStream;import java.io.IOException;import java.util.Collection;import java.util.Iterator;import java.util.List;import java.util.ArrayList;import org.apache.lucene.document.Fieldable;          // for javadoc/** * Basic tool and API to check the health of an index and * write a new segments file that removes reference to * problematic segments. *  * <p>As this tool checks every byte in the index, on a large * index it can take quite a long time to run. * * <p><b>WARNING</b>: this tool and API is new and * experimental and is subject to suddenly change in the * next release.  Please make a complete backup of your * index before using this to fix your index! */public class CheckIndex {  /** Default PrintStream for all CheckIndex instances.   *  @deprecated Use {@link #setInfoStream} per instance,   *  instead. 
*/  public static PrintStream out = null;  private PrintStream infoStream;  private Directory dir;  /**   * Returned from {@link #checkIndex()} detailing the health and status of the index.   *   * <p><b>WARNING</b>: this API is new and experimental and is   * subject to suddenly change in the next release.   **/  public static class Status {    /** True if no problems were found with the index. */    public boolean clean;    /** True if we were unable to locate and load the segments_N file. */    public boolean missingSegments;    /** True if we were unable to open the segments_N file. */    public boolean cantOpenSegments;    /** True if we were unable to read the version number from segments_N file. */    public boolean missingSegmentVersion;    /** Name of latest segments_N file in the index. */    public String segmentsFileName;    /** Number of segments in the index. */    public int numSegments;    /** String description of the version of the index. */    public String segmentFormat;    /** Empty unless you passed specific segments list to check as optional 3rd argument.     *  @see CheckIndex#checkIndex(List) */    public List/*<String>*/ segmentsChecked = new ArrayList();      /** True if the index was created with a newer version of Lucene than the CheckIndex tool. */    public boolean toolOutOfDate;    /** List of {@link SegmentInfoStatus} instances, detailing status of each segment. */    public List/*<SegmentInfoStatus*/ segmentInfos = new ArrayList();      /** Directory index is in. */    public Directory dir;    /** SegmentInfos instance containing only segments that     *  had no problems (this is used with the {@link     *  CheckIndex#fix} method to repair the index. */    SegmentInfos newSegments;    /** How many documents will be lost to bad segments. */    public int totLoseDocCount;    /** How many bad segments were found. 
*/    public int numBadSegments;    /** True if we checked only specific segments ({@link     * #checkIndex(List)}) was called with non-null     * argument). */    public boolean partial;    /** Holds the status of each segment in the index.     *  See {@link #segmentInfos}.     *     * <p><b>WARNING</b>: this API is new and experimental and is     * subject to suddenly change in the next release.     */    public static class SegmentInfoStatus {      /** Name of the segment. */      public String name;      /** Document count (does not take deletions into account). */      public int docCount;      /** True if segment is compound file format. */      public boolean compound;      /** Number of files referenced by this segment. */      public int numFiles;      /** Net size (MB) of the files referenced by this       *  segment. */      public double sizeMB;      /** Doc store offset, if this segment shares the doc       *  store files (stored fields and term vectors) with       *  other segments.  This is -1 if it does not share. */      public int docStoreOffset = -1;          /** String of the shared doc store segment, or null if       *  this segment does not share the doc store files. */      public String docStoreSegment;      /** True if the shared doc store files are compound file       *  format. */      public boolean docStoreCompoundFile;      /** True if this segment has pending deletions. */      public boolean hasDeletions;      /** Name of the current deletions file name. */      public String deletionsFileName;          /** Number of deleted documents. */      public int numDeleted;      /** True if we were able to open a SegmentReader on this       *  segment. */      public boolean openReaderPassed;      /** Number of fields in this segment. */      int numFields;      /** True if at least one of the fields in this segment       *  does not omitTf.       
*  @see Fieldable#setOmitTf */      public boolean hasProx;    }  }  /** Create a new CheckIndex on the directory. */  public CheckIndex(Directory dir) {    this.dir = dir;    infoStream = out;  }  /** Set infoStream where messages should go.  If null, no   *  messages are printed */  public void setInfoStream(PrintStream out) {    infoStream = out;  }  private void msg(String msg) {    if (infoStream != null)      infoStream.println(msg);  }  private static class MySegmentTermDocs extends SegmentTermDocs {    int delCount;    MySegmentTermDocs(SegmentReader p) {          super(p);    }    public void seek(Term term) throws IOException {      super.seek(term);      delCount = 0;    }    protected void skippingDoc() throws IOException {      delCount++;    }  }  /** Returns true if index is clean, else false.    *  @deprecated Please instantiate a CheckIndex and then use {@link #checkIndex()} instead */  public static boolean check(Directory dir, boolean doFix) throws IOException {    return check(dir, doFix, null);  }  /** Returns true if index is clean, else false.   *  @deprecated Please instantiate a CheckIndex and then use {@link #checkIndex(List)} instead */  public static boolean check(Directory dir, boolean doFix, List onlySegments) throws IOException {    CheckIndex checker = new CheckIndex(dir);    Status status = checker.checkIndex(onlySegments);    if (doFix && !status.clean)      checker.fixIndex(status);    return status.clean;  }  /** Returns a {@link Status} instance detailing   *  the state of the index.   *   *  <p>As this method checks every byte in the index, on a large   *  index it can take quite a long time to run.   *   *  <p><b>WARNING</b>: make sure   *  you only call this when the index is not opened by any   *  writer. */  public Status checkIndex() throws IOException {    return checkIndex(null);  }  /** Returns a {@link Status} instance detailing   *  the state of the index.   
*
   *  @param onlySegments list of specific segment names to check
   *
   *  <p>As this method checks every byte in the specified
   *  segments, on a large index it can take quite a long
   *  time to run.
   *
   *  <p><b>WARNING</b>: make sure
   *  you only call this when the index is not opened by any
   *  writer. */
  public Status checkIndex(List onlySegments) throws IOException {
    NumberFormat nf = NumberFormat.getInstance();
    SegmentInfos sis = new SegmentInfos();
    Status result = new Status();
    result.dir = dir;

    // Step 1: load the segments file; any failure is reported in the status
    // (missingSegments) instead of being thrown.
    try {
      sis.read(dir);
    } catch (Throwable t) {
      msg("ERROR: could not read any segments file in directory");
      result.missingSegments = true;
      if (infoStream != null)
        t.printStackTrace(infoStream);
      return result;
    }

    final int numSegments = sis.size();
    final String segmentsFileName = sis.getCurrentSegmentFileName();

    // Step 2: open the current segments file to read its format header.
    IndexInput input = null;
    try {
      input = dir.openInput(segmentsFileName);
    } catch (Throwable t) {
      msg("ERROR: could not open segments file in directory");
      if (infoStream != null)
        t.printStackTrace(infoStream);
      result.cantOpenSegments = true;
      return result;
    }

    // Step 3: the first int of the file is the format; the input is closed
    // whether or not the read succeeds.
    int format = 0;
    try {
      format = input.readInt();
    } catch (Throwable t) {
      msg("ERROR: could not read segment file version in directory");
      if (infoStream != null)
        t.printStackTrace(infoStream);
      result.missingSegmentVersion = true;
      return result;
    } finally {
      if (input != null)
        input.close();
    }

    // Step 4: translate the format code into a description. This must be a
    // single if/else-if chain: if the FORMAT test stands alone, its label is
    // always overwritten by the trailing else branches.
    String sFormat = "";
    boolean skip = false;

    if (format == SegmentInfos.FORMAT)
      sFormat = "FORMAT [Lucene Pre-2.1]";
    else if (format == SegmentInfos.FORMAT_LOCKLESS)
      sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
    else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
      sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
    else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
      sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
    else if (format == SegmentInfos.FORMAT_CHECKSUM)
      sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
    else if (format == SegmentInfos.FORMAT_DEL_COUNT)
      sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
    else if (format == SegmentInfos.FORMAT_HAS_PROX)
      sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
    else if (format < SegmentInfos.CURRENT_FORMAT) {
      // Unknown code below CURRENT_FORMAT: written by a newer Lucene.
      sFormat = "int=" + format + " [newer version of Lucene than this tool]";
      skip = true;
    } else {
      sFormat = format + " [Lucene 1.3 or prior]";
    }

    msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat);
    result.segmentsFileName = segmentsFileName;
    result.numSegments = numSegments;
    result.segmentFormat = sFormat;

    if (onlySegments != null) {
      result.partial = true;
      // Iterate only when there is a stream to print to. Calling it.next()
      // solely inside a null check on infoStream would never advance the
      // iterator when no stream is set, and the loop would not terminate.
      if (infoStream != null) {
        infoStream.print("\nChecking only these segments:");
        Iterator it = onlySegments.iterator();
        while (it.hasNext()) {
          infoStream.print(" " + it.next());
        }
      }
      result.segmentsChecked.addAll(onlySegments);
      msg(":");
    }

    if (skip) {
      msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
      result.toolOutOfDate = true;
      return result;
    }

    // Start from a copy of the segment list with all entries removed.
    result.newSegments = (SegmentInfos) sis.clone();
    result.newSegments.clear();

    for(int i=0;i<numSegments;i++) {
      final SegmentInfo info = sis.info(i);
      // When a segment list was given, skip every segment not named in it.
      if (onlySegments != null && !onlySegments.contains(info.name))
        continue;
      Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
      result.segmentInfos.add(segInfoStat);
      msg("  " + (1+i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
      segInfoStat.name = info.name;
      segInfoStat.docCount = info.docCount;
12 下一页
💿 文件大小 748 K
👤 上传用户 Rosa_
📂 所属分类 Java编程
🏷️ 相关标签

#lucene #工具包
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -