📄 checkindex.java

📁 lucene-2.4.0 是一个全文搜索的工具包
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
      int toLoseDocCount = info.docCount;      SegmentReader reader = null;      try {        msg("    compound=" + info.getUseCompoundFile());        segInfoStat.compound = info.getUseCompoundFile();        msg("    hasProx=" + info.getHasProx());        segInfoStat.hasProx = info.getHasProx();        msg("    numFiles=" + info.files().size());        segInfoStat.numFiles = info.files().size();        msg("    size (MB)=" + nf.format(info.sizeInBytes()/(1024.*1024.)));        segInfoStat.sizeMB = info.sizeInBytes()/(1024.*1024.);        final int docStoreOffset = info.getDocStoreOffset();        if (docStoreOffset != -1) {          msg("    docStoreOffset=" + docStoreOffset);          segInfoStat.docStoreOffset = docStoreOffset;          msg("    docStoreSegment=" + info.getDocStoreSegment());          segInfoStat.docStoreSegment = info.getDocStoreSegment();          msg("    docStoreIsCompoundFile=" + info.getDocStoreIsCompoundFile());          segInfoStat.docStoreCompoundFile = info.getDocStoreIsCompoundFile();        }        final String delFileName = info.getDelFileName();        if (delFileName == null){          msg("    no deletions");          segInfoStat.hasDeletions = false;        }        else{          msg("    has deletions [delFileName=" + delFileName + "]");          segInfoStat.hasDeletions = true;          segInfoStat.deletionsFileName = delFileName;        }        if (infoStream != null)          infoStream.print("    test: open reader.........");        reader = SegmentReader.get(info);        final int numDocs = reader.numDocs();        toLoseDocCount = numDocs;        if (reader.hasDeletions()) {          if (info.docCount - numDocs != info.getDelCount()){            throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs));          }          segInfoStat.numDeleted = info.docCount - numDocs;          msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");        } else { 
         if (info.getDelCount() != 0){            throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs));          }          msg("OK");        }        if (infoStream != null)          infoStream.print("    test: fields, norms.......");        Collection fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);        Iterator it = fieldNames.iterator();        while(it.hasNext()) {          final String fieldName = (String) it.next();          byte[] b = reader.norms(fieldName);          if (b.length != info.docCount)            throw new RuntimeException("norms for field \"" + fieldName + "\" is length " + b.length + " != maxDoc " + info.docCount);        }        msg("OK [" + fieldNames.size() + " fields]");        segInfoStat.numFields = fieldNames.size();        if (infoStream != null)          infoStream.print("    test: terms, freq, prox...");        final TermEnum termEnum = reader.terms();        final TermPositions termPositions = reader.termPositions();        // Used only to count up # deleted docs for this        // term        final MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);        long termCount = 0;        long totFreq = 0;        long totPos = 0;        while(termEnum.next()) {          termCount++;          final Term term = termEnum.term();          final int docFreq = termEnum.docFreq();          termPositions.seek(term);          int lastDoc = -1;          int freq0 = 0;          totFreq += docFreq;          while(termPositions.next()) {            freq0++;            final int doc = termPositions.doc();            final int freq = termPositions.freq();            if (doc <= lastDoc)              throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);            lastDoc = doc;            if (freq <= 0)              throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");      
                  int lastPos = -1;            totPos += freq;            for(int j=0;j<freq;j++) {              final int pos = termPositions.nextPosition();              if (pos < -1)                throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");              if (pos < lastPos)                throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);            }          }          // Now count how many deleted docs occurred in          // this term:          final int delCount;          if (reader.hasDeletions()) {            myTermDocs.seek(term);            while(myTermDocs.next()) {            }            delCount = myTermDocs.delCount;          } else            delCount = 0;          if (freq0 + delCount != docFreq)            throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);        }        msg("OK [" + termCount + " terms; " + totFreq + " terms/docs pairs; " + totPos + " tokens]");        if (infoStream != null)          infoStream.print("    test: stored fields.......");        int docCount = 0;        long totFields = 0;        for(int j=0;j<info.docCount;j++)          if (!reader.isDeleted(j)) {            docCount++;            Document doc = reader.document(j);            totFields += doc.getFields().size();          }        if (docCount != reader.numDocs())          throw new RuntimeException("docCount=" + docCount + " but saw " + docCount + " undeleted docs");        msg("OK [" + totFields + " total field count; avg " + nf.format((((float) totFields)/docCount)) + " fields per doc]");        if (infoStream != null)          infoStream.print("    test: term vectors........");        int totVectors = 0;        for(int j=0;j<info.docCount;j++)          if (!reader.isDeleted(j)) {            TermFreqVector[] tfv = reader.getTermFreqVectors(j);            if (tfv != null)  
            totVectors += tfv.length;          }        msg("OK [" + totVectors + " total vector count; avg " + nf.format((((float) totVectors)/docCount)) + " term/freq vector fields per doc]");        msg("");      } catch (Throwable t) {        msg("FAILED");        String comment;        comment = "fixIndex() would remove reference to this segment";        msg("    WARNING: " + comment + "; full exception:");        if (infoStream != null)          t.printStackTrace(infoStream);        msg("");        result.totLoseDocCount += toLoseDocCount;        result.numBadSegments++;        continue;      } finally {        if (reader != null)          reader.close();      }      // Keeper      result.newSegments.add(info.clone());    }    if (0 == result.numBadSegments) {      result.clean = true;      msg("No problems were detected with this index.\n");    } else      msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");    return result;  }    /** Repairs the index using previously returned result   *  from {@link #checkIndex}.  Note that this does not   *  remove any of the unreferenced files after it's done;   *  you must separately open an {@link IndexWriter}, which   *  deletes unreferenced files when it's created.   *   * <p><b>WARNING</b>: this writes a   *  new segments file into the index, effectively removing   *  all documents in broken segments from the index.   *  BE CAREFUL.   *   * <p><b>WARNING</b>: Make sure you only call this when the   *  index is not opened  by any writer. 
*/
  public void fixIndex(Status result) throws IOException {
    // Reject a status that was produced by checking only a subset of the
    // segments (result.partial); only a full check may be used for a fix.
    if (result.partial)
      throw new IllegalArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)");
    // Commit the segment list held by the status into the status's directory.
    result.newSegments.commit(result.dir);
  }

  // Flipped to true by testAsserts(), which is only ever invoked from inside
  // an assert statement -- so it stays false when assertions are disabled.
  private static boolean assertsOn;

  /** Records that assertions are enabled; always returns true so it can be used as an assert condition. */
  private static boolean testAsserts() {
    assertsOn = true;
    return true;
  }

  /** Returns true if assertions are enabled for this class, probing via an assert with a side effect. */
  private static boolean assertsOn() {
    // The call below is skipped entirely when assertions are off, leaving
    // the flag at whatever value it already had.
    assert testAsserts();
    return assertsOn;
  }

  /** Command-line interface to check and fix an index.

    <p>
    Run it like this:
    <pre>
    java -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
    </pre>
    <ul>
    <li><code>-fix</code>: actually write a new segments_N file, removing any problematic segments

    <li><code>-segment X</code>: only check the specified
    segment(s).  This can be specified multiple times,
    to check more than one segment, eg <code>-segment _2
    -segment _a</code>.  You can't use this with the -fix
    option.
    </ul>

    <p><b>WARNING</b>: <code>-fix</code> should only be used on an emergency basis as it will cause
                       documents (perhaps many) to be permanently removed from the index.  Always make
                       a backup copy of your index before running this!  Do not run this tool on an index
                       that is actively being written to.  You have been warned!

    <p>                Run without -fix, this tool will open the index, report version information
                       and report any exceptions it hits and what action it would take if -fix were
                       specified.  With -fix, this tool will remove any segments that have issues and
                       write a new segments_N file.  This means all documents contained in the affected
                       segments will be removed.

    <p>
                       This tool exits with exit code 1 if the index cannot be opened or has any
                       corruption, else 0.

    <p>
                       If the thread is interrupted during the 5 second countdown that precedes
                       the write done by -fix, the countdown still runs to completion and the
                       thread's interrupt status is restored after the write.
   */
  public static void main(String[] args) throws IOException {

    boolean doFix = false;
    List onlySegments = new ArrayList();
    String indexPath = null;

    // Parse arguments: "-fix", any number of "-segment NAME" pairs, and
    // exactly one positional argument (the index path).
    int i = 0;
    while(i < args.length) {
      if (args[i].equals("-fix")) {
        doFix = true;
        i++;
      } else if (args[i].equals("-segment")) {
        if (i == args.length-1) {
          System.out.println("ERROR: missing name for -segment option");
          System.exit(1);
        }
        onlySegments.add(args[i+1]);
        i += 2;
      } else {
        if (indexPath != null) {
          System.out.println("ERROR: unexpected extra argument '" + args[i] + "'");
          System.exit(1);
        }
        indexPath = args[i];
        i++;
      }
    }

    if (indexPath == null) {
      System.out.println("\nERROR: index path not specified");
      System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" +
                         "\n" +
                         "  -fix: actually write a new segments_N file, removing any problematic segments\n" +
                         "  -segment X: only check the specified segments.  This can be specified multiple\n" +
                         "              times, to check more than one segment, eg '-segment _2 -segment _a'.\n" +
                         "              You can't use this with the -fix option\n" +
                         "\n" +
                         "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" +
                         "documents (perhaps many) to be permanently removed from the index.  Always make\n" +
                         "a backup copy of your index before running this!  Do not run this tool on an index\n" +
                         "that is actively being written to.  You have been warned!\n" +
                         "\n" +
                         "Run without -fix, this tool will open the index, report version information\n" +
                         "and report any exceptions it hits and what action it would take if -fix were\n" +
                         "specified.  With -fix, this tool will remove any segments that have issues and\n" +
                         "write a new segments_N file.  This means all documents contained in the affected\n" +
                         "segments will be removed.\n" +
                         "\n" +
                         "This tool exits with exit code 1 if the index cannot be opened or has any\n" +
                         "corruption, else 0.\n");
      System.exit(1);
    }

    if (!assertsOn())
      System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");

    // An empty filter is passed on as null; a non-empty filter cannot be
    // combined with -fix.
    if (onlySegments.size() == 0)
      onlySegments = null;
    else if (doFix) {
      System.out.println("ERROR: cannot specify both -fix and -segment");
      System.exit(1);
    }

    System.out.println("\nOpening index @ " + indexPath + "\n");
    Directory dir = null;
    try {
      dir = FSDirectory.getDirectory(indexPath);
    } catch (Throwable t) {
      System.out.println("ERROR: could not open directory \"" + indexPath + "\"; exiting");
      t.printStackTrace(System.out);
      System.exit(1);
    }

    CheckIndex checker = new CheckIndex(dir);
    checker.setInfoStream(System.out);

    Status result = checker.checkIndex(onlySegments);

    if (!result.clean) {
      if (!doFix) {
        System.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
      } else {
        System.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
        System.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");

        // Thread.sleep clears the interrupt status when it throws, so the
        // interrupt is remembered locally and the same second is retried.
        // Re-asserting the flag inside the loop would make every following
        // sleep throw immediately, and the loop would spin forever without
        // ever advancing s.
        boolean interrupted = false;
        for(int s=0;s<5;s++) {
          try {
            Thread.sleep(1000);
          } catch (InterruptedException ie) {
            interrupted = true;
            s--;
            continue;
          }
          System.out.println("  " + (5-s) + "...");
        }

        System.out.println("Writing...");
        checker.fixIndex(result);
        System.out.println("OK");
        System.out.println("Wrote new segments file \"" + result.newSegments.getCurrentSegmentFileName() + "\"");

        // Restore the interrupt status only after the write has finished.
        if (interrupted)
          Thread.currentThread().interrupt();
      }
    }
    System.out.println("");

    // result was already dereferenced above, so no null check is needed.
    // Note that result.clean is not updated by a successful -fix, so the
    // exit code still reports that corruption was found.
    final int exitCode = result.clean ? 0 : 1;
    System.exit(exitCode);
  }
}
上一页 12
💿 文件大小 748 K
👤 上传用户 Rosa_
📂 所属分类 Java编程
🏷️ 相关标签

#lucene #工具包
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -