⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 SegmentReader.java

📁 一套java版本的搜索引擎源码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BitVector;
import java.io.IOException;
import java.util.*;

/**
 * IndexReader implementation that reads a single on-disk index segment.
 * File names for the segment's data files are derived from {@link #segment}
 * plus an extension (".fnm", ".frq", ".prx", ...), optionally packed inside
 * a single ".cfs" compound file.
 *
 * @version $Id: SegmentReader.java 496851 2007-01-16 20:24:52Z mikemccand $
 */
class SegmentReader extends IndexReader {
  // Segment name (e.g. "_0"); prefix for every data file of this segment.
  private String segment;
  // Metadata record for this segment (doc count, deletion/norm generations, ...).
  private SegmentInfo si;

  FieldInfos fieldInfos;
  private FieldsReader fieldsReader;

  TermInfosReader tis;
  // Shared term-vectors reader; per-thread clones are kept in termVectorsLocal.
  // NOTE(review): cloning/usage happens outside this excerpt - confirm there.
  TermVectorsReader termVectorsReaderOrig = null;
  ThreadLocal termVectorsLocal = new ThreadLocal();

  // One bit per deleted document; null when the segment has no deletions.
  BitVector deletedDocs = null;
  // Dirty flags: set when deletions/norms change in memory and must be
  // re-written by doCommit(); rollback* snapshots presumably support
  // aborting a commit (rollback logic is outside this excerpt).
  private boolean deletedDocsDirty = false;
  private boolean normsDirty = false;
  private boolean undeleteAll = false;

  private boolean rollbackDeletedDocsDirty = false;
  private boolean rollbackNormsDirty = false;
  private boolean rollbackUndeleteAll = false;

  // Postings streams, kept open for the lifetime of the reader.
  IndexInput freqStream;
  IndexInput proxStream;

  // Compound File Reader when based on a compound file segment
  CompoundFileReader cfsReader = null;

  // Per-field norms: the open input, the lazily-read bytes, and a dirty flag.
  private class Norm {
    public
Norm(IndexInput in, int number, long normSeek)
    {
      this.in = in;
      this.number = number;
      this.normSeek = normSeek;
    }

    // Stream the norm bytes are read from.
    private IndexInput in;
    // One norm byte per document; written out in full by reWrite().
    private byte[] bytes;
    // True when bytes have been modified and must be persisted on commit.
    private boolean dirty;
    // Field number this Norm belongs to.
    private int number;
    // Offset within 'in' where this field's norms start.
    // NOTE(review): seek usage is outside this excerpt - confirm there.
    private long normSeek;
    // Snapshot of 'dirty' for commit rollback.
    private boolean rollbackDirty;

    /**
     * Persists the in-memory norm bytes under the segment's next norm
     * generation, scheduling the old norm file (if any) for deletion.
     */
    private void reWrite(SegmentInfo si) throws IOException {
      // NOTE: norms are re-written in regular directory, not cfs
      String oldFileName = si.getNormFileName(this.number);
      if (oldFileName != null && !oldFileName.endsWith("." + IndexFileNames.NORMS_EXTENSION)) {
        // Mark this file for deletion.  Note that we don't
        // actually try to delete it until the new segments file is
        // successfully written:
        deleter.addPendingFile(oldFileName);
      }

      // Bump the generation so the new file gets a fresh name, then write
      // exactly maxDoc() bytes (one norm byte per document).
      si.advanceNormGen(this.number);
      IndexOutput out = directory().createOutput(si.getNormFileName(this.number));
      try {
        out.writeBytes(bytes, maxDoc());
      } finally {
        out.close();
      }
      this.dirty = false;
    }
  }

  // Field name -> Norm for every indexed field with norms.
  private Hashtable norms = new Hashtable();

  /** The class which implements SegmentReader.
*/
  private static Class IMPL;
  static {
    try {
      // Allow a drop-in replacement class to be configured via system
      // property; defaults to this class.
      String name =
        System.getProperty("org.apache.lucene.SegmentReader.class",
                           SegmentReader.class.getName());
      IMPL = Class.forName(name);
    } catch (ClassNotFoundException e) {
      throw new RuntimeException("cannot load SegmentReader class: " + e, e);
    } catch (SecurityException se) {
      // A security manager refused the property lookup; fall back to the
      // default implementation class.
      try {
        IMPL = Class.forName(SegmentReader.class.getName());
      } catch (ClassNotFoundException e) {
        throw new RuntimeException("cannot load default SegmentReader class: " + e, e);
      }
    }
  }

  protected SegmentReader() { super(null); }

  /** Opens a reader for the given segment in the segment's own directory. */
  public static SegmentReader get(SegmentInfo si) throws IOException {
    return get(si.dir, si, null, false, false);
  }

  /** Opens a reader for the given segment, owning its directory. */
  public static SegmentReader get(SegmentInfos sis, SegmentInfo si,
                                  boolean closeDir) throws IOException {
    return get(si.dir, si, sis, closeDir, true);
  }

  /**
   * Factory entry point: reflectively instantiates the configured IMPL
   * class, then wires it up via init() and initialize().
   */
  public static SegmentReader get(Directory dir, SegmentInfo si,
                                  SegmentInfos sis,
                                  boolean closeDir, boolean ownDir)
    throws IOException {
    SegmentReader instance;
    try {
      instance = (SegmentReader)IMPL.newInstance();
    } catch (Exception e) {
      throw new RuntimeException("cannot load SegmentReader class: " + e, e);
    }
    instance.init(dir, sis, closeDir, ownDir);
    instance.initialize(si);
    return instance;
  }

  /**
   * Opens all per-segment files (field infos, stored fields, terms, norms,
   * postings, optional term vectors), going through the compound-file
   * reader when the segment uses one.
   */
  private void initialize(SegmentInfo si) throws IOException {
    segment = si.name;
    this.si = si;
    boolean success = false;

    try {
      // Use compound file directory for some files, if it exists
      Directory cfsDir = directory();
      if (si.getUseCompoundFile()) {
        cfsReader = new CompoundFileReader(directory(), segment + ".cfs");
        cfsDir = cfsReader;
      }

      // No compound file exists - use the multi-file format
      fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
      fieldsReader = new FieldsReader(cfsDir,
segment, fieldInfos);

      // Verify two sources of "maxDoc" agree:
      if (fieldsReader.size() != si.docCount) {
        throw new IllegalStateException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.size() + " but segmentInfo shows " + si.docCount);
      }

      tis = new TermInfosReader(cfsDir, segment, fieldInfos);

      // NOTE: the bitvector is stored using the regular directory, not cfs
      if (hasDeletions(si)) {
        deletedDocs = new BitVector(directory(), si.getDelFileName());

        // Verify # deletes does not exceed maxDoc for this segment:
        if (deletedDocs.count() > maxDoc()) {
          throw new IllegalStateException("number of deletes (" + deletedDocs.count() + ") exceeds max doc (" + maxDoc() + ") for segment " + si.name);
        }
      }

      // make sure that all index files have been read or are kept open
      // so that if an index update removes them we'll still have them
      freqStream = cfsDir.openInput(segment + ".frq");
      proxStream = cfsDir.openInput(segment + ".prx");
      openNorms(cfsDir);

      if (fieldInfos.hasVectors()) { // open term vector files only as needed
        termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
      }
      success = true;
    } finally {
      // With lock-less commits, it's entirely possible (and
      // fine) to hit a FileNotFound exception above.  In
      // this case, we want to explicitly close any subset
      // of things that were opened so that we don't have to
      // wait for a GC to do so.
      if (!success) {
        doClose();
      }
    }
  }

  /**
   * Persists in-memory deletions and dirty norms: writes a new .del file
   * and/or new norm files under advanced generations, scheduling the old
   * files for deletion, then clears all dirty flags.
   */
  protected void doCommit() throws IOException {
    if (deletedDocsDirty) {               // re-write deleted
      String oldDelFileName = si.getDelFileName();

      if (oldDelFileName != null) {
        // Mark this file for deletion.
// Note that we don't
        // actually try to delete it until the new segments file is
        // successfully written:
        deleter.addPendingFile(oldDelFileName);
      }

      si.advanceDelGen();

      // We can write directly to the actual name (vs to a
      // .tmp & renaming it) because the file is not live
      // until segments file is written:
      deletedDocs.write(directory(), si.getDelFileName());
    }
    if (undeleteAll && si.hasDeletions()) {
      String oldDelFileName = si.getDelFileName();
      if (oldDelFileName != null) {
        // Mark this file for deletion.  Note that we don't
        // actually try to delete it until the new segments file is
        // successfully written:
        deleter.addPendingFile(oldDelFileName);
      }
      si.clearDelGen();
    }
    if (normsDirty) {               // re-write norms
      si.setNumFields(fieldInfos.size());
      Enumeration values = norms.elements();
      while (values.hasMoreElements()) {
        Norm norm = (Norm) values.nextElement();
        if (norm.dirty) {
          norm.reWrite(si);
        }
      }
    }
    deletedDocsDirty = false;
    normsDirty = false;
    undeleteAll = false;
  }

  /** Closes every file/reader this segment reader opened (null-safe). */
  protected void doClose() throws IOException {
    if (fieldsReader != null) {
      fieldsReader.close();
    }
    if (tis != null) {
      tis.close();
    }

    if (freqStream != null)
      freqStream.close();
    if (proxStream != null)
      proxStream.close();

    closeNorms();

    if (termVectorsReaderOrig != null)
      termVectorsReaderOrig.close();

    if (cfsReader != null)
      cfsReader.close();
  }

  // Static variant: answers from segment metadata without opening a reader.
  static boolean hasDeletions(SegmentInfo si) throws IOException {
    return si.hasDeletions();
  }

  // Instance variant: true once the deletions bitvector has been loaded
  // or lazily created by doDelete().
  public boolean hasDeletions() {
    return deletedDocs != null;
  }

  static boolean usesCompoundFile(SegmentInfo si) throws IOException {
    return si.getUseCompoundFile();
  }

  static boolean hasSeparateNorms(SegmentInfo si) throws IOException {
    return si.hasSeparateNorms();
  }

  /** Marks one document as deleted, lazily allocating the bitvector. */
  protected void doDelete(int docNum) {
    if
(deletedDocs == null)
      deletedDocs = new BitVector(maxDoc());

    // Deleting cancels any prior undelete-all and must be committed.
    deletedDocsDirty = true;
    undeleteAll = false;
    deletedDocs.set(docNum);
  }

  /** Drops the in-memory deletions; the on-disk .del file is cleared at commit. */
  protected void doUndeleteAll() {
      deletedDocs = null;
      deletedDocsDirty = false;
      undeleteAll = true;
  }

  /** Lists the files belonging to this segment that currently exist. */
  // NOTE(review): this method is truncated in this excerpt; the remainder
  // (non-compound files, .del and norm files) lies beyond the visible chunk.
  Vector files() throws IOException {
    Vector files = new Vector(16);

    if (si.getUseCompoundFile()) {
      String name = segment + ".cfs";
      if (directory().fileExists(name)) {
        files.addElement(name);
      }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -