📄 SegmentReader.java
字号:
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.util.BitVector;

import java.io.IOException;
import java.util.*;

/**
 * An IndexReader over a single on-disk segment: opens the segment's field,
 * term-dictionary, frequency, proximity, norm and (optionally) term-vector
 * files, and tracks in-memory deletions / norm edits until they are committed
 * back to the directory.
 *
 * @version $Id: SegmentReader.java 542561 2007-05-29 15:14:07Z mikemccand $
 */
class SegmentReader extends IndexReader {
  // Name of the segment this reader serves (e.g. "_0").
  private String segment;
  // Per-segment metadata (doc count, deletion/norm generations, etc.).
  private SegmentInfo si;

  FieldInfos fieldInfos;
  private FieldsReader fieldsReader;

  TermInfosReader tis;
  // Shared original reader; per-thread clones presumably live in
  // termVectorsLocal — TODO confirm against the rest of the file.
  TermVectorsReader termVectorsReaderOrig = null;
  ThreadLocal termVectorsLocal = new ThreadLocal();

  // Bit per document; a set bit marks the doc as deleted. null when the
  // segment has no deletions.
  BitVector deletedDocs = null;
  // Dirty flags: pending state that doCommit() must flush.
  private boolean deletedDocsDirty = false;
  private boolean normsDirty = false;
  private boolean undeleteAll = false;

  // Snapshots of the dirty flags for rollback support (not used in the
  // portion of the file visible here).
  private boolean rollbackDeletedDocsDirty = false;
  private boolean rollbackNormsDirty = false;
  private boolean rollbackUndeleteAll = false;

  IndexInput freqStream;
  IndexInput proxStream;

  // optionally used for the .nrm file shared by multiple norms
  private IndexInput singleNormStream;

  // Compound File Reader when based on a compound file segment
  CompoundFileReader cfsReader = null;

  /**
   * One field's norms: lazily-read bytes plus the bookkeeping needed to
   * re-write them (to a new generation file) when they have been modified.
   */
  private class Norm {
    public Norm(IndexInput in, int number, long normSeek) {
      this.in = in;
      this.number = number;
      this.normSeek = normSeek;
    }

    private IndexInput in;          // stream the norm bytes are read from
    private byte[] bytes;           // cached norms, one byte per document
    private boolean dirty;          // true when bytes differ from what is on disk
    private int number;             // field number this norm belongs to
    private long normSeek;          // offset of this field's norms within 'in'
    private boolean rollbackDirty;  // snapshot of 'dirty' for rollback

    private void reWrite(SegmentInfo si) throws IOException {
      // NOTE: norms are re-written in regular directory, not cfs
      si.advanceNormGen(this.number);
      IndexOutput out = directory().createOutput(si.getNormFileName(this.number));
      try {
        out.writeBytes(bytes, maxDoc());
      } finally {
        out.close();
      }
      this.dirty = false;
    }

    /** Closes the underlying IndexInput for this norm.
     * It is still valid to access all other norm properties after close is called.
     * @throws IOException
     */
    public void close() throws IOException {
      // Never close singleNormStream here: it is shared by multiple Norms
      // and is closed elsewhere.
      if (in != null && in != singleNormStream) {
        in.close();
      }
      in = null;
    }
  }

  // Field name -> Norm. Hashtable (synchronized) rather than HashMap;
  // presumably because norms are touched from multiple threads — TODO confirm.
  private Hashtable norms = new Hashtable();

  /** The class which implements SegmentReader. */
  private static Class IMPL;
  static {
    try {
      // Allow the concrete SegmentReader implementation to be overridden via
      // a system property; defaults to this class.
      String name =
        System.getProperty("org.apache.lucene.SegmentReader.class",
                           SegmentReader.class.getName());
      IMPL = Class.forName(name);
    } catch (ClassNotFoundException e) {
      throw new RuntimeException("cannot load SegmentReader class: " + e, e);
    } catch (SecurityException se) {
      // Reading the system property was forbidden: fall back to the default
      // implementation class.
      try {
        IMPL = Class.forName(SegmentReader.class.getName());
      } catch (ClassNotFoundException e) {
        throw new RuntimeException("cannot load default SegmentReader class: " + e, e);
      }
    }
  }

  protected SegmentReader() { super(null); }

  /**
   * Opens a reader for the given segment with the default buffer size.
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static SegmentReader get(SegmentInfo si) throws CorruptIndexException, IOException {
    return get(si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE);
  }

  /**
   * Opens a reader for the given segment with an explicit read buffer size.
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static SegmentReader get(SegmentInfo si, int readBufferSize) throws CorruptIndexException, IOException {
    return get(si.dir, si, null, false, false, readBufferSize);
  }

  /**
   * Opens a reader for the given segment within a SegmentInfos.
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static SegmentReader get(SegmentInfos sis, SegmentInfo si, boolean closeDir) throws CorruptIndexException, IOException {
    return get(si.dir, si, sis, closeDir, true, BufferedIndexInput.BUFFER_SIZE);
  }

  /**
   * Factory entry point: instantiates the configured IMPL class reflectively,
   * then initializes it against the segment's files.
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public static SegmentReader get(Directory dir, SegmentInfo si, SegmentInfos sis,
                                  boolean closeDir, boolean ownDir, int readBufferSize)
    throws CorruptIndexException, IOException {
    SegmentReader instance;
    try {
      instance = (SegmentReader)IMPL.newInstance();
    } catch (Exception e) {
      // Reflective construction failed (access, instantiation, ...): surface
      // it as an unchecked error since this is a configuration problem.
      throw new RuntimeException("cannot load SegmentReader class: " + e, e);
    }
    instance.init(dir, sis, closeDir, ownDir);
    instance.initialize(si, readBufferSize);
    return instance;
  }

  /**
   * Opens all per-segment files (fields, terms, deletions, postings, norms,
   * vectors), verifying doc counts along the way. On any failure, everything
   * opened so far is closed before the exception propagates.
   */
  private void initialize(SegmentInfo si, int readBufferSize) throws CorruptIndexException, IOException {
    segment = si.name;
    this.si = si;

    boolean success = false;

    try {
      // Use compound file directory for some files, if it exists
      Directory cfsDir = directory();
      if (si.getUseCompoundFile()) {
        cfsReader = new CompoundFileReader(directory(), segment + ".cfs", readBufferSize);
        cfsDir = cfsReader;
      }

      // No compound file exists - use the multi-file format
      fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
      fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos, readBufferSize);

      // Verify two sources of "maxDoc" agree:
      if (fieldsReader.size() != si.docCount) {
        throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.size() + " but segmentInfo shows " + si.docCount);
      }

      tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);

      // NOTE: the bitvector is stored using the regular directory, not cfs
      if (hasDeletions(si)) {
        deletedDocs = new BitVector(directory(), si.getDelFileName());

        // Verify # deletes does not exceed maxDoc for this segment:
        if (deletedDocs.count() > maxDoc()) {
          throw new CorruptIndexException("number of deletes (" + deletedDocs.count() + ") exceeds max doc (" + maxDoc() + ") for segment " + si.name);
        }
      }

      // make sure that all index files have been read or are kept open
      // so that if an index update removes them we'll still have them
      freqStream = cfsDir.openInput(segment + ".frq", readBufferSize);
      proxStream = cfsDir.openInput(segment + ".prx", readBufferSize);
      openNorms(cfsDir, readBufferSize);

      if (fieldInfos.hasVectors()) { // open term vector files only as needed
        termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos, readBufferSize);
      }
      success = true;
    } finally {
      // With lock-less commits, it's entirely possible (and
      // fine) to hit a FileNotFound exception above. In
      // this case, we want to explicitly close any subset
      // of things that were opened so that we don't have to
      // wait for a GC to do so.
      if (!success) {
        doClose();
      }
    }
  }

  /**
   * Flushes pending deletions and modified norms to the directory under new
   * generation file names, then clears the dirty flags.
   */
  protected void doCommit() throws IOException {
    if (deletedDocsDirty) {               // re-write deleted
      si.advanceDelGen();

      // We can write directly to the actual name (vs to a
      // .tmp & renaming it) because the file is not live
      // until segments file is written:
      deletedDocs.write(directory(), si.getDelFileName());
    }
    if (undeleteAll && si.hasDeletions()) {
      si.clearDelGen();
    }
    if (normsDirty) {                     // re-write norms
      si.setNumFields(fieldInfos.size());
      Enumeration values = norms.elements();
      while (values.hasMoreElements()) {
        Norm norm = (Norm) values.nextElement();
        if (norm.dirty) {
          norm.reWrite(si);
        }
      }
    }
    deletedDocsDirty = false;
    normsDirty = false;
    undeleteAll = false;
  }

  /** Closes every reader/stream this segment reader opened, if present. */
  protected void doClose() throws IOException {
    if (fieldsReader != null) {
      fieldsReader.close();
    }
    if (tis != null) {
      tis.close();
    }

    if (freqStream != null)
      freqStream.close();
    if (proxStream != null)
      proxStream.close();

    closeNorms();

    if (termVectorsReaderOrig != null)
      termVectorsReaderOrig.close();

    if (cfsReader != null)
      cfsReader.close();
  }

  /** Whether the given segment has a deletions file, per its SegmentInfo. */
  static boolean hasDeletions(SegmentInfo si) throws IOException {
    // Don't call ensureOpen() here (it could affect performance)
    return si.hasDeletions();
  }

  public boolean hasDeletions() {
    // Don't call ensureOpen() here (it could affect performance)
    return deletedDocs != null;
  }

  static boolean usesCompoundFile(SegmentInfo si) throws IOException {
    return si.getUseCompoundFile();
  }

  static boolean hasSeparateNorms(SegmentInfo si) throws IOException {
    return si.hasSeparateNorms();
  }

  /** Marks one document deleted in memory; flushed later by doCommit(). */
  protected void doDelete(int docNum) {
    if (deletedDocs == null)
      deletedDocs = new BitVector(maxDoc());
    deletedDocsDirty = true;
    undeleteAll = false;
    deletedDocs.set(docNum);
  }

  /** Discards all in-memory deletions; doCommit() will clear the del file. */
  protected void doUndeleteAll() {
    deletedDocs = null;
    deletedDocsDirty = false;
    undeleteAll = true;
  }

  /** Returns the list of file names this segment consists of. */
  Vector files() throws IOException {
    return new Vector(si.files());
  }

  public TermEnum terms() {
    ensureOpen();
    return tis.terms();
  }

  // NOTE(review): the source visible here is truncated mid-method below —
  // the remainder of terms(Term t) and the rest of the class are not shown.
  public TermEnum terms(Term t) throws IOException {
    ensureOpen();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -