📄 SegmentReader.java
package org.apache.lucene.index;

/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Collection;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Set;
import java.util.Vector;

import org.apache.lucene.document.Document;
import org.apache.lucene.store.InputStream;
import org.apache.lucene.store.OutputStream;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitVector;

/**
 * FIXME: Describe class <code>SegmentReader</code> here.
 *
 * @version $Id: SegmentReader.java,v 1.23 2004/07/10 06:19:01 otis Exp $
 */
final class SegmentReader extends IndexReader {
  private String segment;

  FieldInfos fieldInfos;
  private FieldsReader fieldsReader;

  TermInfosReader tis;
  TermVectorsReader termVectorsReader;

  BitVector deletedDocs = null;
  private boolean deletedDocsDirty = false;
  private boolean normsDirty = false;
  private boolean undeleteAll = false;

  InputStream freqStream;
  InputStream proxStream;

  // Compound File Reader when based on a compound file segment
  CompoundFileReader cfsReader;

  private class Norm {
    public Norm(InputStream in, int number) {
      this.in = in;
      this.number = number;
    }

    private InputStream in;
    private byte[] bytes;
    private boolean dirty;
    private int number;

    private void reWrite() throws IOException {
      // NOTE: norms are re-written in regular directory, not cfs
      OutputStream out = directory().createFile(segment + ".tmp");
      try {
        out.writeBytes(bytes, maxDoc());
      } finally {
        out.close();
      }
      String fileName = segment + ".f" + number;
      directory().renameFile(segment + ".tmp", fileName);
      this.dirty = false;
    }
  }

  private Hashtable norms = new Hashtable();

  SegmentReader(SegmentInfos sis, SegmentInfo si, boolean closeDir)
          throws IOException {
    super(si.dir, sis, closeDir);
    initialize(si);
  }

  SegmentReader(SegmentInfo si) throws IOException {
    super(si.dir);
    initialize(si);
  }

  private void initialize(SegmentInfo si) throws IOException {
    segment = si.name;

    // Use compound file directory for some files, if it exists
    Directory cfsDir = directory();
    if (directory().fileExists(segment + ".cfs")) {
      cfsReader = new CompoundFileReader(directory(), segment + ".cfs");
      cfsDir = cfsReader;
    }

    // No compound file exists - use the multi-file format
    fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
    fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

    tis = new TermInfosReader(cfsDir, segment, fieldInfos);

    // NOTE: the bitvector is stored using the regular directory, not cfs
    if (hasDeletions(si))
      deletedDocs = new BitVector(directory(), segment + ".del");

    // make sure that all index files have been read or are kept open
    // so that if an index update removes them we'll still have them
    freqStream = cfsDir.openFile(segment + ".frq");
    proxStream = cfsDir.openFile(segment + ".prx");
    openNorms(cfsDir);

    if (fieldInfos.hasVectors()) { // open term vector files only as needed
      termVectorsReader = new TermVectorsReader(cfsDir, segment, fieldInfos);
    }
  }

  protected final void doCommit() throws IOException {
    if (deletedDocsDirty) {                       // re-write deleted
      deletedDocs.write(directory(), segment + ".tmp");
      directory().renameFile(segment + ".tmp", segment + ".del");
    }
    if (undeleteAll && directory().fileExists(segment + ".del")) {
      directory().deleteFile(segment + ".del");
    }
    if (normsDirty) {                             // re-write norms
      Enumeration values = norms.elements();
      while (values.hasMoreElements()) {
        Norm norm = (Norm) values.nextElement();
        if (norm.dirty) {
          norm.reWrite();
        }
      }
    }
    deletedDocsDirty = false;
    normsDirty = false;
    undeleteAll = false;
  }

  protected final void doClose() throws IOException {
    fieldsReader.close();
    tis.close();

    if (freqStream != null)
      freqStream.close();
    if (proxStream != null)
      proxStream.close();

    closeNorms();

    if (termVectorsReader != null)
      termVectorsReader.close();

    if (cfsReader != null)
      cfsReader.close();
  }

  static final boolean hasDeletions(SegmentInfo si) throws IOException {
    return si.dir.fileExists(si.name + ".del");
  }

  public boolean hasDeletions() {
    return deletedDocs != null;
  }

  static final boolean usesCompoundFile(SegmentInfo si) throws IOException {
    return si.dir.fileExists(si.name + ".cfs");
  }

  static final boolean hasSeparateNorms(SegmentInfo si) throws IOException {
    String[] result = si.dir.list();
    String pattern = si.name + ".f";
    int patternLength = pattern.length();
    for (int i = 0; i < result.length; i++) {
      if (result[i].startsWith(pattern) && Character.isDigit(result[i].charAt(patternLength)))
        return true;
    }
    return false;
  }

  protected final void doDelete(int docNum) throws IOException {
    if (deletedDocs == null)
      deletedDocs = new BitVector(maxDoc());
    deletedDocsDirty = true;
    undeleteAll = false;
    deletedDocs.set(docNum);
  }

  protected final void doUndeleteAll() throws IOException {
    deletedDocs = null;
    deletedDocsDirty = false;
    undeleteAll = true;
  }

  final Vector files() throws IOException {
    Vector files = new Vector(16);
    final String ext[] = new String[]{
      "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del",
      "tvx", "tvd", "tvf", "tvp"};

    for (int i = 0; i < ext.length; i++) {
      String name = segment + "." + ext[i];
      if (directory().fileExists(name))
        files.addElement(name);
    }

    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.isIndexed)
        files.addElement(segment + ".f" + i);
    }
    return files;
  }

  public final TermEnum terms() throws IOException {
    return tis.terms();
  }

  public final TermEnum terms(Term t) throws IOException {
    return tis.terms(t);
  }

  public final synchronized Document document(int n) throws IOException {
    if (isDeleted(n))
      throw new IllegalArgumentException
              ("attempt to access a deleted document");
    return fieldsReader.doc(n);
  }

  public final synchronized boolean isDeleted(int n) {
    return (deletedDocs != null && deletedDocs.get(n));
  }

  public final TermDocs termDocs() throws IOException {
    return new SegmentTermDocs(this);
  }

  public final TermPositions termPositions() throws IOException {
    return new SegmentTermPositions(this);
  }

  public final int docFreq(Term t) throws IOException {
    TermInfo ti = tis.get(t);
    if (ti != null)
      return ti.docFreq;
    else
      return 0;
  }

  public final int numDocs() {
    int n = maxDoc();
    if (deletedDocs != null)
      n -= deletedDocs.count();
    return n;
  }

  public final int maxDoc() {
    return fieldsReader.size();
  }

  /**
   * @see IndexReader#getFieldNames()
   */
  public Collection getFieldNames() throws IOException {
    // maintain a unique set of field names
    Set fieldSet = new HashSet();
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      fieldSet.add(fi.name);
    }
    return fieldSet;
  }

  /**
   * @see IndexReader#getFieldNames(boolean)
   */
  public Collection getFieldNames(boolean indexed) throws IOException {
    // maintain a unique set of field names
    Set fieldSet = new HashSet();
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.isIndexed == indexed)
        fieldSet.add(fi.name);
    }
    return fieldSet;
  }

  /**
   * @param storedTermVector if true, returns only indexed fields that have term vector info,
   *                         else only indexed fields without term vector info
   * @return Collection of Strings indicating the names of the fields
   */
  public Collection getIndexedFieldNames(boolean storedTermVector) {
    // maintain a unique set of field names
    Set fieldSet = new HashSet();
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.isIndexed && fi.storeTermVector == storedTermVector) {
        fieldSet.add(fi.name);
      }
    }
    return fieldSet;
  }

  public synchronized byte[] norms(String field) throws IOException {
    Norm norm = (Norm) norms.get(field);
    if (norm == null)                             // not an indexed field
      return null;
    if (norm.bytes == null) {                     // value not yet read
      byte[] bytes = new byte[maxDoc()];
      norms(field, bytes, 0);
      norm.bytes = bytes;                         // cache it
    }
    return norm.bytes;
  }

  protected final void doSetNorm(int doc, String field, byte value)
          throws IOException {
    Norm norm = (Norm) norms.get(field);
    if (norm == null)                             // not an indexed field
      return;
    norm.dirty = true;                            // mark it dirty
    normsDirty = true;

    norms(field)[doc] = value;                    // set the value
  }

  /** Read norms into a pre-allocated array. */
  public synchronized void norms(String field, byte[] bytes, int offset)
          throws IOException {

    Norm norm = (Norm) norms.get(field);
    if (norm == null)
      return;                                     // use zeros in array

    if (norm.bytes != null) {                     // can copy from cache
      System.arraycopy(norm.bytes, 0, bytes, offset, maxDoc());
      return;
    }

    InputStream normStream = (InputStream) norm.in.clone();
    try {                                         // read from disk
      normStream.seek(0);
      normStream.readBytes(bytes, offset, maxDoc());
    } finally {
      normStream.close();
    }
  }

  private final void openNorms(Directory cfsDir) throws IOException {
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.isIndexed) {
        String fileName = segment + ".f" + fi.number;
        // look first for re-written file, then in compound format
        Directory d = directory().fileExists(fileName) ? directory() : cfsDir;
        norms.put(fi.name, new Norm(d.openFile(fileName), fi.number));
      }
    }
  }

  private final void closeNorms() throws IOException {
    synchronized (norms) {
      Enumeration enumerator = norms.elements();
      while (enumerator.hasMoreElements()) {
        Norm norm = (Norm) enumerator.nextElement();
        norm.in.close();
      }
    }
  }

  /** Return a term frequency vector for the specified document and field. The
   *  vector returned contains term numbers and frequencies for all terms in
   *  the specified field of this document, if the field had storeTermVector
   *  flag set.  If the flag was not set, the method returns null.
   */
  public TermFreqVector getTermFreqVector(int docNumber, String field)
          throws IOException {
    // Check if this field is invalid or has no stored term vector
    FieldInfo fi = fieldInfos.fieldInfo(field);
    if (fi == null || !fi.storeTermVector)
      return null;

    return termVectorsReader.get(docNumber, field);
  }

  /** Return an array of term frequency vectors for the specified document.
   *  The array contains a vector for each vectorized field in the document.
   *  Each vector contains term numbers and frequencies for all terms
   *  in a given vectorized field.
   *  If no such fields existed, the method returns null.
   */
  public TermFreqVector[] getTermFreqVectors(int docNumber)
          throws IOException {
    if (termVectorsReader == null)
      return null;

    return termVectorsReader.get(docNumber);
  }
}
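
For context, here is a minimal usage sketch. SegmentReader is package-private, so application code never constructs it directly; in this version of Lucene, IndexReader.open() returns a SegmentReader when the index consists of a single segment (and a reader composed of several SegmentReaders otherwise). The index path, field name, and term below are hypothetical placeholders, not anything defined in this file.

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SegmentReaderDemo {
  public static void main(String[] args) throws IOException {
    // Hypothetical index location; any existing index works.
    Directory dir = FSDirectory.getDirectory("/tmp/index", false);
    IndexReader reader = IndexReader.open(dir); // SegmentReader if one segment
    try {
      // numDocs() excludes deleted documents; maxDoc() does not.
      System.out.println("numDocs=" + reader.numDocs()
          + ", maxDoc=" + reader.maxDoc());

      // Walk the postings for a (hypothetical) term;
      // SegmentTermDocs skips documents marked in deletedDocs.
      TermDocs td = reader.termDocs(new Term("contents", "lucene"));
      try {
        while (td.next())
          System.out.println("doc=" + td.doc() + " freq=" + td.freq());
      } finally {
        td.close();
      }
    } finally {
      reader.close(); // flushes dirty deletions/norms, then doClose()
    }
  }
}

Note that document(n) above throws IllegalArgumentException for a deleted document, so callers that iterate over raw document numbers typically guard each access with isDeleted(n).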