📄 SegmentMerger.java
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.Vector;
import java.util.Iterator;
import java.util.Collection;
import java.io.IOException;

import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;

/**
 * The SegmentMerger class combines two or more Segments, represented by an
 * IndexReader ({@link #add}), into a single Segment.  After adding the
 * appropriate readers, call the merge method to combine the segments.
 * <P>
 * If the compoundFile flag is set, then the segments will be merged into a
 * compound file.
 *
 * @see #merge
 * @see #add
 */
final class SegmentMerger {

  /** norms header placeholder */
  static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};

  private Directory directory;
  private String segment;
  private int termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL;

  private Vector readers = new Vector();
  private FieldInfos fieldInfos;

  private int mergedDocs;

  /** This ctor used only by test code.
   *
   * @param dir The Directory to merge the other segments into
   * @param name The name of the new segment
   */
  SegmentMerger(Directory dir, String name) {
    directory = dir;
    segment = name;
  }

  SegmentMerger(IndexWriter writer, String name) {
    directory = writer.getDirectory();
    segment = name;
    termIndexInterval = writer.getTermIndexInterval();
  }

  /**
   * Add an IndexReader to the collection of readers that are to be merged
   * @param reader
   */
  final void add(IndexReader reader) {
    readers.addElement(reader);
  }

  /**
   * @param i The index of the reader to return
   * @return The ith reader to be merged
   */
  final IndexReader segmentReader(int i) {
    return (IndexReader) readers.elementAt(i);
  }

  /**
   * Merges the readers specified by the {@link #add} method into the
   * directory passed to the constructor
   * @return The number of documents that were merged
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  final int merge() throws CorruptIndexException, IOException {
    int value;

    mergedDocs = mergeFields();
    mergeTerms();
    mergeNorms();

    if (fieldInfos.hasVectors())
      mergeVectors();

    return mergedDocs;
  }

  /**
   * close all IndexReaders that have been added.
   * Should not be called before merge().
   * @throws IOException
   */
  final void closeReaders() throws IOException {
    for (int i = 0; i < readers.size(); i++) {  // close readers
      IndexReader reader = (IndexReader) readers.elementAt(i);
      reader.close();
    }
  }

  final Vector createCompoundFile(String fileName) throws IOException {
    CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName);

    Vector files = new Vector(IndexFileNames.COMPOUND_EXTENSIONS.length + 1);

    // Basic files
    for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.length; i++) {
      files.add(segment + "." + IndexFileNames.COMPOUND_EXTENSIONS[i]);
    }

    // Fieldable norm files
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.isIndexed && !fi.omitNorms) {
        files.add(segment + "." + IndexFileNames.NORMS_EXTENSION);
        break;
      }
    }

    // Vector files
    if (fieldInfos.hasVectors()) {
      for (int i = 0; i < IndexFileNames.VECTOR_EXTENSIONS.length; i++) {
        files.add(segment + "." + IndexFileNames.VECTOR_EXTENSIONS[i]);
      }
    }

    // Now merge all added files
    Iterator it = files.iterator();
    while (it.hasNext()) {
      cfsWriter.addFile((String) it.next());
    }

    // Perform the merge
    cfsWriter.close();

    return files;
  }

  private void addIndexed(IndexReader reader, FieldInfos fieldInfos,
                          Collection names, boolean storeTermVectors,
                          boolean storePositionWithTermVector,
                          boolean storeOffsetWithTermVector,
                          boolean storePayloads) throws IOException {
    Iterator i = names.iterator();
    while (i.hasNext()) {
      String field = (String) i.next();
      fieldInfos.add(field, true, storeTermVectors, storePositionWithTermVector,
                     storeOffsetWithTermVector, !reader.hasNorms(field),
                     storePayloads);
    }
  }

  /**
   * @return The number of documents in all of the readers
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  private final int mergeFields() throws CorruptIndexException, IOException {
    fieldInfos = new FieldInfos();    // merge field names
    int docCount = 0;
    for (int i = 0; i < readers.size(); i++) {
      IndexReader reader = (IndexReader) readers.elementAt(i);
      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false);
      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false);
      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false);
      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false);
      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true);
      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false);
      fieldInfos.add(reader.getFieldNames(IndexReader.FieldOption.UNINDEXED), false);
    }
    fieldInfos.write(directory, segment + ".fnm");

    FieldsWriter fieldsWriter =       // merge field values
            new FieldsWriter(directory, segment, fieldInfos);

    // for merging we don't want to compress/uncompress the data, so to tell
    // the FieldsReader that we're in merge mode, we use this FieldSelector
    FieldSelector fieldSelectorMerge = new FieldSelector() {
      public FieldSelectorResult accept(String fieldName) {
        return FieldSelectorResult.LOAD_FOR_MERGE;
      }
    };

    try {
      for (int i = 0; i < readers.size(); i++) {
        IndexReader reader = (IndexReader) readers.elementAt(i);
        int maxDoc = reader.maxDoc();
        for (int j = 0; j < maxDoc; j++)
          if (!reader.isDeleted(j)) {               // skip deleted docs
            fieldsWriter.addDocument(reader.document(j, fieldSelectorMerge));
            docCount++;
          }
      }
    } finally {
      fieldsWriter.close();
    }
    return docCount;
  }

  /**
   * Merge the TermVectors from each of the segments into the new one.
   * @throws IOException
   */
  private final void mergeVectors() throws IOException {
    TermVectorsWriter termVectorsWriter =
      new TermVectorsWriter(directory, segment, fieldInfos);

    try {
      for (int r = 0; r < readers.size(); r++) {
        IndexReader reader = (IndexReader) readers.elementAt(r);
        int maxDoc = reader.maxDoc();
        for (int docNum = 0; docNum < maxDoc; docNum++) {
          // skip deleted docs
          if (reader.isDeleted(docNum))
            continue;
          termVectorsWriter.addAllDocVectors(reader.getTermFreqVectors(docNum));
        }
      }
    } finally {
      termVectorsWriter.close();
    }
  }
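For reference, the call sequence the class Javadoc prescribes is: register one IndexReader per source segment with add(), call merge() to write the combined segment, and only then close the inputs. Below is a minimal, hypothetical usage sketch, not taken from the Lucene sources: it assumes the caller sits inside the org.apache.lucene.index package (SegmentMerger is package-private) and uses the test-only constructor; the readers r1/r2, the RAMDirectory target, and the segment name "_merged" are made up for illustration.

// Hypothetical usage sketch (assumes org.apache.lucene.index package;
// r1 and r2 are IndexReaders already opened over the source segments).
Directory mergeDir = new RAMDirectory();                       // in-memory target
SegmentMerger merger = new SegmentMerger(mergeDir, "_merged"); // test-only ctor
merger.add(r1);                 // register each source segment's reader
merger.add(r2);
int docCount = merger.merge();  // writes the merged segment's files (.fnm, ...)
merger.closeReaders();          // per the Javadoc, only safe after merge()
Vector files = merger.createCompoundFile("_merged.cfs");       // optional CFS step

Note that closeReaders() must not be called before merge(), and createCompoundFile() relies on the fieldInfos that merge() builds, so the ordering above matters.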