// SegmentMerger.cs
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using FieldSelector = Lucene.Net.Documents.FieldSelector;
using FieldSelectorResult = Lucene.Net.Documents.FieldSelectorResult;
using Directory = Lucene.Net.Store.Directory;
using IndexOutput = Lucene.Net.Store.IndexOutput;
using RAMOutputStream = Lucene.Net.Store.RAMOutputStream;
namespace Lucene.Net.Index
{
/// <summary> The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add},
/// into a single Segment. After adding the appropriate readers, call the merge method to combine the
/// segments.
/// <P>
/// If the compoundFile flag is set, then the segments will be merged into a compound file.
///
///
/// </summary>
/// <seealso cref="#merge">
/// </seealso>
/// <seealso cref="#add">
/// </seealso>
sealed class SegmentMerger
{
private class AnonymousClassFieldSelector : FieldSelector
{
    // The SegmentMerger that created this selector (kept to mirror the
    // anonymous-inner-class access pattern of the original Java source).
    private SegmentMerger enclosingInstance;

    public AnonymousClassFieldSelector(SegmentMerger enclosingInstance)
    {
        this.enclosingInstance = enclosingInstance;
    }

    public SegmentMerger Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    /// <summary>
    /// Unconditionally selects every field with LOAD_FOR_MERGE, which tells
    /// the FieldsReader to hand stored fields through in merge mode.
    /// </summary>
    public FieldSelectorResult Accept(System.String fieldName)
    {
        return FieldSelectorResult.LOAD_FOR_MERGE;
    }
}
/// <summary>
/// Common initializer called from every constructor; seeds the term index
/// interval with the IndexWriter default. (Appears to be a translation of a
/// Java instance-initializer block — the IndexWriter-based ctor overrides it.)
/// </summary>
private void InitBlock()
{
termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL;
}
/// <summary>norms header placeholder: "NRM" followed by a 0xFF version byte </summary>
internal static readonly byte[] NORMS_HEADER = new byte[]{(byte) 'N', (byte) 'R', (byte) 'M', (byte) 255};
// Destination directory the merged segment is written into.
private Directory directory;
// Name of the new (merged) segment.
private System.String segment;
// Term index interval; defaulted in InitBlock(), overridden by the IndexWriter ctor.
private int termIndexInterval;
// Readers queued for merging via Add(); wrapped in a synchronized ArrayList.
private System.Collections.ArrayList readers = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
// Union of field infos across all readers; populated by MergeFields().
private FieldInfos fieldInfos;
/// <summary>This ctor used only by test code.
///
/// </summary>
/// <param name="dir">The Directory to merge the other segments into
/// </param>
/// <param name="name">The name of the new segment
/// </param>
public SegmentMerger(Directory dir, System.String name)
{
// Sets the default term index interval (not configurable through this ctor).
InitBlock();
directory = dir;
segment = name;
}
/// <summary>Creates a merger that writes into the given writer's directory,
/// inheriting the writer's term index interval.</summary>
/// <param name="writer">The IndexWriter whose directory and settings are used</param>
/// <param name="name">The name of the new segment</param>
internal SegmentMerger(IndexWriter writer, System.String name)
{
InitBlock();
directory = writer.GetDirectory();
segment = name;
// Override the default set by InitBlock() with the writer's configured value.
termIndexInterval = writer.GetTermIndexInterval();
}
/// <summary> Add an IndexReader to the collection of readers that are to be merged</summary>
/// <param name="reader">The reader whose segment will be included in the merge
/// </param>
public void Add(IndexReader reader)
{
readers.Add(reader);
}
/// <summary>Returns one of the readers previously registered with Add().</summary>
/// <param name="i">The index of the reader to return
/// </param>
/// <returns> The ith reader to be merged
/// </returns>
internal IndexReader SegmentReader(int i)
{
return (IndexReader) readers[i];
}
/// <summary> Merges the readers specified by the {@link #add} method into the
/// directory passed to the constructor. Fields are merged first (which also
/// builds fieldInfos), then terms, then norms, and finally term vectors if
/// any merged field stores them.</summary>
/// <returns> The number of documents that were merged
/// </returns>
/// <throws> IOException </throws>
public int Merge()
{
    int mergedDocCount = MergeFields();
    MergeTerms();
    MergeNorms();
    if (fieldInfos.HasVectors())
    {
        MergeVectors();
    }
    return mergedDocCount;
}
/// <summary> close all IndexReaders that have been added.
/// Should not be called before merge().
/// </summary>
/// <throws> IOException </throws>
public void CloseReaders()
{
    foreach (IndexReader reader in readers)
    {
        reader.Close();
    }
}
/// <summary>Bundles the merged segment's files into a single compound file.</summary>
/// <param name="fileName">Name of the compound file to create</param>
/// <returns>The (synchronized) list of file names that were added</returns>
public System.Collections.ArrayList CreateCompoundFile(System.String fileName)
{
    CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName);
    System.Collections.ArrayList files = System.Collections.ArrayList.Synchronized(
        new System.Collections.ArrayList(IndexFileNames.COMPOUND_EXTENSIONS.Length + 1));

    // Basic segment files.
    foreach (System.String extension in IndexFileNames.COMPOUND_EXTENSIONS)
    {
        files.Add(segment + "." + extension);
    }

    // A single norms file exists if at least one field is indexed with norms.
    for (int i = 0; i < fieldInfos.Size(); i++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(i);
        if (fi.isIndexed && !fi.omitNorms)
        {
            files.Add(segment + "." + IndexFileNames.NORMS_EXTENSION);
            break;
        }
    }

    // Term-vector files are only present when vectors were stored.
    if (fieldInfos.HasVectors())
    {
        foreach (System.String extension in IndexFileNames.VECTOR_EXTENSIONS)
        {
            files.Add(segment + "." + extension);
        }
    }

    // Register every collected file with the compound writer.
    foreach (System.String file in files)
    {
        cfsWriter.AddFile(file);
    }

    // Close() performs the actual merge into the compound file.
    cfsWriter.Close();
    return files;
}
/// <summary>Registers every field name in <c>names</c> as indexed in
/// <c>fieldInfos</c>, with the given term-vector storage flags. Norms are
/// marked omitted for a field when the reader has no norms for it.</summary>
private void AddIndexed(IndexReader reader, FieldInfos fieldInfos, System.Collections.ICollection names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector)
{
    // The collection holds DictionaryEntry items (a hashtable used as a set);
    // the field name is the entry's key.
    foreach (System.Collections.DictionaryEntry entry in names)
    {
        System.String field = (System.String) entry.Key;
        fieldInfos.Add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.HasNorms(field));
    }
}
/// <summary>Builds the merged FieldInfos from all readers, writes the field
/// name file, then copies every non-deleted document's stored fields into the
/// new segment.</summary>
/// <returns> The number of documents in all of the readers
/// </returns>
/// <throws> IOException </throws>
private int MergeFields()
{
    fieldInfos = new FieldInfos(); // merge field names

    foreach (IndexReader reader in readers)
    {
        // NOTE(review): the sequence of AddIndexed calls is kept exactly as in
        // the original; whether FieldInfos.Add is order-sensitive is not
        // visible from this file.
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
        AddIndexed(reader, fieldInfos, reader.GetFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
        fieldInfos.Add(reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED), false);
    }
    fieldInfos.Write(directory, segment + ".fnm");

    int docCount = 0;
    FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
    // for merging we don't want to compress/uncompress the data, so to tell the
    // FieldsReader that we're in merge mode, we use this FieldSelector
    FieldSelector fieldSelectorMerge = new AnonymousClassFieldSelector(this);
    try
    {
        foreach (IndexReader reader in readers)
        {
            int maxDoc = reader.MaxDoc();
            for (int docNum = 0; docNum < maxDoc; docNum++)
            {
                if (reader.IsDeleted(docNum))
                    continue; // skip deleted docs
                fieldsWriter.AddDocument(reader.Document(docNum, fieldSelectorMerge));
                docCount++;
            }
        }
    }
    finally
    {
        fieldsWriter.Close();
    }
    return docCount;
}
/// <summary> Merge the TermVectors from each of the segments into the new one.</summary>
/// <throws> IOException </throws>
private void MergeVectors()
{
TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos);
try
{
for (int r = 0; r < readers.Count; r++)
{
IndexReader reader = (IndexReader) readers[r];
int maxDoc = reader.MaxDoc();
for (int docNum = 0; docNum < maxDoc; docNum++)
{
// skip deleted docs
if (reader.IsDeleted(docNum))
// NOTE(review): the remainder of this file is missing from this copy — the
// tail of MergeVectors() (and any members that followed it) was replaced by
// code-viewer page text, which has been removed here. Restore the rest of
// MergeVectors(), MergeTerms(), MergeNorms(), etc. from the original source
// before compiling.