package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Set;

import org.apache.lucene.store.Directory;

/** <p>This class implements a {@link MergePolicy} that tries
 *  to merge segments into levels of exponentially
 *  increasing size, where each level has fewer than
 *  mergeFactor segments in it.  Whenever a given level has
 *  mergeFactor segments or more in it, they will be
 *  merged.</p>
 *
 *  <p>This class is abstract and requires a subclass to
 *  define the {@link #size} method which specifies how a
 *  segment's size is determined.  {@link LogDocMergePolicy}
 *  is one subclass that measures size by document count in
 *  the segment.  {@link LogByteSizeMergePolicy} is another
 *  subclass that measures size as the total byte size of the
 *  file(s) for the segment.</p> */
public abstract class LogMergePolicy extends MergePolicy {

  /** Defines the allowed range of log(size) for each
   *  level.  A level is computed by taking the max segment
   *  log size, minus LEVEL_LOG_SPAN, and finding all
   *  segments falling within that range. */
  public static final double LEVEL_LOG_SPAN = 0.75;

  /** Default merge factor, which is how many segments are
   *  merged at a time */
  public static final int DEFAULT_MERGE_FACTOR = 10;

  /** Default maximum segment size.  A segment of this size
   *  or larger will never be merged.  @see setMaxMergeDocs */
  public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;

  private int mergeFactor = DEFAULT_MERGE_FACTOR;

  long minMergeSize;
  long maxMergeSize;
  int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;

  private boolean useCompoundFile = true;
  private boolean useCompoundDocStore = true;
  private IndexWriter writer;

  private void message(String message) {
    if (writer != null)
      writer.message("LMP: " + message);
  }

  /** <p>Returns the number of segments that are merged at
   *  once and also controls the total number of segments
   *  allowed to accumulate in the index.</p> */
  public int getMergeFactor() {
    return mergeFactor;
  }
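  // Usage sketch (not part of this class): a LogMergePolicy subclass is
  // normally installed on an IndexWriter.  IndexWriter, FSDirectory,
  // StandardAnalyzer and LogDocMergePolicy are real Lucene 2.x APIs; the
  // index path is illustrative only.
  //
  //   IndexWriter w = new IndexWriter(FSDirectory.getDirectory("/tmp/index"),
  //                                   new StandardAnalyzer(), true);
  //   LogDocMergePolicy mp = new LogDocMergePolicy();
  //   mp.setMergeFactor(10);   // merge 10 segments at a time (the default)
  //   w.setMergePolicy(mp);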
  /** Determines how often segment indices are merged by
   *  addDocument().  With smaller values, less RAM is used
   *  while indexing, and searches on unoptimized indices are
   *  faster, but indexing speed is slower.  With larger
   *  values, more RAM is used during indexing, and while
   *  searches on unoptimized indices are slower, indexing is
   *  faster.  Thus larger values (> 10) are best for batch
   *  index creation, and smaller values (< 10) for indices
   *  that are interactively maintained. */
  public void setMergeFactor(int mergeFactor) {
    if (mergeFactor < 2)
      throw new IllegalArgumentException("mergeFactor cannot be less than 2");
    this.mergeFactor = mergeFactor;
  }

  // Javadoc inherited
  public boolean useCompoundFile(SegmentInfos infos, SegmentInfo info) {
    return useCompoundFile;
  }

  /** Sets whether compound file format should be used for
   *  newly flushed and newly merged segments. */
  public void setUseCompoundFile(boolean useCompoundFile) {
    this.useCompoundFile = useCompoundFile;
  }

  /** Returns true if newly flushed and newly merged segments
   *  are written in compound file format.  @see
   *  #setUseCompoundFile */
  public boolean getUseCompoundFile() {
    return useCompoundFile;
  }

  // Javadoc inherited
  public boolean useCompoundDocStore(SegmentInfos infos) {
    return useCompoundDocStore;
  }

  /** Sets whether compound file format should be used for
   *  newly flushed and newly merged doc store
   *  segment files (term vectors and stored fields). */
  public void setUseCompoundDocStore(boolean useCompoundDocStore) {
    this.useCompoundDocStore = useCompoundDocStore;
  }

  /** Returns true if newly flushed and newly merged doc
   *  store segment files (term vectors and stored fields)
   *  are written in compound file format.  @see
   *  #setUseCompoundDocStore */
  public boolean getUseCompoundDocStore() {
    return useCompoundDocStore;
  }

  public void close() {}

  abstract protected long size(SegmentInfo info) throws IOException;

  private boolean isOptimized(SegmentInfos infos, IndexWriter writer,
                              int maxNumSegments, Set segmentsToOptimize)
    throws IOException {
    final int numSegments = infos.size();
    int numToOptimize = 0;
    SegmentInfo optimizeInfo = null;
    for(int i=0;i<numSegments && numToOptimize <= maxNumSegments;i++) {
      final SegmentInfo info = infos.info(i);
      if (segmentsToOptimize.contains(info)) {
        numToOptimize++;
        optimizeInfo = info;
      }
    }

    return numToOptimize <= maxNumSegments &&
      (numToOptimize != 1 || isOptimized(writer, optimizeInfo));
  }

  /** Returns true if this single info is optimized (has no
   *  pending norms or deletes, is in the same dir as the
   *  writer, and matches the current compound file setting). */
  private boolean isOptimized(IndexWriter writer, SegmentInfo info)
    throws IOException {
    return !info.hasDeletions() &&
      !info.hasSeparateNorms() &&
      info.dir == writer.getDirectory() &&
      info.getUseCompoundFile() == useCompoundFile;
  }
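  // For reference, a concrete subclass only has to define size().  A
  // minimal sketch in the spirit of LogDocMergePolicy, which measures a
  // segment by its document count (LogByteSizeMergePolicy instead returns
  // the segment's total byte size); the shipped classes may differ in
  // detail:
  //
  //   protected long size(SegmentInfo info) throws IOException {
  //     return info.docCount;   // segment "size" == number of documents
  //   }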
  /** Returns the merges necessary to optimize the index.
   *  This merge policy defines "optimized" to mean only one
   *  segment in the index, where that segment has no
   *  deletions pending nor separate norms, and it is in
   *  compound file format if the current useCompoundFile
   *  setting is true.  This method returns multiple merges
   *  (mergeFactor at a time) so the {@link MergeScheduler}
   *  in use may make use of concurrency. */
  public MergeSpecification findMergesForOptimize(SegmentInfos infos,
      IndexWriter writer, int maxNumSegments, Set segmentsToOptimize)
    throws IOException {
    MergeSpecification spec;

    assert maxNumSegments > 0;

    if (!isOptimized(infos, writer, maxNumSegments, segmentsToOptimize)) {

      // Find the newest (rightmost) segment that needs to
      // be optimized (other segments may have been flushed
      // since optimize started):
      int last = infos.size();
      while(last > 0) {
        final SegmentInfo info = infos.info(--last);
        if (segmentsToOptimize.contains(info)) {
          last++;
          break;
        }
      }

      if (last > 0) {

        spec = new MergeSpecification();

        // First, enroll all "full" merges (size
        // mergeFactor) to potentially be run concurrently:
        while (last - maxNumSegments + 1 >= mergeFactor) {
          spec.add(new OneMerge(infos.range(last-mergeFactor, last), useCompoundFile));
          last -= mergeFactor;
        }

        // Only if there are no full merges pending do we
        // add a final partial (< mergeFactor segments) merge:
        if (0 == spec.merges.size()) {
          if (maxNumSegments == 1) {

            // Since we must optimize down to 1 segment, the
            // choice is simple:
            if (last > 1 || !isOptimized(writer, infos.info(0)))
              spec.add(new OneMerge(infos.range(0, last), useCompoundFile));
          } else if (last > maxNumSegments) {

            // Take care to pick a partial merge that is
            // least cost, but does not make the index too
            // lopsided.  If we always just picked the
            // partial tail then we could produce a highly
            // lopsided index over time:
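            // --- The original listing is truncated at this point.  What
            // follows is a sketch of how the method concludes, based on
            // the Apache Lucene 2.x sources (close in spirit, not
            // guaranteed verbatim): slide a window of the required merge
            // width across the candidate segments and pick the cheapest
            // (smallest total size) starting point that does not leave
            // the index badly lopsided.

            final int finalMergeSize = last - maxNumSegments + 1;

            // Consider all possible starting points for the window:
            long bestSize = 0;
            int bestStart = 0;

            for(int i=0;i<last-finalMergeSize+1;i++) {
              long sumSize = 0;
              for(int j=0;j<finalMergeSize;j++)
                sumSize += size(infos.info(j+i));
              // Prefer the smallest window, but reject one whose total
              // size exceeds twice its left neighbor (lopsidedness guard):
              if (i == 0 || (sumSize < 2*size(infos.info(i-1)) && sumSize < bestSize)) {
                bestStart = i;
                bestSize = sumSize;
              }
            }

            spec.add(new OneMerge(infos.range(bestStart, bestStart+finalMergeSize),
                                  useCompoundFile));
          }
        }
      } else
        spec = null;
    } else
      spec = null;

    return spec;
  }

  // (The remainder of the class -- findMerges, findMergesToExpungeDeletes,
  // and the maxMergeDocs accessors -- is not present in this dump.)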