⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 segmentmerger.java

📁 天乙代码src_531.rar 天乙代码src_531.rar 天乙代码src_531.rar 天乙代码src_531.rar
💻 JAVA
字号:
package org.apache.lucene.index;/* ==================================================================== * The Apache Software License, Version 1.1 * * Copyright (c) 2001 The Apache Software Foundation.  All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright *    notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright *    notice, this list of conditions and the following disclaimer in *    the documentation and/or other materials provided with the *    distribution. * * 3. The end-user documentation included with the redistribution, *    if any, must include the following acknowledgment: *       "This product includes software developed by the *        Apache Software Foundation (http://www.apache.org/)." *    Alternately, this acknowledgment may appear in the software itself, *    if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache" and "Apache Software Foundation" and *    "Apache Lucene" must not be used to endorse or promote products *    derived from this software without prior written permission. For *    written permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache", *    "Apache Lucene", nor may "Apache" appear in their name, without *    prior written permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation.  For more * information on the Apache Software Foundation, please see * <http://www.apache.org/>. */import java.util.Vector;import java.io.IOException;import org.apache.lucene.store.Directory;import org.apache.lucene.store.OutputStream;import org.apache.lucene.store.InputStream;import org.apache.lucene.document.Document;import org.apache.lucene.util.PriorityQueue;import org.apache.lucene.util.BitVector;final class SegmentMerger {  private Directory directory;  private String segment;  private Vector readers = new Vector();  private FieldInfos fieldInfos;    SegmentMerger(Directory dir, String name) {    directory = dir;    segment = name;  }  final void add(SegmentReader reader) {    readers.addElement(reader);  }  final SegmentReader segmentReader(int i) {    return (SegmentReader)readers.elementAt(i);  }  final void merge() throws IOException {    try {      mergeFields();      mergeTerms();      mergeNorms();          } finally {      for (int i = 0; i < readers.size(); i++) {  // close readers	SegmentReader reader = (SegmentReader)readers.elementAt(i);	reader.close();      }    }  }  private final void mergeFields() throws IOException {    fieldInfos = new FieldInfos();		  // merge field names    for (int i = 0; i < readers.size(); i++) {      SegmentReader reader = (SegmentReader)readers.elementAt(i);      fieldInfos.add(reader.fieldInfos);    }    fieldInfos.write(directory, segment + ".fnm");        FieldsWriter fieldsWriter =			  // merge field values      new FieldsWriter(directory, segment, fieldInfos);    try {      for (int i = 0; i < readers.size(); i++) {	SegmentReader reader = (SegmentReader)readers.elementAt(i);	BitVector deletedDocs = reader.deletedDocs;	int maxDoc = reader.maxDoc();	for (int j = 0; j < maxDoc; j++)	  if (deletedDocs == null || !deletedDocs.get(j)) // skip deleted docs	    fieldsWriter.addDocument(reader.document(j));      }    } finally {      fieldsWriter.close();    }  }  private OutputStream freqOutput = null;  private OutputStream proxOutput = null;  private TermInfosWriter termInfosWriter = null;  private SegmentMergeQueue queue = null;  private final void mergeTerms() throws IOException {    try {      freqOutput = directory.createFile(segment + ".frq");      proxOutput = directory.createFile(segment + ".prx");      termInfosWriter =	new TermInfosWriter(directory, segment, fieldInfos);            mergeTermInfos();          } finally {      if (freqOutput != null) 		freqOutput.close();      if (proxOutput != null) 		proxOutput.close();      if (termInfosWriter != null) 	termInfosWriter.close();      if (queue != null)		queue.close();    }  }  private final void mergeTermInfos() throws IOException {    queue = new SegmentMergeQueue(readers.size());    int base = 0;    for (int i = 0; i < readers.size(); i++) {      SegmentReader reader = (SegmentReader)readers.elementAt(i);      SegmentTermEnum termEnum = (SegmentTermEnum)reader.terms();      SegmentMergeInfo smi = new SegmentMergeInfo(base, termEnum, reader);      base += reader.numDocs();      if (smi.next())	queue.put(smi);				  // initialize queue      else	smi.close();    }    SegmentMergeInfo[] match = new SegmentMergeInfo[readers.size()];        while (queue.size() > 0) {      int matchSize = 0;			  // pop matching terms      match[matchSize++] = (SegmentMergeInfo)queue.pop();      Term term = match[0].term;      SegmentMergeInfo top = (SegmentMergeInfo)queue.top();            while (top != null && term.compareTo(top.term) == 0) {	match[matchSize++] = (SegmentMergeInfo)queue.pop();	top = (SegmentMergeInfo)queue.top();      }      mergeTermInfo(match, matchSize);		  // add new TermInfo            while (matchSize > 0) {	SegmentMergeInfo smi = match[--matchSize];	if (smi.next())	  queue.put(smi);			  // restore queue	else	  smi.close();				  // done with a segment      }    }  }  private final TermInfo termInfo = new TermInfo(); // minimize consing  private final void mergeTermInfo(SegmentMergeInfo[] smis, int n)       throws IOException {    long freqPointer = freqOutput.getFilePointer();    long proxPointer = proxOutput.getFilePointer();    int df = appendPostings(smis, n);		  // append posting data    if (df > 0) {      // add an entry to the dictionary with pointers to prox and freq files      termInfo.set(df, freqPointer, proxPointer);      termInfosWriter.add(smis[0].term, termInfo);    }  }         private final int appendPostings(SegmentMergeInfo[] smis, int n)       throws IOException {    int lastDoc = 0;    int df = 0;					  // number of docs w/ term    for (int i = 0; i < n; i++) {      SegmentMergeInfo smi = smis[i];      SegmentTermPositions postings = smi.postings;      int base = smi.base;      int[] docMap = smi.docMap;      smi.termEnum.termInfo(termInfo);      postings.seek(termInfo);      while (postings.next()) {	int doc;	if (docMap == null)	  doc = base + postings.doc;		  // no deletions	else	  doc = base + docMap[postings.doc];	  // re-map around deletions	if (doc < lastDoc)	  throw new IllegalStateException("docs out of order");	int docCode = (doc - lastDoc) << 1;	  // use low bit to flag freq=1	lastDoc = doc;	int freq = postings.freq;	if (freq == 1) {	  freqOutput.writeVInt(docCode | 1);	  // write doc & freq=1	} else {	  freqOutput.writeVInt(docCode);	  // write doc	  freqOutput.writeVInt(freq);		  // write frequency in doc	}	  	int lastPosition = 0;			  // write position deltas	for (int j = 0; j < freq; j++) {	  int position = postings.nextPosition();	  proxOutput.writeVInt(position - lastPosition);	  lastPosition = position;	}	df++;      }    }    return df;  }  private final void mergeNorms() throws IOException {    for (int i = 0; i < fieldInfos.size(); i++) {      FieldInfo fi = fieldInfos.fieldInfo(i);      if (fi.isIndexed) {	OutputStream output = directory.createFile(segment + ".f" + i);	try {	  for (int j = 0; j < readers.size(); j++) {	    SegmentReader reader = (SegmentReader)readers.elementAt(j);	    BitVector deletedDocs = reader.deletedDocs;	    InputStream input = reader.normStream(fi.name);            int maxDoc = reader.maxDoc();	    try {	      for (int k = 0; k < maxDoc; k++) {		byte norm = input != null ? input.readByte() : (byte)0;		if (deletedDocs == null || !deletedDocs.get(k))		  output.writeByte(norm);	      }	    } finally {	      if (input != null)		input.close();	    }	  }	} finally {	  output.close();	}      }    }  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -