⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 terminfosreader.cpp

📁 clucene是c++版的全文检索引擎,完全移植于lucene,采用 stl 编写.
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
* 
* Distributable under the terms of either the Apache License (Version 2.0) or 
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "TermInfosReader.h"

#include "Term.h"
#include "Terms.h"
#include "SegmentTermEnum.h"
#include "CLucene/store/Directory.h"
#include "FieldInfos.h"
#include "TermInfo.h"
#include "TermInfosWriter.h"
#include "CLucene/util/Misc.h"

CL_NS_USE(store)
CL_NS_USE(util)
CL_NS_DEF(index)


  TermInfosReader::TermInfosReader(Directory* dir, const char* seg, FieldInfos* fis):
      directory (dir),fieldInfos (fis)
#ifndef _CL_DISABLE_MULTITHREADING
	  , enumerators(false, true)
#endif
	  {
  //Func - Constructor.
  //       Opens the Term Infos file (.tis) and the Term Info Index file (.tii) of the
  //       segment and wraps each of them in a SegmentTermEnum
  //Pre  - dir is a reference to a valid Directory
  //       fis contains a valid reference to a FieldInfos instance
  //       seg != NULL and contains the name of the segment
  //Post - An instance has been created, origEnum and indexEnum have been opened and
  //       _size holds the size reported by origEnum.
  //       If opening either file throws, everything acquired so far has been released
  //       again and the exception has been rethrown to the caller.

      CND_PRECONDITION(seg != NULL, "seg is NULL");

      //Initialize the name of the segment
      segment       = seg;

      //Bring every member that close() inspects into a defined state first, so that
      //close() can safely be used for cleanup when opening the files fails half way
      indexTerms    = NULL;
      indexInfos    = NULL;
      indexPointers = NULL;
      origEnum      = NULL;
      indexEnum     = NULL;
#ifdef _CL_DISABLE_MULTITHREADING
      stEnumerator  = NULL;
#endif

      //Build the file names of the Term Infos file and the Term Info Index file
      char* tisFile = Misc::segmentname(segment,".tis");
      char* tiiFile = Misc::segmentname(segment,".tii");

      try {
          //Enumerator over all the terms of the segment
          origEnum  = _CLNEW SegmentTermEnum( directory->openInput( tisFile ), fieldInfos, false);
          //Enumerator over the term index of the segment
          indexEnum = _CLNEW SegmentTermEnum( directory->openInput( tiiFile ), fieldInfos, true);
      } catch (...) {
          //The destructor does not run for a partially constructed object, so the
          //file names and whatever enumerator was already opened are released here
          _CLDELETE_CaARRAY(tisFile);
          _CLDELETE_CaARRAY(tiiFile);
          close();
          throw;
      }

      //The file names are no longer needed once both files are open
      _CLDELETE_CaARRAY(tisFile);
      _CLDELETE_CaARRAY(tiiFile);

      //Check if the enumerators point to valid instances
      CND_CONDITION(origEnum != NULL, "No memory could be allocated for orig enumerator");
      CND_CONDITION(indexEnum != NULL, "No memory could be allocated for index enumerator");

      //Get the size of the enumeration and store it in _size
      _size =  origEnum->size;
  }

  TermInfosReader::~TermInfosReader(){
  //Func - Destructor
  //Pre  - true
  //Post - The instance has been destroyed

      //All cleanup lives in close(): it releases the arrays indexTerms, indexInfos
      //and indexPointers and closes both enumerators together with their inputs
      this->close();
  }

  void TermInfosReader::close() {
  //Func - Close the enumeration of TermInfos
  //Pre  - true
  //Post - The _enumeration has been closed and the arrays

	  //Check if indexTerms and indexInfos exist
     if (indexTerms && indexInfos){
          //Iterate through arrays indexTerms and indexPointer to
	      //destroy their elements
#ifdef _DEBUG
         for ( int32_t i=0; i<indexTermsLength;++i ){
			 if ( indexTerms[i].__cl_refcount != 1 )
				 CND_PRECONDITION(indexTerms[i].__cl_refcount==1,"TermInfosReader term was references more than internally");
         //   _CLDECDELETE(indexTerms[i]);
            //_CLDELETE(indexInfos[i]);
         }
#endif
         //Delete the arrays
         _CLDELETE_ARRAY(indexTerms);
         _CLDELETE_ARRAY(indexInfos);
     }

#ifdef _CL_DISABLE_MULTITHREADING
	  _CLDELETE(stEnumerator);
#endif

      //Delete the arrays
      _CLDELETE_ARRAY(indexPointers);

      if (origEnum != NULL){
        origEnum->close();

	    //Get a pointer to IndexInput used by the enumeration but 
	    //instantiated in the constructor by directory.open( tisFile )
        IndexInput *is = origEnum->input;

        //Delete the enumuration enumerator
        _CLDELETE(origEnum);

        //Delete the IndexInput 
        _CLDELETE(is);	
      }
	  
      if (indexEnum != NULL){
        indexEnum->close();

	    //Get a pointer to IndexInput used by the enumeration but 
	    //instantiated in the constructor by directory.open( tiiFile )
        IndexInput *is = indexEnum->input;

        //Delete the enumuration enumerator
        _CLDELETE(indexEnum);

        //Delete the IndexInput 
        _CLDELETE(is);	
      }
  }

  int64_t TermInfosReader::size() const{
  //Func - Reports the size of the enumeration of TermInfos
  //Pre  - true
  //Post - The value stored in _size (taken from origEnum->size by the constructor)
  //       has been returned

      return this->_size;
  }


  Term* TermInfosReader::get(const int32_t position) {
  //Func - Returns the nth term in the set
  //Pre  - position >= 0
  //Post - The term that scanEnum(position) yields has been returned.
  //       NULL has been returned when the set is empty or when no enumerator
  //       could be obtained.

      //A size of 0 means there are no terms at all
      if (_size == 0)
          return NULL;

      SegmentTermEnum* enumerator = getEnum();

      //Without an enumerator nothing can be looked up. Bail out here instead of
      //dereferencing a NULL pointer in the seek path further down.
      if (enumerator == NULL)
          return NULL;

      //The seek can be skipped when the enumerator currently has a term and the
      //requested position lies within one index interval at or after its position
      const bool reachableByScan =
             enumerator->term(false) != NULL
          && position >= enumerator->position
          && position < (enumerator->position + enumerator->indexInterval);

      //Random access: position the enumerator on the matching index entry first
      if (!reachableByScan)
          seekEnum(position / enumerator->indexInterval);

      //Scan forward to the term at position
      return scanEnum(position);
  }

	//TODO: there is currently no way to release a thread's enumerator when that
	//thread ends; the SegmentTermEnum that getEnum() created for it stays cached.
	//Hopefully this won't be too big a problem... solutions anyone?
  SegmentTermEnum* TermInfosReader::getEnum(){
  //Func - Returns the cached enumerator, creating it through terms() on first use
  //Pre  - true
  //Post - Single-threaded builds: stEnumerator has been returned.
  //       Otherwise: the enumerator stored in enumerators under the current
  //       thread id has been returned; it was created and stored if it was missing.
#ifdef _CL_DISABLE_MULTITHREADING
      //Reuse the one cached enumerator when it already exists
      if ( stEnumerator != NULL )
          return stEnumerator;

      stEnumerator = terms();
      return stEnumerator;
#else
      //The lookup and a possible insert both happen under enumerators_LOCK
      SCOPED_LOCK_MUTEX(enumerators_LOCK)
      SegmentTermEnum* cached = enumerators.get(_LUCENE_CURRTHREADID);
      if (cached != NULL)
          return cached;

      //First request from this thread: create an enumerator and remember it
      SegmentTermEnum* created = terms();
      enumerators.put(_LUCENE_CURRTHREADID, created);
      return created;
#endif
  }

  TermInfo* TermInfosReader::get(const Term* term){
  //Func - Returns a TermInfo for a term
  //Pre  - term holds a valid reference to term
  //Post - if term can be found its TermInfo has been returned otherwise NULL

    //If the size of the enumeration is 0 then no Terms have been read
	if (_size == 0)
		return NULL;

    ensureIndexIsRead();

    // optimize sequential access: first try scanning cached enum w/o seeking
    SegmentTermEnum* enumerator = getEnum();

    // optimize sequential access: first try scanning cached enumerator w/o seeking
    //if
    if (
	      //the current term of the enumeration enumerator is not at the end AND
      	enumerator->term(false) != NULL	 && 
      	(
            //there exists a previous current called prev and term is positioned after this prev OR
            ( enumerator->prev != NULL && term->compareTo(enumerator->prev) > 0) || 
            //term is positioned at the same position as the current of enumerator or at a higher position
            term->compareTo(enumerator->term(false)) >= 0 )
      	)
     {

		//Calculate the offset for the position
		int32_t _enumOffset = (int32_t)(enumerator->position/enumerator->indexInterval)+1;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -