⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 terminfosreader.cpp

📁 clucene是c++版的全文检索引擎,完全移植于lucene,采用 stl 编写.
💻 CPP
📖 第 1 页 / 共 2 页
字号:

		// but before end of block
		if (
			//the length of indexTerms (the number of terms in enumerator) equals
			//_enum_offset OR
			indexTermsLength == _enumOffset	 || 
			//term is positioned in front of term found at _enumOffset in indexTerms
			term->compareTo(&indexTerms[_enumOffset]) < 0){

			//no need to seek, retrieve the TermInfo for term
			return scanEnum(term);
        }
    }

    //Reposition current term in the enumeration 
    seekEnum(getIndexOffset(term));
	//Return the TermInfo for term
    return scanEnum(term);
  }


  int64_t TermInfosReader::getPosition(const Term* term) {
  //Func - Returns the position of a Term in the set
  //Pre  - term holds a valid reference to a Term
  //Post - If term was found then its position is returned otherwise -1

      //An empty set cannot contain term
      if (_size == 0)
          return -1;

      //The term index has to be in memory before getIndexOffset can search it
      ensureIndexIsRead();

      //Reposition the enumeration at the index entry selected for term
      const int32_t indexOffset = getIndexOffset(term);
      seekEnum(indexOffset);

      SegmentTermEnum* enumerator = getEnum();

      //Advance while term sorts after the current term and next() still succeeds
      while (term->compareTo(enumerator->term(false)) > 0 && enumerator->next()) {}

      //scanEnum(const Term*) checks for a NULL current term before calling
      //equals; apply the same guard here so that a NULL term is never compared
      if (enumerator->term(false) != NULL && term->equals(enumerator->term(false)))
          return enumerator->position;

      return -1;
  }

  SegmentTermEnum* TermInfosReader::terms(const Term* term) {
  //Func - Returns a clone of an enumeration. When term is not NULL the
  //       enumeration obtained from getEnum() is cloned after term has been
  //       looked up with get(); when term is NULL origEnum is cloned.
  //Pre  - term is NULL or points to a valid Term
  //Post - The clone has been returned

      //Default source, used when no term was supplied
      SegmentTermEnum* source = origEnum;

      if ( term != NULL ){
          //get() is called only for its effect on the enumeration;
          //the TermInfo it hands back is not needed and is deleted at once
          TermInfo* found = get(term);
          _CLDELETE(found);
          source = getEnum();
      }

      //Duplicate the selected enumeration for the caller
      SegmentTermEnum* cln = source->clone();

      //The clone must be a valid instance
      CND_CONDITION(cln != NULL,"cln is NULL");

      return cln;
  }


  void TermInfosReader::ensureIndexIsRead() {
  //Func - Reads the term info index file or .tti file.
  //       This file contains every IndexInterval-th entry from the .tis file, 
  //       along with its location in the "tis" file. This is designed to be read entirely 
  //       into memory and used to provide random access to the "tis" file.
  //Pre  - indexTerms    = NULL
  //       indexInfos    = NULL
  //       indexPointers = NULL
  //Post - The term info index file has been read into memory

    SCOPED_LOCK_MUTEX(THIS_LOCK)

	  if ( indexTerms != NULL )
		  return;

      try {
          indexTermsLength = (size_t)indexEnum->size;

		  //Instantiate an block of Term's,so that each one doesn't have to be new'd
          indexTerms    = _CL_NEWARRAY(Term,indexTermsLength);
          CND_CONDITION(indexTerms != NULL,"No memory could be allocated for indexTerms");//Check if is indexTerms is a valid array

		  //Instantiate an big block of TermInfo's, so that each one doesn't have to be new'd
          indexInfos    = _CL_NEWARRAY(TermInfo,indexTermsLength);
          CND_CONDITION(indexInfos != NULL,"No memory could be allocated for indexInfos"); //Check if is indexInfos is a valid array

          //Instantiate an array indexPointers that contains pointers to the term info index file
          indexPointers = _CL_NEWARRAY(int64_t,indexTermsLength);
          CND_CONDITION(indexPointers != NULL,"No memory could be allocated for indexPointers");//Check if is indexPointers is a valid array

		  //Iterate through the terms of indexEnum
          for (int32_t i = 0; indexEnum->next(); ++i){
              indexTerms[i].set(indexEnum->term(false),indexEnum->term(false)->text());
              indexEnum->getTermInfo(&indexInfos[i]);
              indexPointers[i] = indexEnum->indexPointer;
          }
    }_CLFINALLY(
          indexEnum->close(); 
		  //Close and delete the IndexInput is. The close is done by the destructor.
          _CLDELETE( indexEnum->input );
          _CLDELETE( indexEnum ); 
    );
  }


  int32_t TermInfosReader::getIndexOffset(const Term* term){
  //Func - Binary search of indexTerms for term
  //       (assumes indexTerms is sorted ascending by Term::compareTo - TODO confirm)
  //Pre  - term holds a reference to a valid term
  //       indexTerms != NULL
  //Post - The index of the entry equal to term has been returned. When no entry
  //       is equal, the final upper bound of the search has been returned: the
  //       index of the greatest entry ordered before term, or -1 when there is
  //       no such entry.

      //Check if is indexTerms is a valid array
      CND_PRECONDITION(indexTerms != NULL,"indexTerms is NULL");

      int32_t lo = 0;
      //Convert explicitly before subtracting so that an empty index gives hi == -1
      int32_t hi = static_cast<int32_t>(indexTermsLength) - 1;

      while (hi >= lo) {
          //Midpoint computed from the distance between the bounds;
          //unlike (lo + hi) >> 1 this cannot overflow
          const int32_t mid = lo + ((hi - lo) >> 1);

          //mid must address an existing entry; both sides are widened so the
          //comparison does not mix signed and unsigned operands
          CND_PRECONDITION(static_cast<int64_t>(mid) < static_cast<int64_t>(indexTermsLength),"mid >= indexTermsLength");

          //Determine if term is before mid or after mid
          const int32_t delta = term->compareTo(&indexTerms[mid]);
          if (delta < 0){
              //term is ordered before indexTerms[mid]: continue in the lower half
              hi = mid - 1;
          }else if (delta > 0){
              //term is ordered after indexTerms[mid]: continue in the upper half
              lo = mid + 1;
          }else{
              //term has been found so return its position
              return mid;
          }
      }

      //No exact match: hi is the new starting offset
      return hi;
  }

  void TermInfosReader::seekEnum(const int32_t indexOffset) {
  //Func - Reposition the current Term and TermInfo to indexOffset
  //Pre  - indexOffset >= 0
  //       indexTerms    != NULL
  //       indexInfos    != NULL
  //       indexPointers != NULL
  //Post - The current Term and Terminfo have been repositioned to indexOffset

      CND_PRECONDITION(indexOffset >= 0, "indexOffset contains a negative number");
      CND_PRECONDITION(indexTerms != NULL,    "indexTerms is NULL");
      CND_PRECONDITION(indexInfos != NULL,    "indexInfos is NULL");
      CND_PRECONDITION(indexPointers != NULL, "indexPointers is NULL");

	  SegmentTermEnum* enumerator =  getEnum();
	  enumerator->seek( 
          indexPointers[indexOffset],
		  (indexOffset * enumerator->indexInterval) - 1,
          &indexTerms[indexOffset], 
		  &indexInfos[indexOffset]
	      );
  }


  TermInfo* TermInfosReader::scanEnum(const Term* term) {
  //Func - Scans the enumeration returned by getEnum() to term, starting from
  //       its current term, and reports the TermInfo when term is present
  //Pre  - term contains a valid reference to a Term
  //Post - The result of getTermInfo() has been returned when the enumeration
  //       stopped on a term equal to term, otherwise NULL has been returned

      SegmentTermEnum* cursor = getEnum();
      cursor->scanTo(term);

      //No current term after the scan: term is not present
      if (cursor->term(false) == NULL)
          return NULL;

      //The scan stopped on a different term: term is not present
      if (!term->equals(cursor->term(false)))
          return NULL;

      //Exact match: hand back the TermInfo of the current term
      return cursor->getTermInfo();
  }

  Term* TermInfosReader::scanEnum(const int32_t position) {
  //Func - Advances the enumeration returned by getEnum() until its position
  //       is no longer smaller than position and returns its current Term
  //Pre  - position >= 0
  //Post - The Term at the reached position has been returned, or NULL when the
  //       enumeration ended before position was reached

      SegmentTermEnum* cursor = getEnum();

      for (;;) {
          //Stop as soon as the requested position has been reached
          if (cursor->position >= position)
              break;

          //No further term available: the requested position was too big
          if (!cursor->next())
              return NULL;
      }

      //Return the Term at the reached position
      return cursor->term();
  }

CL_NS_END

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -