⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 multireader.cpp

📁 clucene是c++版的全文检索引擎,完全移植于lucene,采用 stl 编写.
💻 CPP
📖 第 1 页 / 共 2 页
字号:
  }

  // Repositions this instance on the term the given enumeration currently
  // points at, by forwarding that term to seek(Term*).
  // NOTE(review): term(false) presumably hands back the pointer without an
  // extra reference (MultiTermEnum::term(bool) in this file behaves that way);
  // confirm for the other TermEnum implementations.
  void MultiTermDocs::seek(TermEnum* termEnum){
    Term* enumTerm = termEnum->term(false);
    seek(enumTerm);
  }

  void MultiTermDocs::seek( Term* tterm) {
  //Func - Points this instance at tterm and rewinds the iteration state
  //Pre  - tterm != NULL
  //Post - term refers to tterm; base, pointer and current have been reset

    CND_PRECONDITION(tterm != NULL, "tterm is NULL");

    //The order of the next three statements is deliberate: the incoming term
    //is acquired through _CL_POINTER *before* the previously held term is
    //released through _CLDECDELETE. That way a call of the form
    //seek(this->term) does not release the very term it is about to keep.
    Term* incoming = _CL_POINTER(tterm);
    _CLDECDELETE(term);
    term = incoming;

    //Start over: no segment selected yet, first sub reader is next
    current = NULL;
    pointer = 0;
    base = 0;
  }

  // Advances to the next document for the current term.
  // Tries the segment currently being read first; once that one is exhausted
  // (or none is selected yet) it moves on to the following sub reader, setting
  // base to that reader's entry in starts. Returns false when every sub reader
  // has been tried.
  bool MultiTermDocs::next() {
    for (;;) {
      if (current != NULL && current->next())
        return true;

      if (pointer >= subReadersLength)
        return false;

      // Switch to the next segment and try again
      base = starts[pointer];
      current = termDocs(pointer++);
    }
  }

  // Bulk read: forwards docs, freqs and length to the current segment's
  // read() and returns the count that call reports, after adding base to
  // each returned entry of docs. A segment that reports 0 is dropped and the
  // next sub reader is tried; 0 is returned once no sub reader is left.
  int32_t MultiTermDocs::read(int32_t* docs, int32_t* freqs, int32_t length) {
    for (;;) {
      // Make sure there is a segment to read from
      while (current == NULL) {
        if (pointer >= subReadersLength)
          return 0;                       // every segment has been consumed
        base = starts[pointer];
        current = termDocs(pointer++);
      }

      const int32_t count = current->read(docs, freqs, length);
      if (count != 0) {
        // Shift the segment-local doc numbers by this segment's start offset
        const int32_t offset = base;
        for (int32_t k = 0; k < count; ++k)
          docs[k] += offset;
        return count;
      }

      // Nothing left in this segment; fall through to the next one
      current = NULL;
    }
  }

  // Steps forward with next() until doc() is no longer smaller than target.
  // At least one next() call is always made. Returns false if the documents
  // run out before such a position is reached.
  bool MultiTermDocs::skipTo(const int32_t target) {
    while (next()) {
      if (target <= doc())
        return true;
    }
    return false;
  }

  void MultiTermDocs::close() {
  //Func - Closes all MultiTermDocs managed by this instance
  //Pre  - true
  //Post - All the MultiTermDocs have been closed


      //Check if readerTermDocs is valid
    if (readerTermDocs){
          TermDocs* curTD = NULL;
          //iterate through the readerTermDocs array
          for (int32_t i = 0; i < subReadersLength; i++) {
            //Retrieve the i-th TermDocs instance
            curTD = readerTermDocs[i];

            //Check if it is a valid pointer
            if (curTD != NULL) {
               //Close it
               curTD->close();
               //Upon deletion, no need to set readerTermDocs[i] = NULL; the entire
               //readerTermDocs array is deleted immediately below, so there's no chance
               //that a member will be referenced.
               _CLVDELETE(curTD); //todo: not a clucene object... should be
            }
         }

      _CLDELETE_ARRAY(readerTermDocs);
        }

      //current previously pointed to a member of readerTermDocs; ensure that
      //it doesn't now point to invalid memory.
      current = NULL;
      base          = 0;
      pointer       = 0;

    _CLDECDELETE(term);
  }

  // Factory hook: obtains a TermDocs from the given sub reader.
  // MultiTermPositions::termDocs provides a different implementation that
  // goes through termPositions() instead.
  TermDocs* MultiTermDocs::termDocs(const IndexReader* reader) const {
    return reader->termDocs();
  }

  // Returns the TermDocs of the i-th sub reader, positioned on the current
  // term. The per-reader instance is created on first use and cached in
  // readerTermDocs[i]. Returns NULL when no term has been set.
  // No range check is made on i.
  TermDocs* MultiTermDocs::termDocs(const int32_t i) const {
    if (term == NULL)
      return NULL;

    // Create the instance for this sub reader on first use
    if (readerTermDocs[i] == NULL)
      readerTermDocs[i] = termDocs(subReaders[i]);

    TermDocs* segDocs = readerTermDocs[i];
    segDocs->seek(term);
    return segDocs;
  }


  MultiTermEnum::MultiTermEnum(
      IndexReader** subReaders, const int32_t *starts, const Term* t){
  //Func - Constructor
  //       Opens a term enumeration on every sub reader and puts those that
  //       have a current term into a SegmentMergeQueue
  //Pre  - subReaders is NULL or a NULL-terminated array of IndexReader pointers
  //       starts != NULL and is indexed with the same index as subReaders
  //       t is NULL to enumerate all terms, or the term to start at
  //Post - The instance has been created; when t != NULL and at least one
  //       enumeration was queued, next() has already been called once

    //The reader array carries no length; count up to the NULL terminator
    int32_t readerCount = 0;
    if (subReaders != NULL) {
      while (subReaders[readerCount] != NULL)
        ++readerCount;
    }
    CND_PRECONDITION(starts != NULL,"starts is NULL");

    _docFreq = 0;
    _term = NULL;
    queue = _CLNEW SegmentMergeQueue(readerCount);

    CND_CONDITION (queue != NULL, "Could not allocate memory for queue");

    const bool enumerateAll = (t == NULL);

    for (int32_t idx = 0; idx < readerCount; ++idx) {
      IndexReader* segReader = subReaders[idx];

      //Either every term of the segment, or the terms at/after t
      TermEnum* segTerms = NULL;
      if (enumerateAll) {
        segTerms = segReader->terms();
      } else {
        segTerms = segReader->terms(t);
      }

      SegmentMergeInfo* info = _CLNEW SegmentMergeInfo(starts[idx], segTerms, segReader);

      //Decide whether this segment has a term to contribute.
      //term(false) is used on purpose: per the original note (credited to
      //DSR) asking for an owned reference here would leak it.
      bool hasTerm;
      if (enumerateAll) {
        hasTerm = info->next();
      } else {
        hasTerm = (segTerms->term(false) != NULL);
      }

      if (hasTerm) {
        queue->put(info);
      } else {
        //Nothing to merge from this segment
        info->close();
        _CLDELETE(info);
      }
    }

    //With a start term, move onto the first merged term right away
    if (!enumerateAll && queue->size() > 0) {
      next();
    }
  }

  MultiTermEnum::~MultiTermEnum(){
  //Func - Destructor
  //Pre  - true
  //Post - close() has been called and the queue has been deleted

    //close() releases _term and closes the queue; it dereferences queue, so
    //it has to run before the queue itself is deleted
    close();

    //Release the queue object allocated in the constructor
    _CLDELETE(queue);
  }

  bool MultiTermEnum::next(){
  //Func - Move the current term to the next in the set of enumerations
  //Pre  - true
  //Post - Returns true if term has been moved to the next in the set of enumerations
  //       Returns false if this was not possible

    SegmentMergeInfo* top = queue->top();
    if (top == NULL) {
        _CLDECDELETE(_term); 
        _term = NULL;
        return false;
    }

    //The getTerm method requires the client programmer to indicate whether he
    // owns the returned reference, so we can discard ours
    // right away.
    _CLDECDELETE(_term); 

	//Assign term the term of top and make sure the reference counter is increased
	_term = _CL_POINTER(top->term);
	_docFreq = 0;
	
	//Find the next term
	while (top != NULL && _term->compareTo(top->term) == 0) {
		//don't delete, this is the top
		queue->pop(); 
		// increment freq
		_docFreq += top->termEnum->docFreq();	  
		if (top->next()){
			// restore queue
			queue->put(top);				  
		}else{
			// done with a segment
			top->close();				  
			_CLDELETE(top);
		}
		top = queue->top();
	}
	
	return true;
}


  Term* MultiTermEnum::term() {
  //Func - Returns the current term of the set of enumerations
  //Pre  - next() must have been called once
  //Post - _term has been passed through _CL_POINTER and returned; this is
  //       the same result term(true) gives

    Term* shared = _CL_POINTER(_term);
    return shared;
  }
  // Returns the current term.
  //   pointer == true  -> the term is returned through _CL_POINTER
  //   pointer == false -> the raw _term member is returned as is
  Term* MultiTermEnum::term(bool pointer) {
    if (!pointer)
      return _term;
    return _CL_POINTER(_term);
  }

  int32_t MultiTermEnum::docFreq() const {
  //Func - Returns the document frequency of the current term in the set
  //Pre  - next() must have been called once; that is where _docFreq is
  //       summed over every segment positioned on the current term
  //Post - The value of _docFreq has been returned

      return _docFreq;
  }


  void MultiTermEnum::close() {
  //Func - Releases the current term and closes the queue of enumerations
  //Pre  - queue holds a valid reference to a SegmentMergeQueue
  //Post - The reference held in _term has been released
  //       The queue has been closed; per the original note this destroys all
  //       SegmentMergeInfo instances still in it

      // Needed when this enumeration hasn't actually been exhausted yet,
      // i.e. when _term still holds a term
      _CLDECDELETE(_term);

    //Close the queue. This will destroy all SegmentMergeInfo instances!
      queue->close();

  }





  MultiTermPositions::MultiTermPositions(IndexReader** r, const int32_t* s){
  //Func - Constructor
  //Pre  - r is NULL or a NULL-terminated array of IndexReader pointers
  //       s != NULL
  //Post - The instance has been created: the iteration state is reset, no
  //       term is set, and readerTermDocs holds one NULL slot per sub reader
  //       (or is NULL when there are no sub readers)

      subReaders       = r;

      //The reader array carries no length; count up to the NULL terminator
      subReadersLength    = 0;
      if ( subReaders != NULL ){
         while ( subReaders[subReadersLength] != NULL )
            subReadersLength ++ ;
      }

      CND_PRECONDITION(s != NULL, "s is NULL");

      starts        = s;
      base          = 0;
      pointer       = 0;
      current       = NULL;
      term          = NULL;

      readerTermDocs   = NULL;

    //Check if there are readers
    if(subReaders != NULL && subReadersLength > 0){
          //Allocate with the element type of the member itself (TermDocs*).
          //MultiTermDocs::close() releases this array through the TermDocs**
          //member, so allocating it as SegmentTermPositions*[] and casting
          //would make the allocation and release types disagree.
          readerTermDocs = _CL_NEWARRAY(TermDocs*,subReadersLength);

        CND_CONDITION(readerTermDocs != NULL,"No memory could be allocated for readerTermDocs");

          //Slots are filled lazily by termDocs(i); start them out empty
          for ( int32_t i=0;i<subReadersLength;i++){
              readerTermDocs[i]=NULL;
              }
          }
  }

  
  // Returns this object as a TermDocs pointer, using a C-style cast.
  // NOTE(review): the class hierarchy is not visible here; this presumably is
  // a plain upcast to a base class - confirm against the class declaration.
  TermDocs* MultiTermPositions::__asTermDocs(){
	  return (TermDocs*) this;
  }
  // Returns this object as a TermPositions pointer, using a C-style cast.
  // NOTE(review): the class hierarchy is not visible here; this presumably is
  // a plain upcast to a base class - confirm against the class declaration.
  TermPositions* MultiTermPositions::__asTermPositions(){
	  return (TermPositions*) this;
  }


  TermDocs* MultiTermPositions::termDocs(const IndexReader* reader) const {
    // MultiTermPositions::nextPosition() needs this->current to answer
    // __asTermPositions() with a usable TermPositions. So the per-segment
    // object is requested through the reader's termPositions() method and
    // then handed back under the TermDocs interface, instead of being
    // requested through the reader's termDocs() method.

    TermDocs* ret = reader->termPositions()->__asTermDocs();

    CND_CONDITION(ret != NULL,
        "Dynamic downcast in MultiTermPositions::termDocs from"
        " TermPositions to TermDocs failed."
      );
    return ret;
  }

  int32_t MultiTermPositions::nextPosition() {
  //Func - Returns the result of nextPosition() on the segment currently
  //       being read
  //Pre  - current != NULL, i.e. next()/read() has selected a segment
  //Post - The call has been forwarded to current viewed as TermPositions and
  //       its result returned unchanged (no base offset is applied here)
    CND_PRECONDITION(current != NULL,"current is NULL");
    
    TermPositions* curAsTP = current->__asTermPositions();
    
    //Terminated with ';' like every other CND_* use in this file, so the
    //statement is well-formed no matter how the macro expands.
    CND_CONDITION(curAsTP != NULL,
        "Dynamic downcast in MultiTermPositions::nextPosition from"
        " SegmentTermDocs to TermPositions failed."
      );
    return curAsTP->nextPosition();
  }


CL_NS_END

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -