📄 terminfosreader.cpp
字号:
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "TermInfosReader.h"
#include "Term.h"
#include "Terms.h"
#include "SegmentTermEnum.h"
#include "CLucene/store/Directory.h"
#include "FieldInfos.h"
#include "TermInfo.h"
#include "TermInfosWriter.h"
#include "CLucene/util/Misc.h"
CL_NS_USE(store)
CL_NS_USE(util)
CL_NS_DEF(index)
TermInfosReader::TermInfosReader(Directory* dir, const char* seg, FieldInfos* fis):
    directory(dir), fieldInfos(fis)
#ifndef _CL_DISABLE_MULTITHREADING
    , enumerators(false, true)
#endif
{
    //Func - Constructor.
    //       Opens the Term Infos file (.tis) and the Term Info Index file (.tii)
    //       of one segment and wraps each of them in a SegmentTermEnum.
    //Pre  - dir refers to a valid Directory
    //       fis refers to a valid FieldInfos instance
    //       seg != NULL and contains the name of the segment
    //Post - origEnum enumerates the .tis file, indexEnum enumerates the .tii file
    //       and _size holds the size reported by origEnum. The index arrays
    //       (indexTerms, indexInfos, indexPointers) are still NULL.

    CND_PRECONDITION(seg != NULL, "seg is NULL");

    //Remember which segment this reader belongs to
    segment = seg;

    //Nothing of the term index has been loaded yet
    indexPointers = NULL;
    indexInfos = NULL;
    indexTerms = NULL;

#ifdef _CL_DISABLE_MULTITHREADING
    //Single-threaded builds keep one enumerator, created on first use by getEnum()
    stEnumerator = NULL;
#endif

    //Build the file names of the two files that belong to the segment
    char* tisName = Misc::segmentname(segment, ".tis");
    char* tiiName = Misc::segmentname(segment, ".tii");

    //One enumerator per file; each gets its own input opened from the directory
    origEnum = _CLNEW SegmentTermEnum( directory->openInput( tisName ), fieldInfos, false);
    indexEnum = _CLNEW SegmentTermEnum( directory->openInput( tiiName ), fieldInfos, true);

    //Both enumerators must exist from here on
    CND_CONDITION(origEnum != NULL, "No memory could be allocated for orig enumerator");
    CND_CONDITION(indexEnum != NULL, "No memory could be allocated for index enumerator");

    //The file names were only needed to open the inputs
    _CLDELETE_CaARRAY(tiiName);
    _CLDELETE_CaARRAY(tisName);

    //Cache the size of the term enumeration
    _size = origEnum->size;
}
TermInfosReader::~TermInfosReader(){
    //Func - Destructor
    //Pre  - true
    //Post - The instance has been destroyed

    //All cleanup lives in close(): it deletes the index arrays and closes
    //and deletes both enumerators, so the destructor simply delegates to it.
    this->close();
}
void TermInfosReader::close() {
//Func - Close the enumeration of TermInfos
//Pre - true
//Post - The _enumeration has been closed and the arrays
//Check if indexTerms and indexInfos exist
if (indexTerms && indexInfos){
//Iterate through arrays indexTerms and indexPointer to
//destroy their elements
#ifdef _DEBUG
for ( int32_t i=0; i<indexTermsLength;++i ){
if ( indexTerms[i].__cl_refcount != 1 )
CND_PRECONDITION(indexTerms[i].__cl_refcount==1,"TermInfosReader term was references more than internally");
// _CLDECDELETE(indexTerms[i]);
//_CLDELETE(indexInfos[i]);
}
#endif
//Delete the arrays
_CLDELETE_ARRAY(indexTerms);
_CLDELETE_ARRAY(indexInfos);
}
#ifdef _CL_DISABLE_MULTITHREADING
_CLDELETE(stEnumerator);
#endif
//Delete the arrays
_CLDELETE_ARRAY(indexPointers);
if (origEnum != NULL){
origEnum->close();
//Get a pointer to IndexInput used by the enumeration but
//instantiated in the constructor by directory.open( tisFile )
IndexInput *is = origEnum->input;
//Delete the enumuration enumerator
_CLDELETE(origEnum);
//Delete the IndexInput
_CLDELETE(is);
}
if (indexEnum != NULL){
indexEnum->close();
//Get a pointer to IndexInput used by the enumeration but
//instantiated in the constructor by directory.open( tiiFile )
IndexInput *is = indexEnum->input;
//Delete the enumuration enumerator
_CLDELETE(indexEnum);
//Delete the IndexInput
_CLDELETE(is);
}
}
int64_t TermInfosReader::size() const{
    //Func - Reports the size of the enumeration of TermInfos
    //Pre  - true
    //Post - The value the constructor copied from origEnum->size has been returned
    return this->_size;
}
Term* TermInfosReader::get(const int32_t position) {
    //Func - Returns the nth term in the set
    //Pre  - position >= 0
    //Post - The n-th term in the set has been returned. NULL is returned when
    //       the set is empty or when no enumerator could be obtained.

    //A size of 0 means there are no terms to return
    if (_size == 0)
        return NULL;

    SegmentTermEnum* enumerator = getEnum();

    //Previously only the fast path was guarded against a NULL enumerator while
    //the seek path dereferenced it unconditionally; reject NULL once, up front.
    if (enumerator == NULL)
        return NULL;

    //Sequential access: the enumerator is positioned on a term and the requested
    //position lies within one indexInterval ahead of it, so scanning is enough
    const bool reachableByScan =
        enumerator->term(false) != NULL
        && position >= enumerator->position
        && position < (enumerator->position + enumerator->indexInterval);

    if (!reachableByScan){
        //Random access: must seek by index offset. get(const Term*) loads the
        //term index with ensureIndexIsRead() before it does any seeking; do the
        //same here so the seek does not run against an index that was never read.
        //NOTE(review): seekEnum() is defined outside this view - presumably it
        //indexes the arrays that ensureIndexIsRead() fills; confirm.
        ensureIndexIsRead();
        seekEnum(position / enumerator->indexInterval);
    }

    //Get the Term at position
    return scanEnum(position);
}
//TODO: there is currently no way of cleaning up after a thread that has ended;
//the enumerator cached for that thread stays behind in this reader. Hopefully
//this won't be too big a problem... solutions anyone?
SegmentTermEnum* TermInfosReader::getEnum(){
    //Func - Returns the enumerator used for lookups, creating it with terms()
    //       the first time it is asked for
    //Pre  - true
    //Post - Single-threaded builds: stEnumerator has been returned.
    //       Otherwise: the enumerator stored in enumerators under the id of the
    //       calling thread has been returned.
#ifdef _CL_DISABLE_MULTITHREADING
    if ( stEnumerator != NULL )
        return stEnumerator;

    stEnumerator = terms();
    return stEnumerator;
#else
    //Lookup and insertion happen under enumerators_LOCK
    SCOPED_LOCK_MUTEX(enumerators_LOCK)

    SegmentTermEnum* cached = enumerators.get(_LUCENE_CURRTHREADID);
    if (cached != NULL)
        return cached;

    //First request from this thread: create an enumerator and remember it
    SegmentTermEnum* created = terms();
    enumerators.put(_LUCENE_CURRTHREADID, created);
    return created;
#endif
}
TermInfo* TermInfosReader::get(const Term* term){
//Func - Returns a TermInfo for a term
//Pre - term holds a valid reference to term
//Post - if term can be found its TermInfo has been returned otherwise NULL
//If the size of the enumeration is 0 then no Terms have been read
if (_size == 0)
return NULL;
ensureIndexIsRead();
// optimize sequential access: first try scanning cached enum w/o seeking
SegmentTermEnum* enumerator = getEnum();
// optimize sequential access: first try scanning cached enumerator w/o seeking
//if
if (
//the current term of the enumeration enumerator is not at the end AND
enumerator->term(false) != NULL &&
(
//there exists a previous current called prev and term is positioned after this prev OR
( enumerator->prev != NULL && term->compareTo(enumerator->prev) > 0) ||
//term is positioned at the same position as the current of enumerator or at a higher position
term->compareTo(enumerator->term(false)) >= 0 )
)
{
//Calculate the offset for the position
int32_t _enumOffset = (int32_t)(enumerator->position/enumerator->indexInterval)+1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -