📄 terminfosreader.cpp
字号:
// but before end of block
if (
//the length of indexTerms (the number of terms in enumerator) equals
//_enum_offset OR
indexTermsLength == _enumOffset ||
//term is positioned in front of term found at _enumOffset in indexTerms
term->compareTo(&indexTerms[_enumOffset]) < 0){
//no need to seek, retrieve the TermInfo for term
return scanEnum(term);
}
}
//Reposition current term in the enumeration
seekEnum(getIndexOffset(term));
//Return the TermInfo for term
return scanEnum(term);
}
int64_t TermInfosReader::getPosition(const Term* term) {
//Func - Returns the position of a Term in the set
//Pre  - term holds a valid reference to a Term
//       enumerator != NULL
//Post - If term was found then its position (enumerator->position) is returned,
//       otherwise -1

    //An empty set cannot contain term
    if (_size == 0)
        return -1;

    //Make sure the term index arrays are available before searching them
    ensureIndexIsRead();

    //Retrieve the indexOffset for term and reposition the enumerator there
    int32_t indexOffset = getIndexOffset(term);
    seekEnum(indexOffset);

    SegmentTermEnum* enumerator = getEnum();

    //Step forward for as long as term still sorts after the current term
    //and the enumerator can advance
    while(term->compareTo(enumerator->term(false)) > 0 && enumerator->next()) {}

    //The loop can also end because next() failed; scanEnum(const Term*) treats
    //term(false) as possibly NULL in that situation, so apply the same guard
    //here before comparing instead of handing NULL to equals()
    if ( enumerator->term(false) != NULL && term->equals(enumerator->term(false)) ){
        return enumerator->position;
    }

    //term is not present in the set
    return -1;
}
SegmentTermEnum* TermInfosReader::terms(const Term* term) {
//Func - Hands out a clone of a term enumeration.
//       With a non-NULL term the clone is taken from the enumerator returned by
//       getEnum() after get(term) has been called; with a NULL term the clone is
//       taken from origEnum.
//Pre  - term is either NULL or points to a valid Term
//Post - A clone of the selected enumeration has been returned
//       NOTE(review): the caller presumably owns the returned clone - confirm

    SegmentTermEnum* source = NULL;

    if ( term == NULL ){
        //No start term requested: clone the original enumeration
        source = origEnum;
    }else{
        //get() is called only to move the enumerator to term;
        //the TermInfo it returns is not needed and is deleted right away
        TermInfo* unusedInfo = get(term);
        _CLDELETE(unusedInfo);
        source = getEnum();
    }

    //Duplicate the chosen enumeration
    SegmentTermEnum* cln = source->clone();

    //The clone must be a valid instance
    CND_CONDITION(cln != NULL,"cln is NULL");

    return cln;
}
void TermInfosReader::ensureIndexIsRead() {
//Func - Reads the term info index file or .tti file.
// This file contains every IndexInterval-th entry from the .tis file,
// along with its location in the "tis" file. This is designed to be read entirely
// into memory and used to provide random access to the "tis" file.
//Pre - indexTerms = NULL
// indexInfos = NULL
// indexPointers = NULL
//Post - The term info index file has been read into memory
SCOPED_LOCK_MUTEX(THIS_LOCK)
if ( indexTerms != NULL )
return;
try {
indexTermsLength = (size_t)indexEnum->size;
//Instantiate an block of Term's,so that each one doesn't have to be new'd
indexTerms = _CL_NEWARRAY(Term,indexTermsLength);
CND_CONDITION(indexTerms != NULL,"No memory could be allocated for indexTerms");//Check if is indexTerms is a valid array
//Instantiate an big block of TermInfo's, so that each one doesn't have to be new'd
indexInfos = _CL_NEWARRAY(TermInfo,indexTermsLength);
CND_CONDITION(indexInfos != NULL,"No memory could be allocated for indexInfos"); //Check if is indexInfos is a valid array
//Instantiate an array indexPointers that contains pointers to the term info index file
indexPointers = _CL_NEWARRAY(int64_t,indexTermsLength);
CND_CONDITION(indexPointers != NULL,"No memory could be allocated for indexPointers");//Check if is indexPointers is a valid array
//Iterate through the terms of indexEnum
for (int32_t i = 0; indexEnum->next(); ++i){
indexTerms[i].set(indexEnum->term(false),indexEnum->term(false)->text());
indexEnum->getTermInfo(&indexInfos[i]);
indexPointers[i] = indexEnum->indexPointer;
}
}_CLFINALLY(
indexEnum->close();
//Close and delete the IndexInput is. The close is done by the destructor.
_CLDELETE( indexEnum->input );
_CLDELETE( indexEnum );
);
}
int32_t TermInfosReader::getIndexOffset(const Term* term){
//Func - Binary search over indexTerms.
//       Returns the offset of the greatest index entry which is less than or equal to term.
//Pre  - term holds a reference to a valid term
//       indexTerms != NULL
//Post - If an entry equal to term exists its offset is returned. Otherwise the
//       offset of the last entry that compares smaller than term is returned;
//       this is -1 when no such entry exists (including when indexTermsLength is 0).

    //Check if indexTerms is a valid array
    CND_PRECONDITION(indexTerms != NULL,"indexTerms is NULL");

    int32_t lo = 0;
    int32_t hi = indexTermsLength - 1;

    while (hi >= lo) {
        //Start in the middle between lo and hi. Adding half the span to lo
        //instead of halving (lo + hi) keeps the arithmetic free of signed overflow.
        const int32_t mid = lo + ((hi - lo) >> 1);

        //mid has to address an existing entry
        CND_PRECONDITION(mid < indexTermsLength,"mid >= indexTermsLength");

        //Determine if term is before mid or after mid
        const int32_t delta = term->compareTo(&indexTerms[mid]);

        if (delta < 0){
            //term sorts before entry mid: continue in the lower half
            hi = mid - 1;
        }else if (delta > 0){
            //term sorts after entry mid: continue in the upper half
            lo = mid + 1;
        }else{
            //term has been found so return its position
            return mid;
        }
    }

    //No exact match: hi now sits on the last entry smaller than term (or is -1)
    return hi;
}
void TermInfosReader::seekEnum(const int32_t indexOffset) {
//Func - Reposition the current Term and TermInfo to indexOffset
//Pre - indexOffset >= 0
// indexTerms != NULL
// indexInfos != NULL
// indexPointers != NULL
//Post - The current Term and Terminfo have been repositioned to indexOffset
CND_PRECONDITION(indexOffset >= 0, "indexOffset contains a negative number");
CND_PRECONDITION(indexTerms != NULL, "indexTerms is NULL");
CND_PRECONDITION(indexInfos != NULL, "indexInfos is NULL");
CND_PRECONDITION(indexPointers != NULL, "indexPointers is NULL");
SegmentTermEnum* enumerator = getEnum();
enumerator->seek(
indexPointers[indexOffset],
(indexOffset * enumerator->indexInterval) - 1,
&indexTerms[indexOffset],
&indexInfos[indexOffset]
);
}
TermInfo* TermInfosReader::scanEnum(const Term* term) {
//Func - Scans the enumeration returned by getEnum() for term, starting from
//       its current term, and returns the matching TermInfo.
//Pre  - term contains a valid reference to a Term
//       enumerator != NULL
//Post - If the enumerator's current term after scanTo(term) equals term, the
//       result of enumerator->getTermInfo() has been returned;
//       otherwise NULL has been returned

    SegmentTermEnum* cursor = getEnum();

    //Advance the enumeration towards term
    cursor->scanTo(term);

    //No current term, or a current term other than the requested one: not found
    if (cursor->term(false) == NULL || !term->equals(cursor->term(false))){
        return NULL;
    }

    //The enumerator rests on term, so hand back its TermInfo
    return cursor->getTermInfo();
}
Term* TermInfosReader::scanEnum(const int32_t position) {
//Func - Advances the enumeration returned by getEnum() until it reaches the
//       requested position and returns the Term found there
//Pre  - position >= 0
//       enumerator != NULL
//Post - The result of enumerator->term() at the requested position has been
//       returned, or NULL if the enumeration ended before reaching it

    SegmentTermEnum* cursor = getEnum();

    for (;;) {
        //Stop as soon as the enumerator is at (or beyond) the requested position
        if (cursor->position >= position){
            break;
        }

        //Running out of terms means the requested position was too big
        if (!cursor->next()){
            return NULL;
        }
    }

    //Return the Term at the requested position
    return cursor->term();
}
CL_NS_END
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -