📄 multireader.cpp
字号:
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "MultiReader.h"
#include "IndexReader.h"
#include "CLucene/document/Document.h"
#include "Terms.h"
#include "SegmentMergeQueue.h"
CL_NS_USE(store)
CL_NS_DEF(index)
//Func - Constructor
//Pre  - subReaders is NULL or an array of IndexReader pointers
//Post - The base IndexReader has been given the directory of the first
//       sub-reader (NULL when there is no first sub-reader) and the
//       instance has been set up through initialize()
MultiReader::MultiReader(IndexReader** subReaders):
    IndexReader(
        (subReaders != NULL && subReaders[0] != NULL)
            ? subReaders[0]->getDirectory()
            : NULL),
    normsCache(true, true)
{
    this->initialize(subReaders);
}
//Func - Constructor
//Pre  - true
//Post - directory and sis have been forwarded to the base IndexReader
//       (third base argument false) and the sub-reader array has been
//       handed to initialize()
MultiReader::MultiReader(Directory* directory, SegmentInfos* sis, IndexReader** subReaders):
    IndexReader(directory, sis, false),
    normsCache(true, true)
{
    this->initialize(subReaders);
}
MultiReader::~MultiReader() {
    //Func - Destructor
    //Pre  - true
    //Post - The starts array, every sub-reader and the sub-reader array
    //       itself have been released

    _CLDELETE_ARRAY(starts);

    // Release each sub-reader before freeing the array that holds them
    if (subReaders != NULL) {
        int32_t idx = 0;
        while (idx < subReadersLength) {
            _CLDELETE(subReaders[idx]);
            ++idx;
        }
    }

    _CLDELETE_ARRAY(subReaders);
}
//Func - Stores the sub-reader array and derives the per-reader document
//       offsets from it
//Pre  - subReaders is NULL or an array of IndexReader pointers terminated
//       by a NULL entry
//Post - subReadersLength holds the number of non-NULL entries,
//       starts[i] holds the first document number of sub-reader i,
//       starts[subReadersLength] and _maxDoc hold the summed maxDoc() of
//       all sub-readers, _numDocs is marked as not cached (-1) and
//       _hasDeletions is true only if some sub-reader reports deletions
void MultiReader::initialize(IndexReader** subReaders){
    this->subReadersLength = 0;
    this->subReaders = subReaders;

    //count the subReaders size
    if ( subReaders != NULL ){
        while ( subReaders[subReadersLength] != NULL ){
            subReadersLength++;
        }
    }

    _maxDoc = 0;
    _numDocs = -1; // -1 makes numDocs() recompute on first use

    // The constructors in this file do not initialise this flag and the loop
    // below only ever sets it to true, so reset it explicitly; otherwise it
    // would hold an indeterminate value when no sub-reader has deletions.
    _hasDeletions = false;

    starts = _CL_NEWARRAY(int32_t,subReadersLength + 1); // build starts array
    for (int32_t i = 0; i < subReadersLength; i++) {
        starts[i] = _maxDoc;

        // compute maxDocs
        _maxDoc += subReaders[i]->maxDoc();

        if (subReaders[i]->hasDeletions())
            _hasDeletions = true;
    }
    starts[subReadersLength] = _maxDoc;
}
//Func - Returns the term frequency vectors of document n
//Post - The request has been forwarded to the sub-reader selected by
//       readerIndex(n), using the document number relative to that reader
TermFreqVector** MultiReader::getTermFreqVectors(int32_t n){
    const int32_t segment  = readerIndex(n);       // sub-reader selected for n
    const int32_t localDoc = n - starts[segment];  // number inside that reader
    return subReaders[segment]->getTermFreqVectors(localDoc);
}
//Func - Returns the term frequency vector of one field of document n
//Post - The request has been forwarded to the sub-reader selected by
//       readerIndex(n), using the document number relative to that reader
TermFreqVector* MultiReader::getTermFreqVector(int32_t n, const TCHAR* field){
    const int32_t segment  = readerIndex(n);       // sub-reader selected for n
    const int32_t localDoc = n - starts[segment];  // number inside that reader
    return subReaders[segment]->getTermFreqVector(localDoc, field);
}
//Func - Returns the summed numDocs() of all sub-readers
//Post - The sum has been stored in _numDocs; a stored value other than -1
//       is returned without asking the sub-readers again
int32_t MultiReader::numDocs() {
    SCOPED_LOCK_MUTEX(THIS_LOCK)

    if (_numDocs != -1)
        return _numDocs;                         // cached value still valid

    // cache miss: add up the counts of all sub-readers
    int32_t total = 0;
    int32_t idx = 0;
    while (idx < subReadersLength) {
        total += subReaders[idx]->numDocs();
        ++idx;
    }
    _numDocs = total;
    return _numDocs;
}
//Func - Returns the value of _maxDoc, which initialize() sets to the summed
//       maxDoc() of all sub-readers
int32_t MultiReader::maxDoc() const {
    return this->_maxDoc;
}
//Func - Returns the Document with number n
//Post - The request has been forwarded to the sub-reader selected by
//       readerIndex(n), using the document number relative to that reader
CL_NS(document)::Document* MultiReader::document(const int32_t n) {
    const int32_t segment  = readerIndex(n);       // sub-reader selected for n
    const int32_t localDoc = n - starts[segment];  // number inside that reader
    return subReaders[segment]->document(localDoc);
}
bool MultiReader::isDeleted(const int32_t n) {
int32_t i = readerIndex(n); // find segment num
return subReaders[i]->isDeleted(n - starts[i]); // dispatch to segment reader
}
//Func - Returns the norms of a field for all documents of all sub-readers
//Post - If normsCache already holds an entry for field, that entry is
//       returned. Otherwise a new array of maxDoc() bytes has been filled
//       by the sub-readers (each writing at its starts[] offset), stored
//       in normsCache under a duplicate of field, and returned
uint8_t* MultiReader::norms(const TCHAR* field){
    SCOPED_LOCK_MUTEX(THIS_LOCK)

    uint8_t* cached = normsCache.get(field);
    if (cached != NULL)
        return cached;                             // cache hit

    uint8_t* merged = _CL_NEWARRAY(uint8_t,maxDoc());
    int32_t idx = 0;
    while (idx < subReadersLength) {
        subReaders[idx]->norms(field, merged + starts[idx]);
        ++idx;
    }

    // The caller's string may be deleted while the cache still uses it as a
    // key, which would corrupt the cache; store a private copy instead.
    TCHAR* ownedKey = STRDUP_TtoT(field);

    //update cache
    normsCache.put(ownedKey, merged);
    return merged;
}
//Func - Writes the norms of a field for all documents into result
//Pre  - result has room for at least maxDoc() bytes
//Post - On a cache hit the cached bytes have been copied into result.
//       Otherwise every sub-reader has written its norms into result at
//       its starts[] offset
void MultiReader::norms(const TCHAR* field, uint8_t* result) {
    SCOPED_LOCK_MUTEX(THIS_LOCK)
    uint8_t* bytes = normsCache.get(field);
    if (bytes != NULL){ // cache hit
        memcpy(result,bytes,maxDoc());
        // The cached array is what norms(field) hands out, so the copy is
        // the complete answer. Previously control fell through, hit the
        // debug break and re-read every segment over the same buffer,
        // making the copy pointless.
        return;
    }

    // cache miss: same path as before, including the debug break
    CLDebugBreak();
    for (int32_t i = 0; i < subReadersLength; i++) // read from segments
        subReaders[i]->norms(field, result+starts[i] );
}
//Func - Sets the norm of a field for document n
//Post - The normsCache entry for field has been removed and the new value
//       has been passed to the sub-reader selected by readerIndex(n)
void MultiReader::doSetNorm(int32_t n, const TCHAR* field, uint8_t value){
    // drop the cached norms of this field before they are changed
    normsCache.remove(field);

    const int32_t segment  = readerIndex(n);       // sub-reader selected for n
    const int32_t localDoc = n - starts[segment];  // number inside that reader
    subReaders[segment]->setNorm(localDoc, field, value);
}
//Func - Creates a term enumeration over all sub-readers
//Post - A newly allocated MultiTermEnum, constructed with a NULL term,
//       has been returned
TermEnum* MultiReader::terms() const {
    TermEnum* merged = _CLNEW MultiTermEnum(subReaders, starts, NULL);
    return merged;
}
//Func - Creates a term enumeration over all sub-readers for a given term
//Post - A newly allocated MultiTermEnum, constructed with term, has been
//       returned
TermEnum* MultiReader::terms(const Term* term) const {
    TermEnum* merged = _CLNEW MultiTermEnum(subReaders, starts, term);
    return merged;
}
//Func - Returns the document frequency of term t
//Post - The sum of docFreq(t) over all sub-readers has been returned
int32_t MultiReader::docFreq(const Term* t) const {
    int32_t sum = 0;
    int32_t idx = 0;
    while (idx < subReadersLength) {
        sum += subReaders[idx]->docFreq(t);
        ++idx;
    }
    return sum;
}
//Func - Creates a TermDocs enumeration over all sub-readers
//Post - A newly allocated MultiTermDocs built from subReaders and starts
//       has been returned
TermDocs* MultiReader::termDocs() const {
    return _CLNEW MultiTermDocs(subReaders, starts);
}
//Func - Creates a TermPositions enumeration over all sub-readers
//Post - A newly allocated MultiTermPositions built from subReaders and
//       starts has been returned, cast to TermPositions*
TermPositions* MultiReader::termPositions() const {
    return (TermPositions*)_CLNEW MultiTermPositions(subReaders, starts);
}
void MultiReader::doDelete(const int32_t n) {
_numDocs = -1; // invalidate cache
int32_t i = readerIndex(n); // find segment num
subReaders[i]->deleteDocument(n - starts[i]); // dispatch to segment reader
_hasDeletions = true;
}
//Func - Finds the index of the sub-reader for document number n by a
//       binary search over starts[0 .. subReadersLength-1]
//Post - When n equals a start value, the index of the last sub-reader with
//       that start value has been returned. Otherwise the index of the
//       last sub-reader whose start value is below n has been returned,
//       which is -1 when n is below every start value
int32_t MultiReader::readerIndex(const int32_t n) const {
    int32_t low  = 0;
    int32_t high = subReadersLength - 1;

    while (low <= high) {
        int32_t probe = (low + high) >> 1;
        const int32_t probeStart = starts[probe];

        if (n == probeStart) {
            // Sub-readers without documents share their start value with
            // the reader after them; move on to the last one in the run.
            while (probe+1 < subReadersLength && starts[probe+1] == probeStart) {
                ++probe;
            }
            return probe;
        }

        if (n < probeStart)
            high = probe - 1;
        else
            low = probe + 1;
    }

    // no exact match: high now sits just before the insertion point
    return high;
}
void MultiReader::doUndeleteAll(){
for (int32_t i = 0; i < subReadersLength; i++)
subReaders[i]->undeleteAll();
_hasDeletions = false;
}
void MultiReader::doCommit() {
for (int32_t i = 0; i < subReadersLength; i++)
subReaders[i]->commit(); //todo: check this... might be a jlucene bug too
}
void MultiReader::doClose() {
SCOPED_LOCK_MUTEX(THIS_LOCK)
for (int32_t i = 0; i < subReadersLength; i++){
subReaders[i]->close();
}
//close the dir (which will should only do a decref)
//this->getDirectory()->close();
}
//Func - Collects the field names of all sub-readers without duplicates
//Post - A newly allocated array with one more slot than there are unique
//       names has been filled through CLSetList::toArray and returned.
//       Name buffers that were already present have been freed, as have
//       the name arrays returned by the sub-readers
//NOTE(review): presumably toArray NULL-terminates the array and the set
//       does not free the stolen buffers on destruction - confirm against
//       CLSetList
TCHAR** MultiReader::getFieldNames() {
    CL_NS(util)::CLSetList<TCHAR*> uniqueNames;

    for (int32_t r = 0; r < subReadersLength; r++) {
        TCHAR** readerNames = subReaders[r]->getFieldNames();

        // walk the NULL-terminated list of names of this reader
        for (int32_t k = 0; readerNames[k] != NULL; k++) {
            if ( uniqueNames.find(readerNames[k]) != uniqueNames.end() ){
                // already known: this copy is not needed
                _CLDELETE_CARRAY(readerNames[k]);
            }else{
                uniqueNames.insert(readerNames[k]); //steal the name buffer
            }
        }
        _CLDELETE_ARRAY(readerNames);
    }

    TCHAR** result = _CL_NEWARRAY(TCHAR*,uniqueNames.size()+1);
    uniqueNames.toArray(result);
    return result;
}
//Func - Collects, without duplicates, the field names each sub-reader
//       returns for getFieldNames(indexed)
//Post - A newly allocated array with one more slot than there are unique
//       names has been filled through CLSetList::toArray and returned.
//       Name buffers that were already present have been freed, as have
//       the name arrays returned by the sub-readers
TCHAR** MultiReader::getFieldNames(bool indexed) {
    CL_NS(util)::CLSetList<TCHAR*> uniqueNames;

    for (int32_t r = 0; r < subReadersLength; r++) {
        TCHAR** readerNames = subReaders[r]->getFieldNames(indexed);

        // walk the NULL-terminated list of names of this reader
        for (int32_t k = 0; readerNames[k] != NULL; k++) {
            if ( uniqueNames.find(readerNames[k]) != uniqueNames.end() ){
                // already known: this copy is not needed
                _CLDELETE_CARRAY(readerNames[k]);
            }else{
                uniqueNames.insert(readerNames[k]); // the set keeps this buffer
            }
        }
        _CLDELETE_ARRAY(readerNames);
    }

    TCHAR** result = _CL_NEWARRAY(TCHAR*,uniqueNames.size()+1);
    uniqueNames.toArray(result);
    return result;
}
//Func - Collects, without duplicates, the field names each sub-reader
//       returns for getIndexedFieldNames(storedTermVector)
//Post - A newly allocated array with one more slot than there are unique
//       names has been filled through CLSetList::toArray and returned.
//       Name buffers that were already present have been freed, as have
//       the name arrays returned by the sub-readers
TCHAR** MultiReader::getIndexedFieldNames(bool storedTermVector) {
    CL_NS(util)::CLSetList<TCHAR*> uniqueNames;

    for (int32_t r = 0; r < subReadersLength; r++) {
        TCHAR** readerNames = subReaders[r]->getIndexedFieldNames(storedTermVector);

        // walk the NULL-terminated list of names of this reader
        for (int32_t k = 0; readerNames[k] != NULL; k++) {
            if ( uniqueNames.find(readerNames[k]) != uniqueNames.end() ){
                // already known: this copy is not needed
                _CLDELETE_CARRAY(readerNames[k]);
            }else{
                uniqueNames.insert(readerNames[k]); // the set keeps this buffer
            }
        }
        _CLDELETE_ARRAY(readerNames);
    }

    TCHAR** result = _CL_NEWARRAY(TCHAR*,uniqueNames.size()+1);
    uniqueNames.toArray(result);
    return result;
}
MultiTermDocs::MultiTermDocs(){
    //Func - Default constructor
    //       Initialises an empty MultiTermDocs.
    //       This constructor is needed so that the constructor of
    //       MultiTermPositions can initialise the instance by itself
    //Pre  - true
    //Post - An empty instance has been created: all pointers are NULL and
    //       all counters are 0

    // no readers attached yet
    subReaders       = NULL;
    subReadersLength = 0;
    starts           = NULL;
    readerTermDocs   = NULL;

    // enumeration state
    term    = NULL;
    current = NULL;
    base    = 0;
    pointer = 0;
}
MultiTermDocs::MultiTermDocs(IndexReader** r, const int32_t* s){
    //Func - Constructor
    //Pre  - r is NULL or an array of IndexReader pointers terminated by a
    //       NULL entry
    //       s != NULL
    //Post - The instance has been created. When there is at least one
    //       reader, readerTermDocs holds subReadersLength+1 NULL entries,
    //       otherwise it is NULL

    CND_PRECONDITION(s != NULL, "s is NULL");

    subReaders = r;
    starts     = s;

    //count readers up to the terminating NULL entry
    subReadersLength = 0;
    if ( subReaders != NULL ){
        for ( ; subReaders[subReadersLength] != NULL; subReadersLength++ ){
        }
    }

    // enumeration state starts out empty
    base           = 0;
    pointer        = 0;
    current        = NULL;
    term           = NULL;
    readerTermDocs = NULL;

    //Only allocate the per-reader TermDocs slots when there are readers
    if(subReaders != NULL && subReadersLength > 0){
        readerTermDocs = _CL_NEWARRAY(TermDocs*, subReadersLength+1);

        CND_CONDITION(readerTermDocs != NULL,"No memory could be allocated for readerTermDocs");

        //Every slot starts as NULL
        int32_t slot = 0;
        while ( slot < subReadersLength+1 ){
            readerTermDocs[slot] = NULL;
            ++slot;
        }
    }
}
MultiTermDocs::~MultiTermDocs(){
    //Func - Destructor
    //Pre  - true
    //Post - close() has been called and the instance has been destroyed

    this->close();
}
//Func - Returns this instance as TermPositions
//Post - NULL has been returned; this implementation always returns NULL
TermPositions* MultiTermDocs::__asTermPositions(){
    TermPositions* none = NULL;
    return none;
}
//Func - Returns the current document number
//Pre  - current != NULL
//Post - base plus the document number reported by current has been
//       returned
int32_t MultiTermDocs::doc() const {
    CND_PRECONDITION(current!=NULL,"current==NULL, check that next() was called");

    const int32_t localDoc = current->doc();
    return base + localDoc;
}
int32_t MultiTermDocs::freq() const {
CND_PRECONDITION(current!=NULL,"current==NULL, check that next() was called");
return current->freq();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -