📄 indexwriter.cpp
字号:
//Flush the RamSegments to disk
flushRamSegments();
while (segmentInfos->size() > 1 ||
(segmentInfos->size() == 1 &&
(SegmentReader::hasDeletions(segmentInfos->info(0)) ||
segmentInfos->info(0)->getDir()!=directory ||
(useCompoundFile &&
(!SegmentReader::usesCompoundFile(segmentInfos->info(0)) ||
SegmentReader::hasSeparateNorms(segmentInfos->info(0))))))) {
int32_t minSegment = segmentInfos->size() - mergeFactor;
mergeSegments(minSegment < 0 ? 0 : minSegment);
}
}
char* IndexWriter::newSegmentName() {
SCOPED_LOCK_MUTEX(THIS_LOCK)
TCHAR buf[9];
_i64tot(segmentInfos->counter++,buf,36); //36 is RADIX of 10 digits and 26 numbers
int32_t rlen = _tcslen(buf) + 2;
char* ret = _CL_NEWARRAY(char,rlen);
strcpy(ret,"_");
STRCPY_TtoA(ret+1,buf,rlen-1); //write at 2nd character, for a maximum of 9 characters
return ret;
}
void IndexWriter::flushRamSegments() {
    //Func - Merges the trailing run of segments that live in ramDirectory
    //Pre  - ramDirectory != NULL
    //Post - mergeSegments() has been called for the selected range of segments,
    //       or nothing has been done when there was no segment to merge
    CND_PRECONDITION(ramDirectory != NULL, "ramDirectory is NULL");

    // Deliberately signed: the index reaches -1 when there are no segments or
    // when every segment is stored in ramDirectory
    int32_t minSegment = segmentInfos->size()-1;
    CND_CONDITION(minSegment >= -1, "minSegment must be >= -1");

    // Walk backwards over the segments stored in ramDirectory and total up
    // their document counts
    int32_t ramDocCount = 0;
    for (; minSegment >= 0 &&
           segmentInfos->info(minSegment)->getDir() == ramDirectory;
         minSegment--) {
        ramDocCount += segmentInfos->info(minSegment)->docCount;
    }

    // minSegment now refers to the segment just before the RAM run (or is -1).
    // That segment takes part in the merge too, unless it does not exist, the
    // combined document count would exceed mergeFactor, or the last segment
    // is not stored in ramDirectory at all.
    const bool skipPrecedingSegment =
        minSegment < 0 ||
        (ramDocCount + segmentInfos->info(minSegment)->docCount) > mergeFactor ||
        !(segmentInfos->info(segmentInfos->size()-1)->getDir() == ramDirectory);
    if (skipPrecedingSegment)
        minSegment++;

    CND_CONDITION(minSegment >= 0, "minSegment must be >= 0");

    // Only merge when the start index still refers to an existing segment
    if (minSegment < segmentInfos->size())
        mergeSegments(minSegment);
}
void IndexWriter::maybeMergeSegments() {
//Func - Incremental Segment Merger
//Pre -
//Post -
int64_t targetMergeDocs = minMergeDocs;
// find segments smaller than current target size
while (targetMergeDocs <= maxMergeDocs) {
int32_t minSegment = segmentInfos->size();
int32_t mergeDocs = 0;
while (--minSegment >= 0) {
SegmentInfo* si = segmentInfos->info(minSegment);
if (si->docCount >= targetMergeDocs)
break;
mergeDocs += si->docCount;
}
if (mergeDocs >= targetMergeDocs){
// found a merge to do
mergeSegments(minSegment+1);
}else
break;
//increase target size
targetMergeDocs *= mergeFactor;
}
}
void IndexWriter::mergeSegments(const uint32_t minSegment) {
CLVector<SegmentReader*> segmentsToDelete(false);
const char* mergedName = newSegmentName();
#ifdef _CL_DEBUG_INFO
fprintf(_CL_DEBUG_INFO, "merging segments\n");
#endif
SegmentMerger merger(directory, mergedName, useCompoundFile);
for (int32_t i = minSegment; i < segmentInfos->size(); i++) {
SegmentInfo* si = segmentInfos->info(i);
#ifdef _CL_DEBUG_INFO
fprintf(_CL_DEBUG_INFO, " %s (%d docs)\n",si->name,si->docCount);
#endif
SegmentReader* reader = _CLNEW SegmentReader(si);
merger.add(reader);
if ((reader->getDirectory() == this->directory) || // if we own the directory
(reader->getDirectory() == this->ramDirectory)){
segmentsToDelete.push_back((SegmentReader*)reader); // queue segment for deletion
}
}
int32_t mergedDocCount = merger.merge();
#ifdef _CL_DEBUG_INFO
fprintf(_CL_DEBUG_INFO,"\n into %s (%d docs)\n",mergedName, mergedDocCount);
#endif
segmentInfos->clearto(minSegment); // pop old infos & add new
segmentInfos->add( _CLNEW SegmentInfo(mergedName, mergedDocCount, directory));
// close readers before we attempt to delete now-obsolete segments
merger.closeReaders();
LuceneLock* lock = directory->makeLock("commit.lock");
IndexWriterLockWith2 with ( lock,LUCENE_COMMIT_LOCK_TIMEOUT,this,&segmentsToDelete );
{
SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync
with.run();
}
_CLDELETE( lock );
_CLDELETE_CaARRAY( mergedName ); //ADD:
}
void IndexWriter::deleteSegments(CLVector<SegmentReader*>* segments) {
    //Func - Deletes the files of the given segments and retries the files
    //       recorded earlier as not yet deleted
    //Pre  -
    //Post - The list of files that still could not be deleted has been written
    //       with writeDeleteableFiles()
    AStringArrayConstWithDeletor deletable;

    // First retry the files recorded by a previous run
    {
        AStringArrayConstWithDeletor* pending = readDeleteableFiles();
        deleteFiles(pending, &deletable);
        _CLDELETE(pending);
    }

    // Then remove the files that belong to each of the given segments
    for (uint32_t idx = 0; idx < segments->size(); idx++) {
        SegmentReader* reader = (*segments)[idx];
        AStringArrayConstWithDeletor* files = reader->files();
        Directory* readerDir = reader->getDirectory();
        if (readerDir == this->directory) {
            // our own directory: failures are collected for a later retry
            deleteFiles(files, &deletable);
        } else {
            // any other directory (eg, RAM files): plain delete
            deleteFiles(files, readerDir);
        }
        _CLDELETE(files);
    }

    // Remember the files that could not be deleted this time
    writeDeleteableFiles(&deletable);
}
AStringArrayConstWithDeletor* IndexWriter::readDeleteableFiles() {
    //Func - Reads the list of file names stored in the "deletable" file of
    //       directory
    //Pre  -
    //Post - A newly allocated array has been returned; it is empty when the
    //       "deletable" file does not exist. The caller is expected to release
    //       it (deleteSegments() does so with _CLDELETE).
    AStringArrayConstWithDeletor* result = _CLNEW AStringArrayConstWithDeletor;

    if (directory->fileExists("deletable")) {
        IndexInput* input = directory->openInput("deletable");
        try {
            TCHAR tname[CL_MAX_PATH];
            // The file starts with the number of names, followed by the names
            int32_t remaining = input->readInt();
            while (remaining > 0) {
                input->readString(tname,CL_MAX_PATH);
                result->push_back(STRDUP_TtoA(tname));
                --remaining;
            }
        } _CLFINALLY(
            input->close();
            _CLDELETE(input);
        );
    }

    return result;
}
void IndexWriter::writeDeleteableFiles(AStringArrayConstWithDeletor* files) {
    //Func - Stores the given file names in the "deletable" file of directory
    //Pre  -
    //Post - The names have been written to "deleteable.new", which has then
    //       been renamed to "deletable"
    IndexOutput* output = directory->createOutput("deleteable.new");
    try {
        // Layout: the number of names first, then every name as a string
        output->writeInt(files->size());

        TCHAR tfile[CL_MAX_PATH]; //temporary space for the TCHAR form of a name
        for (AStringArrayConstWithDeletor::const_iterator it = files->begin();
             it != files->end(); ++it) {
            const char* name = *it;
            STRCPY_AtoT(tfile,name,CL_MAX_PATH);
            output->writeString( tfile, _tcslen(tfile) );
        }
    } _CLFINALLY(
        output->close();
        _CLDELETE(output);
    );

    // The temporary name is spelled "deleteable" while the final one is
    // "deletable"; both literals are used exactly as written here.
    directory->renameFile("deleteable.new", "deletable");
}
void IndexWriter::deleteFiles(AStringArrayConstWithDeletor* files, Directory* directory) {
    //Func - Deletes every file named in files from the given directory
    //Pre  -
    //Post - deleteFile() has been called on directory for each name, in order.
    //       Nothing is caught here, so an exception thrown by deleteFile()
    //       propagates to the caller.
    for (AStringArrayConstWithDeletor::const_iterator it = files->begin();
         it != files->end(); ++it) {
        directory->deleteFile( *it );
    }
}
void IndexWriter::deleteFiles(AStringArrayConstWithDeletor* files, AStringArrayConstWithDeletor* deletable) {
    //Func - Tries to delete every file named in files from this writer's
    //       directory
    //Pre  -
    //Post - Every file that existed and could be deleted is gone. A copy of
    //       the name of each file whose deletion failed with CL_ERR_IO, and
    //       which still exists, has been appended to deletable so that it can
    //       be retried later. Any other CLuceneError is passed on to the
    //       caller.
    for (AStringArrayConstWithDeletor::const_iterator itr = files->begin();
         itr != files->end(); ++itr) {
        const char* file = *itr;
        try {
            if ( directory->fileExists(file) )
                directory->deleteFile(file); // try to delete each file
        } catch (CLuceneError& err) { // if delete fails
            if ( err.number() != CL_ERR_IO )
                throw; // not an IO error: re-throw the original exception
                       // object instead of a (possibly sliced) copy of it
            if (directory->fileExists(file)) {
#ifdef _CL_DEBUG_INFO
                fprintf(_CL_DEBUG_INFO,"%s; Will re-try later.\n", err.what());
#endif
                deletable->push_back(STRDUP_AtoA(file)); // add to deletable
            }
        }
    }
}
void IndexWriter::addIndexes(Directory** dirs) {
    //Func - Adds the indexes located in several directories to the index
    //       managed by this instance
    //Pre  - dirs != NULL; the array is walked until a NULL entry is found, so
    //       it has to be NULL-terminated
    //Post - The segment infos read from every directory in dirs have been added
    //       to segmentInfos and optimize() has been run on the result
    SCOPED_LOCK_MUTEX(THIS_LOCK)

    CND_PRECONDITION(dirs != NULL, "dirs is NULL");

    // Optimize first so the current index starts out with zero or one segment
    optimize();

    for (int32_t dirIdx = 0; dirs[dirIdx] != NULL; dirIdx++) {
        // DSR:CL_BUG: sis is a stack object, yet the SegmentInfo pointers it
        // hands out are stored in this->segmentInfos and are still needed by
        // the optimize() call below. In CLucene 0.8.11 sis deleted its members
        // when it went out of scope, which left dangling pointers behind. It
        // is therefore constructed with the argument false, which tells it not
        // to delete its members.
        SegmentInfos sis(false);
        sis.read( dirs[dirIdx]);

        // Take over every segment info found in that directory
        for (int32_t segIdx = 0; segIdx < sis.size(); segIdx++)
            segmentInfos->add(sis.info(segIdx));
    }

    optimize(); // cleanup
}
void IndexWriter::addIndexes(IndexReader** readers){
    //Func - Merges the indexes behind the given readers, together with the
    //       existing index (if any), into one new segment
    //Pre  - readers is walked until a NULL entry is found, so it has to be a
    //       NULL-terminated array. Every entry is cast to SegmentReader*
    //       before it is handed to the merger.
    //Post - segmentInfos holds only the info of the merged segment and the
    //       commit step has been run under the commit lock
    SCOPED_LOCK_MUTEX(THIS_LOCK)

    optimize(); // start with zero or 1 seg

    char* mergedName = newSegmentName();

    // NOTE(review): merger is allocated with _CLNEW and never deleted in this
    // function, and neither is the SegmentReader created just below. Whether
    // SegmentMerger takes ownership of the readers added to it is not visible
    // here, so both are left as they were -- confirm and release them if
    // appropriate.
    SegmentMerger* merger = _CLNEW SegmentMerger(directory, mergedName, false);

    if (segmentInfos->size() == 1) // add existing index, if any
        merger->add(_CLNEW SegmentReader(segmentInfos->info(0)));

    int32_t readersLength = 0;
    while ( readers[readersLength] != NULL )
        merger->add((SegmentReader*) readers[readersLength++]);

    int32_t docCount = merger->merge(); // merge 'em

    // pop old infos & add new
    segmentInfos->clearto(0);
    segmentInfos->add(_CLNEW SegmentInfo(mergedName, docCount, directory));

    LuceneLock* lock = directory->makeLock("commit.lock");
    IndexWriterLockWith with ( lock,LUCENE_COMMIT_LOCK_TIMEOUT,this,true);
    {
        SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync
        with.run();
    }
    _CLDELETE(lock);

    // The name buffer was allocated by newSegmentName() and used to be leaked
    // here. mergeSegments() releases its buffer in the same way after creating
    // the SegmentInfo, and the merger is not used again past this point.
    _CLDELETE_CaARRAY(mergedName);
}
CL_NS_END
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -