// indexwriter.cpp
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "IndexWriter.h"
#include "CLucene/document/Document.h"
#include "CLucene/store/Directory.h"
#include "CLucene/store/Lock.h"
#include "CLucene/util/VoidList.h"
#include "DocumentWriter.h"
#include "SegmentInfos.h"
#include "SegmentMerger.h"
CL_NS_USE(store)
CL_NS_USE(util)
CL_NS_USE(document)
CL_NS_USE(analysis)
CL_NS_DEF(index)
IndexWriter::IndexWriter(const char* path, Analyzer* a, const bool create, const bool _closeDir):
    directory( FSDirectory::getDirectory(path, create) ),
    analyzer(a),
    segmentInfos (_CLNEW SegmentInfos),
    closeDir(_closeDir)
{
    //Func - Constructor: builds an IndexWriter on the index stored at the filesystem location path.
    //Pre  - path != NULL and names a directory
    //       a is the analyzer stored as this writer's default analyzer
    //       create is forwarded both to FSDirectory::getDirectory and to _IndexWriter
    //       _closeDir is stored in closeDir; close() calls directory->close() only when it is true
    //Post - The directory has been obtained through FSDirectory::getDirectory, a fresh
    //       SegmentInfos has been allocated and _IndexWriter(create) has completed the set-up.

    // NOTE(review): this check executes after the initializer list above has already
    // handed path to FSDirectory::getDirectory, so it cannot protect that call.
    CND_PRECONDITION(path != NULL, "path is NULL");

    // The remaining initialisation is shared by both constructors
    _IndexWriter ( create );
}
IndexWriter::IndexWriter(Directory* d, Analyzer* a, const bool create, const bool _closeDir):
    directory(_CL_POINTER(d)),
    analyzer(a),
    segmentInfos (_CLNEW SegmentInfos),
    closeDir(_closeDir)
{
    //Func - Constructor: builds an IndexWriter on the index held by the directory d.
    //Pre  - d is a valid directory
    //       a is the analyzer stored as this writer's default analyzer
    //       create is forwarded to _IndexWriter
    //       _closeDir is stored in closeDir; close() calls directory->close() only when it is true
    //Post - A reference to d has been taken through _CL_POINTER, a fresh SegmentInfos has
    //       been allocated and _IndexWriter(create) has completed the set-up.

    // The remaining initialisation is shared by both constructors
    _IndexWriter ( create );
}
void IndexWriter::_IndexWriter(const bool create){
    //Func - Shared initialisation for the constructors: sets the default settings, creates the
    //       RAM directory, acquires the "write.lock" and, while holding the "commit.lock",
    //       runs IndexWriterLockWith (which writes segmentInfos when create is true and
    //       reads it otherwise).
    //Pre  - directory and segmentInfos have been initialised by the calling constructor
    //       create indicates if a new index must be written or an existing one must be read
    //Post - On success the write lock is held in writeLock and isOpen is true.
    //       On any failure every resource acquired here has been released through _finalize()
    //       and the exception is propagated. CL_ERR_IO is thrown when the write lock could
    //       not be obtained.
    // NOTE(review): the reference held in `directory` is not released on either failure
    //       path; confirm whether the constructors' callers are expected to cope with that.

    maxFieldLength  = IndexWriter::DEFAULT_MAX_FIELD_LENGTH;
    similarity      = CL_NS(search)::Similarity::getDefault();
    useCompoundFile = true;

    //Create a ramDirectory
    ramDirectory = _CLNEW TransactionalRAMDirectory;
    CND_CONDITION(ramDirectory != NULL,"ramDirectory is NULL");

    //No write lock is held yet; _finalize() relies on this being NULL in that case
    writeLock = NULL;

    //Default merge settings
    mergeFactor  = 10;
    maxMergeDocs = INT_MAX;
    minMergeDocs = LUCENE_INDEXWRITER_DEFAULT_MIN_MERGE_DOCS;

    //Create a new lock using the name "write.lock"
    LuceneLock* newLock = directory->makeLock("write.lock");
    CND_CONDITION(newLock != NULL, "No memory could be allocated for LuceneLock newLock");

    //Try to obtain a write lock
    if (!newLock->obtain(LUCENE_WRITE_LOCK_TIMEOUT)){
        //Write lock could not be obtained so delete it
        _CLDELETE(newLock);
        //Reset the instance
        _finalize();
        //throw an exception because no writelock could be created or obtained
        _CLTHROWA(CL_ERR_IO, "Index locked for write or no write access." );
    }

    //The Write Lock has been obtained so save it for later use
    writeLock = newLock;

    //Create a new lock using the name "commit.lock"
    LuceneLock* lock = directory->makeLock("commit.lock");
    CND_CONDITION(lock != NULL, "No memory could be allocated for LuceneLock lock");

    IndexWriterLockWith with ( lock,LUCENE_WRITE_LOCK_TIMEOUT,this,create );
    try {
        SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync
        with.run();
    } catch (...) {
        //This function runs as part of a constructor, so the destructor will not be
        //invoked when an exception escapes. Clean up here: otherwise the commit lock
        //object leaks and the write lock obtained above is never released.
        _CLDELETE(lock);
        _finalize();
        throw;
    }

    //Release the commit lock
    _CLDELETE(lock);

    isOpen = true;
}
void IndexWriter::_finalize(){
    //Func - Releases the resources owned by this instance
    //Pre  - true
    //Post - The write lock (if any) has been released and deleted, the ramDirectory (if any)
    //       has been closed and its reference dropped, and segmentInfos has been deleted

    //Give up the write lock when one is still held
    if ( writeLock != NULL ){
        writeLock->release();
        _CLDELETE( writeLock );
    }

    //Close the RAM directory and drop the reference to it
    if ( ramDirectory != NULL ){
        ramDirectory->close();
        _CLDECDELETE(ramDirectory);
    }

    //Dispose of the segment information
    _CLDELETE(segmentInfos);
}
IndexWriter::~IndexWriter() {
    //Func - Destructor
    //Pre  - true
    //Post - The writer has been closed and its remaining resources have been released

    // NOTE(review): close() calls flushRamSegments(); if that throws, the exception leaves
    // the destructor and _finalize() is skipped - confirm this is acceptable.
    this->close();

    //Release whatever close() left behind (e.g. segmentInfos)
    this->_finalize();
}
void* IndexWriterLockWith::doBody() {
//Func - Writes segmentInfos to or reads segmentInfos from disk
//Pre - writer != NULL
//Post - if create is true then segementInfos has been written to disk otherwise
// segmentInfos has been read from disk
CND_PRECONDITION(writer != NULL, "writer is NULL");
if (create)
writer->segmentInfos->write(writer->getDirectory());
else
writer->segmentInfos->read(writer->getDirectory());
return NULL;
}
void* IndexWriterLockWith2::doBody(){
//Func - Writes the segmentInfos to Disk and deletes unused segments
//Pre - writer != NULL
//Post - segmentInfos have been written to disk and unused segments have been deleted
CND_PRECONDITION(writer != NULL, "writer is NULL");
//commit before deleting
writer->segmentInfos->write(writer->getDirectory());
//delete now-unused segments
writer->deleteSegments(segmentsToDelete);
return NULL;
}
void IndexWriter::close( ) {
    //Func - Flushes the RAM segments, closes the RAM directory, releases the write lock and
    //       gives up the directory the index is stored in.
    //Pre  - closeDir indicates if the directory must be closed or not
    //Post - When the writer was open: the RAM segments have been flushed, the ramDirectory
    //       has been closed, the write lock has been released, the directory has been
    //       closed if closeDir is true, the reference to the directory has been dropped
    //       and isOpen is false.
    //       When the writer was already closed nothing happens, so close() may be called
    //       more than once.
    SCOPED_LOCK_MUTEX(THIS_LOCK)

    if ( isOpen ){
        //Flush the Ram Segments
        flushRamSegments();

        //Close the ram directory
        if ( ramDirectory != NULL ){
            ramDirectory->close();
            _CLDECDELETE(ramDirectory);
        }

        //Release the write lock BEFORE the directory is closed and its reference is
        //dropped: the lock was created by directory->makeLock, so releasing it after
        //_CLDECDELETE(directory) could operate on a closed or already destroyed directory.
        if ( writeLock != NULL ){
            writeLock->release();
            _CLDELETE( writeLock );
        }

        //Check if this instance must close the directory
        if ( closeDir ){
            directory->close();
        }
        _CLDECDELETE(directory);

        isOpen = false;
    }
}
int32_t IndexWriter::docCount(){
//Func - Counts the number of documents in the index
//Pre - true
//Post - The number of documents have been returned
SCOPED_LOCK_MUTEX(THIS_LOCK)
//Initialize count
int32_t count = 0;
//Iterate through all segmentInfos
for (int32_t i = 0; i < segmentInfos->size(); i++) {
//Get the i-th SegmentInfo
SegmentInfo* si = segmentInfos->info(i);
//Retrieve the number of documents of the segment and add it to count
count += si->docCount;
}
return count;
}
void IndexWriter::addDocument(Document* doc, Analyzer* analyzer) {
    //Func - Adds a document to the index as a new single-document segment in the ramDirectory
    //Pre  - doc contains a valid reference to a document
    //       analyzer may be NULL, in which case the analyzer of this IndexWriter is used
    //       ramDirectory != NULL
    //Post - The document has been written to a new segment, the segment has been registered
    //       in segmentInfos, maybeMergeSegments() has been called and the ramDirectory
    //       transaction has been committed. When anything throws, the transaction is
    //       aborted and the exception is rethrown.
    CND_PRECONDITION(ramDirectory != NULL,"ramDirectory is NULL");

    //Fall back on the writer's own analyzer when the caller did not supply one
    Analyzer* docAnalyzer = (analyzer != NULL) ? analyzer : this->analyzer;

    ramDirectory->transStart();
    try {
        char* segmentName = newSegmentName();
        CND_CONDITION(segmentName != NULL, "segmentName is NULL");

        try {
            //The DocumentWriter writes into the ramDirectory using the selected analyzer
            DocumentWriter* docWriter = _CLNEW DocumentWriter(
                ramDirectory, docAnalyzer, similarity, maxFieldLength );
            CND_CONDITION(docWriter != NULL, "dw is NULL");

            try {
                //Write the client-supplied document into the new segment
                docWriter->addDocument(segmentName, doc);
            } _CLFINALLY(
                _CLDELETE(docWriter);
            );

            //Describe the new one-document segment
            SegmentInfo* newInfo = _CLNEW SegmentInfo(segmentName, 1, ramDirectory);
            CND_CONDITION(newInfo != NULL, "Si is NULL");

            {
                SCOPED_LOCK_MUTEX(THIS_LOCK)

                //Register the segment with the list of all segments
                segmentInfos->add(newInfo);

                //Check to see if the segments must be merged
                maybeMergeSegments();
            }
        } _CLFINALLY(
            //The name buffer is released on success as well as on failure
            _CLDELETE_CaARRAY(segmentName);
        );
    } catch (...) {
        //Undo everything written to the ramDirectory since transStart()
        ramDirectory->transAbort();
        throw;
    }

    ramDirectory->transCommit();
}
void IndexWriter::optimize() {
//Func - Optimizes the index for which this Instance is responsible
//Pre - true
//Post -
SCOPED_LOCK_MUTEX(THIS_LOCK)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -