⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 indexwriter.cpp

📁 clucene是c++版的全文检索引擎,完全移植于lucene,采用 stl 编写.
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
* 
* Distributable under the terms of either the Apache License (Version 2.0) or 
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "IndexWriter.h"

#include "CLucene/document/Document.h"
#include "CLucene/store/Directory.h"
#include "CLucene/store/Lock.h"
#include "CLucene/util/VoidList.h"
#include "DocumentWriter.h"
#include "SegmentInfos.h"
#include "SegmentMerger.h"

CL_NS_USE(store)
CL_NS_USE(util)
CL_NS_USE(document)
CL_NS_USE(analysis)
CL_NS_DEF(index)

  IndexWriter::IndexWriter(const char* path, Analyzer* a, const bool create, const bool _closeDir):
		directory( FSDirectory::getDirectory(path, create) ),
		analyzer(a),
		segmentInfos (_CLNEW SegmentInfos),
		closeDir(_closeDir)
  {
  //Func - Constructor: builds an IndexWriter on the index stored in the file-system
  //       directory named by path.
  //Pre  - path != NULL and names a directory
  //       a is the analyzer used for documents added without an explicit analyzer
  //       create selects between writing a fresh segments file (true) and reading
  //       the existing one (false); it is also forwarded to FSDirectory::getDirectory
  //       _closeDir tells close() whether it must close the directory
  //Post - The directory has been obtained from FSDirectory::getDirectory and the
  //       remaining initialisation has been carried out by _IndexWriter(create)

		//NOTE: the initializer list above has already handed path to
		//FSDirectory::getDirectory by the time this check is evaluated.
		CND_PRECONDITION(path != NULL, "path is NULL");

		//Shared second-stage initialisation (locks, defaults, segment infos)
		_IndexWriter ( create );
  }

  IndexWriter::IndexWriter(Directory* d, Analyzer* a, const bool create, const bool _closeDir):
		directory(_CL_POINTER(d)),
		analyzer(a),
		segmentInfos (_CLNEW SegmentInfos),
		closeDir(_closeDir)
  {
  //Func - Constructor: builds an IndexWriter on the index stored in directory d.
  //Pre  - d refers to a valid directory
  //       a is the analyzer used for documents added without an explicit analyzer
  //       create selects between writing a fresh segments file (true) and reading
  //       the existing one (false)
  //       _closeDir tells close() whether it must close the directory
  //Post - The writer holds d (taken through _CL_POINTER) and the remaining
  //       initialisation has been carried out by _IndexWriter(create)

		//Shared second-stage initialisation (locks, defaults, segment infos)
		_IndexWriter ( create );
  }

  void IndexWriter::_IndexWriter(const bool create){
  //Func - Second-stage initialisation shared by both constructors
  //Pre  - directory, analyzer, segmentInfos and closeDir have been set by the constructor
  //       create indicates if the indexWriter must write a new segments file (true)
  //       or read the existing one (false)
  //Post - Default settings have been applied, the "write.lock" lock is held in writeLock,
  //       segmentInfos has been written to / read from the directory while holding
  //       "commit.lock", and isOpen is true.
  //       On failure (write lock not obtained, or the commit step throws) everything
  //       acquired here is released through _finalize() and an exception leaves this
  //       method. This matters because the destructor does not run when a constructor
  //       throws, so nothing else would release the write lock.

	maxFieldLength = IndexWriter::DEFAULT_MAX_FIELD_LENGTH;

	similarity = CL_NS(search)::Similarity::getDefault();

	useCompoundFile = true;

	//Create the in-memory directory used for newly added documents
	ramDirectory = _CLNEW TransactionalRAMDirectory;

	CND_CONDITION(ramDirectory != NULL,"ramDirectory is NULL");

	//No write lock is held yet; _finalize() relies on this being NULL in that case
	writeLock  = NULL;
	//Default merge settings
	mergeFactor = 10;
	maxMergeDocs = INT_MAX;
	minMergeDocs = LUCENE_INDEXWRITER_DEFAULT_MIN_MERGE_DOCS;

	//Create a new lock using the name "write.lock"
	LuceneLock* newLock = directory->makeLock("write.lock");

	//Condition check to see if newLock has been allocated properly
	CND_CONDITION(newLock != NULL, "No memory could be allocated for LuceneLock newLock");

	//Try to obtain a write lock
	if (!newLock->obtain(LUCENE_WRITE_LOCK_TIMEOUT)){
		//Write lock could not be obtained so delete it
		_CLDELETE(newLock);
		//Reset the instance
		_finalize();
		//throw an exception because no writelock could be created or obtained
		_CLTHROWA(CL_ERR_IO, "Index locked for write or no write access." );
	}

	//The Write Lock has been obtained so save it for later use
	writeLock = newLock;

	//Create a new lock using the name "commit.lock"
	LuceneLock* lock = directory->makeLock("commit.lock");

	//Condition check to see if lock has been allocated properly
	CND_CONDITION(lock != NULL, "No memory could be allocated for LuceneLock lock");

	try {
		IndexWriterLockWith with ( lock,LUCENE_WRITE_LOCK_TIMEOUT,this,create );

		{
			SCOPED_LOCK_MUTEX(directory->THIS_LOCK) // in- & inter-process sync
			with.run();
		}
	} catch (...) {
		//Writing/reading the segment infos failed: free the commit lock object and
		//release the write lock (plus ramDirectory and segmentInfos) before
		//propagating, otherwise the index would stay locked for writing.
		_CLDELETE(lock);
		_finalize();
		throw;
	}

	//Free the commit lock object
	_CLDELETE(lock);

	isOpen = true;
  }

  void IndexWriter::_finalize(){
  //Func - Releases the resources held by this instance
  //Pre  - true
  //Post - The write lock (if any) has been released and deleted, the ramDirectory
  //       (if any) has been closed and its reference dropped, and segmentInfos
  //       has been deleted

	//Write lock: release it first, then dispose of the lock object
	if ( writeLock ){
		writeLock->release();
		_CLDELETE( writeLock );
	}

	//In-memory directory: close it and drop our reference
	if ( ramDirectory ){
		ramDirectory->close();
		_CLDECDELETE(ramDirectory);
	}

	//Segment information
	_CLDELETE(segmentInfos);
  }

  IndexWriter::~IndexWriter() {
  //Func - Destructor
  //Pre  - true
  //Post - close() has been attempted and _finalize() has run in every case, so the
  //       write lock, ramDirectory and segmentInfos are released even when close()
  //       throws. An exception from close() is still propagated as before.
	try {
		close();
	} catch (...) {
		//close() failed part-way: still release whatever is left, then rethrow
		_finalize();
		throw;
	}
	_finalize();
  }


  void* IndexWriterLockWith::doBody() {
  //Func - Synchronises the writer's segmentInfos with its directory
  //Pre  - writer != NULL
  //Post - When create is false segmentInfos has been read from the writer's directory,
  //       otherwise segmentInfos has been written to it. Always returns NULL.

	CND_PRECONDITION(writer != NULL, "writer is NULL");

	if (!create) {
		//Existing index: load the segment infos
		writer->segmentInfos->read(writer->getDirectory());
	} else {
		//New index: store the (initial) segment infos
		writer->segmentInfos->write(writer->getDirectory());
	}

	return NULL;
  }

  void* IndexWriterLockWith2::doBody(){
  //Func - Commits the writer's segmentInfos and then removes the segments listed
  //       in segmentsToDelete
  //Pre  - writer != NULL
  //Post - segmentInfos has been written to the writer's directory and
  //       writer->deleteSegments(segmentsToDelete) has been called. Always returns NULL.

	CND_PRECONDITION(writer != NULL, "writer is NULL");

	//Step 1: commit - this must happen before anything is deleted
	writer->segmentInfos->write(writer->getDirectory());

	//Step 2: drop the segments that are no longer needed
	writer->deleteSegments(segmentsToDelete);

	return NULL;
  }

  void IndexWriter::close( ) {
  //Func - Flushes all changes to an index, closes all associated files, and closes
  //       the directory that the index is stored in.
  //Pre  - closeDir indicates if the directory must be closed or not
  //Post - All the changes have been flushed to disk and the write lock has been released
  //       The ramDirectory has also been closed. The directory has been closed
  //       if the reference count of the directory reaches zero

	 SCOPED_LOCK_MUTEX(THIS_LOCK)
     if ( isOpen ){
	   //Flush the Ram Segments
	   flushRamSegments();
	   //Close the ram directory
	   if ( ramDirectory != NULL ){
		  ramDirectory->close();
		  _CLDECDELETE(ramDirectory);
	   }

	   //Check if this instance must close the directory
	   if ( closeDir ){
		   directory->close();
	   }
	   _CLDECDELETE(directory);

      // release write lock
	   if (writeLock != NULL){
		   writeLock->release();
		   _CLDELETE( writeLock );
	   }

       isOpen = false;
     }
  }


  int32_t IndexWriter::docCount(){
  //Func - Sums the document counts of all segments known to this writer
  //Pre  - true
  //Post - The sum of docCount over every entry of segmentInfos has been returned

	SCOPED_LOCK_MUTEX(THIS_LOCK)

	int32_t total = 0;
	int32_t idx = 0;

	//Walk the segment list front to back, accumulating each segment's docCount
	while ( idx < segmentInfos->size() ) {
		total += segmentInfos->info(idx)->docCount;
		++idx;
	}

	return total;
  }

  void IndexWriter::addDocument(Document* doc, Analyzer* analyzer) {
  //Func - Adds a document to the index
  //Pre  - doc refers to a valid document
  //       analyzer may be NULL, in which case the writer's own analyzer is used
  //       ramDirectory != NULL
  //Post - The document has been written as a new segment in ramDirectory, a matching
  //       SegmentInfo has been appended to segmentInfos and maybeMergeSegments() has
  //       been called. The work is bracketed by transStart()/transCommit() on
  //       ramDirectory; on any exception transAbort() is called and the exception
  //       is rethrown.
	CND_PRECONDITION(ramDirectory != NULL,"ramDirectory is NULL");

	//Fall back to the analyzer this writer was constructed with
	if ( analyzer == NULL ){
		analyzer = this->analyzer;
	}

	ramDirectory->transStart();
	try {
		//Name for the segment that will hold this document
		char* newName = newSegmentName();
		CND_CONDITION(newName != NULL, "segmentName is NULL");
		try {
			//The DocumentWriter writes into ramDirectory using the chosen analyzer
			//and this writer's similarity / maxFieldLength settings.
			DocumentWriter* docWriter = _CLNEW DocumentWriter(
				ramDirectory, analyzer, similarity, maxFieldLength );
			CND_CONDITION(docWriter != NULL, "dw is NULL");
			try {
				//Write the caller's document into the new segment
				docWriter->addDocument(newName, doc);
			} _CLFINALLY(
				_CLDELETE(docWriter);
			);

			//Describe the new segment (located in ramDirectory)
			SegmentInfo* freshInfo = _CLNEW SegmentInfo(newName, 1, ramDirectory);
			CND_CONDITION(freshInfo != NULL, "Si is NULL");

			{
				SCOPED_LOCK_MUTEX(THIS_LOCK)

				//Register the segment, then let the writer decide whether
				//segments need to be merged
				segmentInfos->add(freshInfo);
				maybeMergeSegments();
			}
		} _CLFINALLY(
			//The segment name buffer is released whether or not the above succeeded
			_CLDELETE_CaARRAY(newName);
		);

	} catch (...) {
		//Undo the changes made to ramDirectory since transStart()
		ramDirectory->transAbort();
		throw;
	}
	ramDirectory->transCommit();
  }


  void IndexWriter::optimize() {
  //Func - Optimizes the index for which this Instance is responsible
  //Pre  - true
  //Post -
    SCOPED_LOCK_MUTEX(THIS_LOCK)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -