⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 compoundfile.cpp

📁 clucene是c++版的全文检索引擎,完全移植于lucene,采用 stl 编写.
💻 CPP
字号:
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
* 
* Distributable under the terms of either the Apache License (Version 2.0) or 
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "CompoundFile.h"
#include "CLucene/util/Misc.h"

CL_NS_USE(store)
CL_NS_USE(util)
CL_NS_DEF(index)

CompoundFileReader::CSInputStream::CSInputStream(CL_NS(store)::IndexInput* base, const int64_t fileOffset, const int64_t length){
   this->base = base;
   this->fileOffset = fileOffset;
   this->_length = length;   // variable in the superclass
}
	
void CompoundFileReader::CSInputStream::readInternal(uint8_t* b, const int32_t len)
{
   SCOPED_LOCK_MUTEX(base->THIS_LOCK)

   int64_t start = getFilePointer();
   if(start + len > _length)
      _CLTHROWA(CL_ERR_IO,"read past EOF");
   base->seek(fileOffset + start);
   base->readBytes(b, len);
}
CompoundFileReader::CSInputStream::~CSInputStream(){
}
IndexInput* CompoundFileReader::CSInputStream::clone() const
{
	return _CLNEW CSInputStream(*this);
}
CompoundFileReader::CSInputStream::CSInputStream(const CSInputStream& clone): BufferedIndexInput(clone){
   this->base = clone.base; //no need to clone this..
   this->fileOffset = clone.fileOffset;
   this->_length = clone._length;
}

void CompoundFileReader::CSInputStream::close(){
}

CompoundFileReader::CompoundFileReader(Directory* dir, const char* name):
	entries(true,true)
{
   directory = dir;
   STRCPY_AtoA(fileName,name,CL_MAX_PATH);

   bool success = false;

   try {
      stream = dir->openInput(name);

      // read the directory and init files
      int32_t count = stream->readVInt();
      FileEntry* entry = NULL;
      TCHAR tid[CL_MAX_PATH];
      for (int32_t i=0; i<count; i++) {
            int64_t offset = stream->readLong();
            stream->readString(tid,CL_MAX_PATH);
            char* aid = STRDUP_TtoA(tid);

            if (entry != NULL) {
               // set length of the previous entry
               entry->length = offset - entry->offset;
            }

            entry = _CLNEW FileEntry();
            entry->offset = offset;
            entries.put(aid, entry);
      }

      // set the length of the final entry
      if (entry != NULL) {
            entry->length = stream->length() - entry->offset;
      }

      success = true;

   }_CLFINALLY(
      if (! success && (stream != NULL)) {
            try {
               stream->close();
               _CLDELETE(stream);
			   } catch (CLuceneError& err){
                if ( err.number() != CL_ERR_IO )
                    throw err;
				   //else ignore
            }
      }
   )
}

CompoundFileReader::~CompoundFileReader(){
	close();
}

Directory* CompoundFileReader::getDirectory(){
   return directory;
}

const char* CompoundFileReader::getName(){
   return fileName;
}

void CompoundFileReader::close(){
  SCOPED_LOCK_MUTEX(THIS_LOCK)

  if (stream != NULL){
      entries.clear();
      stream->close();
      _CLDELETE(stream);
  }
}

IndexInput* CompoundFileReader::openInput(const char* id){
  SCOPED_LOCK_MUTEX(THIS_LOCK)

  if (stream == NULL)
      _CLTHROWA(CL_ERR_IO,"Stream closed");
	 
  const FileEntry* entry = entries.get(id);
  if (entry == NULL){
      char buf[CL_MAX_PATH+30];
      strcpy(buf,"No sub-file with id ");
      strncat(buf,id,CL_MAX_PATH);
      strcat(buf," found");
      _CLTHROWA(CL_ERR_IO,buf);
  }
  return _CLNEW CSInputStream(stream, entry->offset, entry->length);
}

char** CompoundFileReader::list() const{
  char** r = _CL_NEWARRAY(char*,entries.size()+1);
  int32_t j = 0;
  for ( CL_NS(util)::CLHashMap<const char*,CompoundFileReader::FileEntry*,Compare::Char,Equals::Char>::const_iterator i=entries.begin();i!=entries.end();i++ ){
     r[j] = STRDUP_AtoA(i->first);
     j++;
  }
  r[entries.size()]=NULL;

  return r;
}

bool CompoundFileReader::fileExists(const char* name) const{
   return entries.exists(name);
}

int64_t CompoundFileReader::fileModified(const char* name) const{
  return directory->fileModified(fileName);
}

void CompoundFileReader::touchFile(const char* name){
  directory->touchFile(fileName);
}

bool CompoundFileReader::deleteFile(const char* name){
   _CLTHROWA(CL_ERR_UnsupportedOperation,"UnsupportedOperationException");
}

void CompoundFileReader::renameFile(const char* from, const char* to){
   _CLTHROWA(CL_ERR_UnsupportedOperation,"UnsupportedOperationException");
}

int64_t CompoundFileReader::fileLength(const char* name) const{
  FileEntry* e = entries.get(name);
  if (e == NULL){
     char buf[CL_MAX_PATH + 30];
     strcpy(buf,"File ");
     strncat(buf,name,CL_MAX_PATH );
     strcat(buf," does not exist");
  }
  return e->length;
}
IndexOutput* CompoundFileReader::createOutput(const char* name){
   _CLTHROWA(CL_ERR_UnsupportedOperation,"UnsupportedOperationException");
}
LuceneLock* CompoundFileReader::makeLock(const char* name){
   _CLTHROWA(CL_ERR_UnsupportedOperation,"UnsupportedOperationException");
}

TCHAR* CompoundFileReader::toString() const{
	TCHAR* ret = _CL_NEWARRAY(TCHAR,strlen(fileName)+20); //20=strlen("CompoundFileReader@")
	_tcscpy(ret,_T("CompoundFileReader@"));
	STRCPY_AtoT(ret+19,fileName,strlen(fileName));

	return ret;
}

CompoundFileWriter::CompoundFileWriter(Directory* dir, const char* name):
	ids(true),entries(true){
  if (dir == NULL)
      _CLTHROWA(CL_ERR_IO,"Missing directory");
  if (name == NULL)
      _CLTHROWA(CL_ERR_IO,"Missing name");
  merged = false;
  directory = dir;
  strncpy(fileName,name,CL_MAX_PATH);
}

CompoundFileWriter::~CompoundFileWriter(){
}

Directory* CompoundFileWriter::getDirectory(){
  return directory;
}

/** Returns the name of the compound file. */
const char* CompoundFileWriter::getName(){
  return fileName;
}

void CompoundFileWriter::addFile(const char* file){
  if (merged)
      _CLTHROWA(CL_ERR_IO,"Can't add extensions after merge has been called");

  if (file == NULL)
      _CLTHROWA(CL_ERR_IO,"Missing source file");

  if (ids.find(file)!=ids.end()){
     char buf[CL_MAX_PATH + 30];
     strcpy(buf,"File ");
     strncat(buf,file,CL_MAX_PATH);
     strcat(buf," already added");
     _CLTHROWA(CL_ERR_IO,buf);
  }
  ids.insert(STRDUP_AtoA(file));

  WriterFileEntry* entry = _CLNEW WriterFileEntry();
  STRCPY_AtoA(entry->file,file,CL_MAX_PATH);
  entries.push_back(entry);
}

void CompoundFileWriter::close(){
  if (merged)
      _CLTHROWA(CL_ERR_IO,"Merge already performed");

  if (entries.size()==0) //isEmpty()
      _CLTHROWA(CL_ERR_IO,"No entries to merge have been defined");

  merged = true;

  // open the compound stream
  IndexOutput* os = NULL;
  try {
      os = directory->createOutput(fileName);

      // Write the number of entries
      os->writeVInt(entries.size());

      // Write the directory with all offsets at 0.
      // Remember the positions of directory entries so that we can
      // adjust the offsets later
      { //msvc6 for scope fix
		  TCHAR tfile[CL_MAX_PATH];
		  for ( CLLinkedList<WriterFileEntry*>::iterator i=entries.begin();i!=entries.end();i++ ){
			  WriterFileEntry* fe = *i;
			  fe->directoryOffset = os->getFilePointer();
			  os->writeLong(0);    // for now
			  STRCPY_AtoT(tfile,fe->file,CL_MAX_PATH);
			  os->writeString(tfile,_tcslen(tfile));
		  }
	  }

      // Open the files and copy their data into the stream.
      // Remeber the locations of each file's data section.
      { //msvc6 for scope fix
		  int32_t bufferLength = 1024;
		  uint8_t buffer[1024];
		  for ( CL_NS(util)::CLLinkedList<WriterFileEntry*>::iterator i=entries.begin();i!=entries.end();i++ ){
			  WriterFileEntry* fe = *i;
			  fe->dataOffset = os->getFilePointer();
			  copyFile(fe, os, buffer, bufferLength);
		  }
	  }

	  { //msvc6 for scope fix
		  // Write the data offsets into the directory of the compound stream
		  for ( CLLinkedList<WriterFileEntry*>::iterator i=entries.begin();i!=entries.end();i++ ){
			  WriterFileEntry* fe = *i;
			  os->seek(fe->directoryOffset);
			  os->writeLong(fe->dataOffset);
		  }
	  }


  } _CLFINALLY (
	  if (os != NULL) try { os->close(); _CLDELETE(os); } catch (...) { }
  );
}


void CompoundFileWriter::copyFile(WriterFileEntry* source, IndexOutput* os, uint8_t* buffer, int32_t bufferLength){
  IndexInput* is = NULL;
  try {
      int64_t startPtr = os->getFilePointer();

      is = directory->openInput(source->file);
      int64_t length = is->length();
      int64_t remainder = length;
      int32_t chunk = bufferLength;

      while(remainder > 0) {
          int32_t len = (int32_t)min((int64_t)chunk, remainder);
          is->readBytes(buffer, len);
          os->writeBytes(buffer, len);
          remainder -= len;
      }

      // Verify that remainder is 0
      if (remainder != 0){
         TCHAR buf[CL_MAX_PATH+100];
         _sntprintf(buf,CL_MAX_PATH+100,_T("Non-zero remainder length after copying")
          _T(": %d (id: %s, length: %d, buffer size: %d)"),
          remainder,source->file,length,chunk );
		 _CLTHROWT(CL_ERR_IO,buf);
      }

      // Verify that the output length diff is equal to original file
      int64_t endPtr = os->getFilePointer();
      int64_t diff = endPtr - startPtr;
      if (diff != length){
         TCHAR buf[100];
         _sntprintf(buf,100,_T("Difference in the output file offsets %d ")
            _T("does not match the original file length %d"),diff,length);
         _CLTHROWT(CL_ERR_IO,buf);
      }
  } _CLFINALLY (
     if (is != NULL){
        is->close();
        _CLDELETE(is);
     }
  );
}

CL_NS_END

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -