⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 reader.cpp

📁 clucene是c++版的全文检索引擎,完全移植于lucene,采用 stl 编写.
💻 CPP
字号:
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
* 
* Distributable under the terms of either the Apache License (Version 2.0) or 
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "Reader.h"

CL_NS_DEF(util)

/**
 * Creates a reader over a TCHAR string.
 *
 * The base Reader is constructed with (NULL, true) and the actual stream,
 * a jstreams::StringReader<TCHAR> built from @p value, is attached afterwards.
 * NOTE(review): the `true` flag presumably makes Reader own/delete the
 * stream - confirm against Reader.h.
 *
 * @param value string handed to jstreams::StringReader<TCHAR>
 */
StringReader::StringReader(const TCHAR* value)
	: Reader(NULL, true)
{
	this->reader = new jstreams::StringReader<TCHAR>(value);
}
/**
 * Creates a reader over a TCHAR buffer with an explicit length.
 *
 * The base Reader is constructed with (NULL, true); the stream is a
 * jstreams::StringReader<TCHAR> built from @p value and @p length.
 *
 * @param value  buffer handed to jstreams::StringReader<TCHAR>
 * @param length length argument forwarded unchanged to the jstreams reader
 */
StringReader::StringReader(const TCHAR* value, const int32_t length)
	: Reader(NULL, true)
{
	this->reader = new jstreams::StringReader<TCHAR>(value, length);
}
/**
 * Creates a reader over a TCHAR buffer with an explicit length and
 * copy policy.
 *
 * The base Reader is constructed with (NULL, true); the stream is a
 * jstreams::StringReader<TCHAR> built from the three arguments.
 *
 * @param value    buffer handed to jstreams::StringReader<TCHAR>
 * @param length   length argument forwarded unchanged
 * @param copyData copy flag forwarded unchanged to the jstreams reader
 */
StringReader::StringReader(const TCHAR* value, const int32_t length, bool copyData)
	: Reader(NULL, true)
{
	this->reader = new jstreams::StringReader<TCHAR>(value, length, copyData);
}
/** Destructor; this class releases nothing itself. */
StringReader::~StringReader()
{
	// intentionally empty
}


/**
 * Opens @p path as a byte stream and wraps it in a SimpleInputStreamReader
 * that decodes the bytes according to @p enc.
 *
 * @param path     file to open (passed to jstreams::FileInputStream)
 * @param enc      encoding name understood by SimpleInputStreamReader
 * @param cachelen forwarded to jstreams::FileInputStream
 * (the fourth, unnamed parameter is unused)
 *
 * If constructing the decoder throws (e.g. unsupported encoding), the file
 * stream allocated here is released before the exception propagates; the
 * destructor does not run for a partially constructed object, so without
 * this the stream would leak.
 */
FileReader::FileReader ( const char* path, const char* enc,
						 const int32_t cachelen, const int32_t /*cachebuff*/ ):
	Reader(NULL, true)
{
	this->input = new jstreams::FileInputStream(path, cachelen);
	try {
		this->reader = new SimpleInputStreamReader(this->input,enc); //(this is a jstream object)
	} catch (...) {
		// ~FileReader will not be called: clean up the raw stream ourselves.
		delete this->input;
		this->input = NULL;
		throw;
	}
}

/** Releases the underlying byte stream created in the constructor. */
FileReader::~FileReader()
{
	// delete on a null pointer is a no-op, so no explicit guard is required
	delete input;
}
int32_t FileReader::read(const TCHAR*& start, int32_t _min, int32_t _max) {
    return reader->read(start, _min, _max);
}
int64_t FileReader::mark(int32_t readlimit) {
    return reader->mark(readlimit);
}
int64_t FileReader::reset(int64_t newpos) {
    return reader->reset(newpos);
}



/**
 * Creates a decoding reader on top of a raw byte stream.
 *
 * @param i   byte stream to read from; stored, not copied
 * @param enc encoding name: "ASCII" always; "UTF-8" and "UCS-2LE" only
 *            when built with _UCS2
 * @throws CL_ERR_IllegalArgument if @p enc is NULL or names an encoding
 *         this reader does not handle
 */
SimpleInputStreamReader::SimpleInputStreamReader(jstreams::StreamBase<char> *i, const char* enc)
{
    finishedDecoding = false;
    input = i;
    charbuf.setSize(262);

	// strcmp() on a NULL pointer is undefined behaviour; reject it explicitly.
	if ( enc == NULL ){
		_CLTHROWA(CL_ERR_IllegalArgument,"Encoding name must not be NULL");
	}

	if ( strcmp(enc,"ASCII")==0 )
		encoding = ASCII;
#ifdef _UCS2
	else if ( strcmp(enc,"UTF-8")==0 )
		encoding = UTF8;
	else if ( strcmp(enc,"UCS-2LE")==0 )
		encoding = UCS2_LE;
#endif
	else
		_CLTHROWA(CL_ERR_IllegalArgument,"Unsupported encoding, use jstreams iconv based instead");

	mark(262);
    charsLeft = 0;
}
/**
 * Destructor. Only clears the pointer to the byte stream; the stream
 * itself is not deleted here.
 */
SimpleInputStreamReader::~SimpleInputStreamReader()
{
	this->input = NULL;
}
/**
 * Decodes bytes pending in charbuf into TCHARs.
 *
 * @param start destination buffer
 * @param space number of TCHARs that fit at @p start
 * @return number of TCHARs written, or -1 after setting error/status when an
 *         invalid UTF-8 sequence is found
 * @throws CL_ERR_Runtime if the stored encoding is not one handled here
 *
 * On return charbuf describes only the bytes that were not consumed: either
 * a trailing incomplete character moved to the front of the buffer, or the
 * remainder left in place when the output filled up first.
 */
int32_t SimpleInputStreamReader::decode(TCHAR* start, int32_t space){
	// decode from charbuf
	const char *inbuf = charbuf.readPos;
	const char *inbufend = charbuf.readPos + charbuf.avail;
	TCHAR *outbuf = start;
	const TCHAR *outbufend = outbuf + space;

	if ( encoding == ASCII ){
		while ( outbuf<outbufend && inbuf<inbufend ){
			// Go through unsigned char so bytes >= 0x80 are not sign-extended
			// when plain char is signed and TCHAR is wider than char.
			*outbuf = static_cast<TCHAR>(static_cast<unsigned char>(*inbuf));
			outbuf++;
			inbuf++;
		}
	}
#ifdef _UCS2
	else if ( encoding == UCS2_LE ){
		// two bytes per character, low byte first
		while ( outbuf<outbufend && (inbuf+1)<inbufend ){
			const uint8_t c1 = *inbuf;
			const uint8_t c2 = *(inbuf+1);
			const unsigned short c = c1 | (c2<<8);

			*outbuf = c;
			outbuf++;
			inbuf+=2;
		}
	}
	else if ( encoding == UTF8 ){
		while ( outbuf<outbufend && inbuf<inbufend ){
			const size_t utflen = lucene_utf8charlen(inbuf);
			if ( utflen==0 ){
				error = "Invalid multibyte sequence.";
				status = jstreams::Error;
				return -1;
			}
			if ( inbuf+utflen > inbufend ){
				break; //character incomplete
			}

			const size_t rd = lucene_utf8towc(outbuf,inbuf,inbufend-inbuf);
			if ( rd == 0 ){
				error = "Invalid multibyte sequence.";
				status = jstreams::Error;
				return -1;
			}
			inbuf+=rd;
			outbuf++;
		}
	}
#endif //_UCS2
	else{
		_CLTHROWA(CL_ERR_Runtime,"Unexpected encoding");
	}

	const int32_t remaining = static_cast<int32_t>(inbufend-inbuf);
	if ( outbuf < outbufend ) {
		//we had enough room to convert the entire input
		if ( remaining > 0 ) {
			// last character is incomplete
			// move from inbuf to the end to the start of
			// the buffer
			memmove(charbuf.start, inbuf, remaining);
			charbuf.readPos = charbuf.start;
			charbuf.avail = remaining;
		} else {
			//input sequence was completely converted
			charbuf.readPos = charbuf.start;
			charbuf.avail = 0;
			if (input == NULL) {
				finishedDecoding = true;
			}
		}
	} else {
		// output is full: advance past the consumed bytes, keep the rest in place
		charbuf.readPos += charbuf.avail - remaining;
		charbuf.avail = remaining;
	}
	return static_cast<int32_t>(outbuf-start);
}

/**
 * Tops up charbuf from the byte stream when possible, then decodes into
 * the caller's buffer.
 *
 * @param start destination buffer for decoded TCHARs
 * @param space number of TCHARs that fit at @p start
 * @return the result of decode(); -1 when the byte stream has ended; or the
 *         (below -1) value returned by the byte stream's read() on error
 *
 * The refill only happens while the stream is still open, charbuf's read
 * position is at its start, and charbuf has free room.
 */
int32_t SimpleInputStreamReader::fillBuffer(TCHAR* start, int32_t space) {
    // Free capacity behind the bytes still pending in charbuf.
    // NOTE(review): jstreams read() is understood to treat a max below min
    // as "no limit"; asking for 0 bytes could then return more than fits and
    // overrun charbuf in the memmove below, so skip the refill when full.
    const int32_t room = charbuf.size - charbuf.avail;

    // fill up charbuf
    if (input && charbuf.readPos == charbuf.start && room > 0) {
        const char *begin = NULL;
        const int32_t numRead = input->read(begin, 1, room);
        if (numRead < -1) {
            // read error: remember it and stop using the stream
            error = input->getError();
            status = jstreams::Error;
            input = 0;
            return numRead;
        }
        if (numRead < 1) {
            // signal end of input buffer
            input = 0;
            if (charbuf.avail) {
                error = "stream ends on incomplete character";
                status = jstreams::Error;
            }
            return -1;
        }
        // copy data into other buffer
        memmove( charbuf.start + charbuf.avail, begin, numRead * sizeof(char));
        charbuf.avail = numRead + charbuf.avail;
    }
    // decode
    return decode(start, space);
}

CL_NS_END

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -