📄 analysisheader.cpp
字号:
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "AnalysisHeader.h"
CL_NS_USE(util)
CL_NS_DEF(analysis)
const TCHAR* Token::defaultType=_T("word");
Token::Token():
_startOffset (0),
_endOffset (0),
_type ( defaultType ),
positionIncrement (1)
{
termTextLen = 0;
#ifndef LUCENE_TOKEN_WORD_LENGTH
_termText = NULL;
bufferTextLen = 0;
#else
_termText[0] = 0; //make sure null terminated
bufferTextLen = LUCENE_TOKEN_WORD_LENGTH+1;
#endif
}
Token::~Token(){
#ifndef LUCENE_TOKEN_WORD_LENGTH
_CLDELETE_CARRAY(_termText);
#endif
}
Token::Token(const TCHAR* text, const int32_t start, const int32_t end, const TCHAR* typ):
_startOffset (start),
_endOffset (end),
_type ( typ ),
positionIncrement (1)
{
termTextLen = 0;
#ifndef LUCENE_TOKEN_WORD_LENGTH
_termText = NULL;
bufferTextLen = 0;
#else
_termText[0] = 0; //make sure null terminated
bufferTextLen = LUCENE_TOKEN_WORD_LENGTH+1;
#endif
setText(text);
}
void Token::set(const TCHAR* text, const int32_t start, const int32_t end, const TCHAR* typ){
_startOffset = start;
_endOffset = end;
_type = typ;
positionIncrement = 1;
setText(text);
}
void Token::setText(const TCHAR* text){
int32_t oldlen = termTextLen;
termTextLen = _tcslen(text);
#ifndef LUCENE_TOKEN_WORD_LENGTH
if ( termTextLen > oldlen || _termText == NULL ){
_CLDELETE_CARRAY(_termText);
_termText = _CL_NEWARRAY(TCHAR,termTextLen+1);
bufferTextLen = termTextLen+1;
}
_tcsncpy(_termText,text,termTextLen+1);
#else
if ( termTextLen > LUCENE_TOKEN_WORD_LENGTH ){
//in the case where this occurs, we will leave the endOffset as it is
//since the actual word still occupies that space.
termTextLen=LUCENE_TOKEN_WORD_LENGTH;
}
_tcsncpy(_termText,text,termTextLen+1);
#endif
_termText[termTextLen] = 0; //make sure null terminated
}
void Token::growBuffer(size_t size){
if(bufferTextLen>size)
return;
#ifndef LUCENE_TOKEN_WORD_LENGTH
_CLDELETE_CARRAY(_termText);
termTextLen=-1;
bufferTextLen = size+1;
_termText = _CL_NEWARRAY(TCHAR,bufferTextLen);
#else
_CLTHROWA(CL_ERR_TokenMgr,"Couldn't grow Token buffer");
#endif
}
void Token::setPositionIncrement(int32_t posIncr) {
if (posIncr < 0) {
_CLTHROWA(CL_ERR_IllegalArgument,"positionIncrement must be >= 0");
}
positionIncrement = posIncr;
}
int32_t Token::getPositionIncrement() const { return positionIncrement; }
// Returns the Token's term text.
const TCHAR* Token::termText() const{
return (const TCHAR*) _termText;
}
size_t Token::termTextLength() {
if ( termTextLen == -1 ) //it was invalidated by growBuffer
termTextLen = _tcslen(_termText);
return termTextLen;
}
void Token::resetTermTextLen(){
termTextLen=-1;
}
bool Token::OrderCompare::operator()( Token* t1, Token* t2 ) const{
if(t1->startOffset()>t2->startOffset())
return false;
if(t1->startOffset()<t2->startOffset())
return true;
return true;
}
Token* TokenStream::next(){
Token* t = _CLNEW Token;
if ( !next(t) )
_CLDELETE(t);
return t;
}
TokenFilter::TokenFilter(TokenStream* in, bool deleteTS):
input(in),
deleteTokenStream(deleteTS)
{
}
TokenFilter::~TokenFilter(){
close();
}
// Close the input TokenStream.
void TokenFilter::close() {
if ( input != NULL ){
input->close();
if ( deleteTokenStream )
_CLDELETE( input );
}
input = NULL;
}
Tokenizer::Tokenizer() {
input = NULL;
}
Tokenizer::Tokenizer(CL_NS(util)::Reader* _input):
input(_input)
{
}
void Tokenizer::close(){
if (input != NULL) {
// ? delete input;
input = NULL;
}
}
Tokenizer::~Tokenizer(){
close();
}
CL_NS_END
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -