首页 › 资源下载 › 其他 › 汉化CLucene今天 › 源码查看

standardfilter.cpp

来自「汉化CLucene今天」· C++ 代码 · 共 71 行

CPP

71 行

#include "CLucene/StdHeader.h"
#include "StandardFilter.h"

#include "../AnalysisHeader.h"
#include "../Analyzers.h"
#include "StandardTokenizerConstants.h"
#include "CLucene/util/StringBuffer.h"

CL_NS_USE(analysis)
CL_NS_USE(util)
CL_NS_DEF2(analysis,standard)

  /**
   * Constructs a StandardFilter over an existing token stream.
   *
   * Both arguments are handed straight to the TokenFilter base constructor;
   * no other initialization is performed here.
   *
   * @param in                 token stream passed to the TokenFilter base
   * @param deleteTokenStream  forwarded unchanged to TokenFilter; presumably
   *                           decides whether `in` is deleted together with
   *                           the filter -- confirm against TokenFilter
   */
  StandardFilter::StandardFilter(TokenStream* in, bool deleteTokenStream)
    : TokenFilter(in, deleteTokenStream)
  {}

  /**
   * Destructor. The body is intentionally empty: this class releases nothing
   * itself.
   */
  StandardFilter::~StandardFilter()
  {}

  /**
   * Pulls the next token from the wrapped stream into `t` and normalizes its
   * term text in place:
   *
   *  - APOSTROPHE tokens whose text ends in "'s" (compared case-insensitively)
   *    lose that two-character suffix;
   *  - ACRONYM tokens have every '.' removed;
   *  - tokens of any other type are left untouched.
   *
   * Both edits write directly into the token's own term buffer, so no
   * replacement Token is allocated and nothing needs to be freed here.
   *
   * @param t  token filled in by the input stream and then normalized
   * @return   false if the input stream's next() returned false (in which case
   *           `t` is not examined), true otherwise
   */
  bool StandardFilter::next(Token* t) {
    if (!input->next(t))
      return false;

    TCHAR* text = t->_termText;
    const int32_t textLength = t->termTextLength();
    const TCHAR* type = t->type();

    // TODO: can type be compared by pointer instead? If type() always hands
    // back the tokenImage entry itself, it will be the same reference and the
    // string comparisons below are unnecessary.
    if ( _tcscmp(type,tokenImage[APOSTROPHE])==0 &&
         ( textLength >= 2 && _tcsicmp(text+textLength-2, _T("'s"))==0 ) )
    {
      // Remove the trailing 's by terminating the string two characters early.
      text[textLength-2]=0;
      // The text just got shorter; make the token drop the length it reported
      // above.
      t->resetTermTextLen();

    } else if ( _tcscmp(type, tokenImage[ACRONYM])==0 ) {
      // Remove dots: compact the buffer in place, copying every non-dot
      // character down to the next free slot (j never runs ahead of i).
      int32_t j = 0;
      for ( int32_t i=0;i<textLength;i++ ){
        if ( text[i] != '.' )
          text[j++]=text[i];
      }
      text[j]=0;
      // Same reasoning as the apostrophe branch: the text may now be shorter
      // than the length read via termTextLength() above, so reset it.
      // NOTE(review): assumes Token keeps that length until
      // resetTermTextLen() is called -- confirm against Token.
      t->resetTermTextLen();
    }

    // A token was produced whether or not it needed rewriting.
    return true;
  }

CL_NS_END2

standardfilter.cpp - 源码说明

本页面展示了「汉化CLucene今天」中的 standardfilter.cpp 源码文件，采用 C++ 编程语言编写，共 71 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。

虫虫下载站收录了大量与CLucene相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。

⌨️ 快捷键说明

复制代码：Ctrl + C

搜索代码：Ctrl + F

全屏模式：F11

增大字号：Ctrl + =

减小字号：Ctrl + -

显示快捷键：?