⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 standardtokenizer.h

📁 汉化CLucene今天
💻 H
字号:
#ifndef _lucene_analysis_standard_StandardTokenizer
#define _lucene_analysis_standard_StandardTokenizer

#if defined(_LUCENE_PRAGMA_ONCE)
# pragma once
#endif

#include "../AnalysisHeader.h"
#include "../Analyzers.h"
#include "StandardTokenizerConstants.h"
#include "CLucene/util/StringBuffer.h"
#include "CLucene/util/FastCharStream.h"
#include "CLucene/util/Reader.h"


CL_NS_DEF2(analysis,standard)

  /**
  * Tokenizer that reads characters from a FastCharStream (see `rd`) and
  * fills in Token objects via next().
  *
  * This should be a good tokenizer for most European-language documents.
  *
  * Only the declaration is visible in this header; the behavior notes below
  * that are marked NOTE(review)/TODO are assumptions to be confirmed against
  * the implementation file.
  */
  class StandardTokenizer: public Tokenizer {
  private:
    // Read position; incremented by readChar() and decremented by unReadChar().
    int32_t rdPos;
    // NOTE(review): presumably the position at which the token currently being
    // read begins -- confirm against the implementation.
    int32_t tokenStart;

    // Advance by one character, incrementing rdPos and returning the character.
    TCHAR readChar();
    // Retreat by one character, decrementing rdPos.
    void unReadChar();

    // setToken centralizes token setup for auditing purposes: it takes the
    // destination token `t`, the buffer `sb` and the token type `tokenCode`.
    // NOTE(review): the bool return presumably signals success -- confirm.
    // It replaces the older createToken() form, kept below for reference:
	//Token* createToken(CL_NS(util)::StringBuffer* sb, TokenTypes tokenCode);
    // NOTE(review): declared `inline` but not defined in this header; the
    // definition must be visible in every translation unit that calls it.
    inline bool setToken(Token* t, CL_NS(util)::StringBuffer* sb, TokenTypes tokenCode);

    // NOTE(review): judging by the name, reads a dot-separated sequence into
    // `str` and stores the result in `t`; the exact role of `forcedType` is
    // defined in the implementation -- TODO confirm.
    bool ReadDotted(CL_NS(util)::StringBuffer* str, TokenTypes forcedType,Token* t);

  public:
    // Character stream the tokenizer reads from.
    // NOTE(review): public raw pointer; ownership and lifetime are not visible
    // in this header -- confirm who allocates and frees it.
	CL_NS(util)::FastCharStream* rd;

    // Constructs a tokenizer for this Reader.
    StandardTokenizer(CL_NS(util)::Reader* reader);

    // Destructor. NOTE(review): what it releases (e.g. `rd`) is not visible
    // here -- confirm against the implementation.
    ~StandardTokenizer();

    // Closes the tokenizer. NOTE(review): presumably releases the underlying
    // stream; confirm whether calling it more than once is safe.
    void close();

    /** Returns the next token in the stream, or false at end-of-stream.
    * The token is written into the caller-supplied `token` object.
    * The returned token's type is set to an element of
    * StandardTokenizerConstants::tokenImage. */
    bool next(Token* token);

    // Reads for number like "1"/"1234.567", or IP address like "192.168.1.2".
    // NOTE(review): `previousNumber` and `prev` presumably carry text/character
    // already consumed by the caller -- confirm against the implementation.
    bool ReadNumber(const TCHAR* previousNumber, const TCHAR prev, Token* t);

    // NOTE(review): judging by the name, reads an alphanumeric token into `t`,
    // with `prev` presumably being the character already consumed -- confirm.
    bool ReadAlphaNum(const TCHAR prev, Token* t);

    // Reads for apostrophe-containing word.
    bool ReadApostrophe(CL_NS(util)::StringBuffer* str, Token* t);

    // Reads for something@... it may be a COMPANY name or an EMAIL address.
    bool ReadAt(CL_NS(util)::StringBuffer* str, Token* t);

    // Reads for COMPANY name like AT&T.
    bool ReadCompany(CL_NS(util)::StringBuffer* str, Token* t);
  };

CL_NS_END2
#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -