cpp_slex_lexer.hpp

来自「Boost provides free peer-reviewed portab」· HPP 代码 · 共 769 行 · 第 1/2 页

HPP
769
字号
/*=============================================================================    Boost.Wave: A Standard compliant C++ preprocessor library    SLex (Spirit Lex) based C++ lexer        http://www.boost.org/    Copyright (c) 2001-2008 Hartmut Kaiser. Distributed under the Boost     Software License, Version 1.0. (See accompanying file     LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)=============================================================================*/#if !defined(SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED)#define SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED#include <string>#if defined(BOOST_SPIRIT_DEBUG)#include <iostream>#endif // defined(BOOST_SPIRIT_DEBUG)#include <boost/assert.hpp>#include <boost/spirit/include/classic_core.hpp>#include <boost/wave/wave_config.hpp>#include <boost/wave/language_support.hpp>#include <boost/wave/token_ids.hpp>#include <boost/wave/util/file_position.hpp>#include <boost/wave/util/time_conversion_helper.hpp>#include <boost/wave/cpplexer/validate_universal_char.hpp>#include <boost/wave/cpplexer/convert_trigraphs.hpp>#include <boost/wave/cpplexer/cpplexer_exceptions.hpp>#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0#include <boost/wave/cpplexer/detect_include_guards.hpp>#endif#include <boost/wave/cpplexer/cpp_lex_interface.hpp>#include "../slex_interface.hpp"#include "../slex_token.hpp"#include "../slex_iterator.hpp"#include "lexer.hpp"   // "spirit/lexer.hpp"///////////////////////////////////////////////////////////////////////////////namespace boost {namespace wave {namespace cpplexer {namespace slex {namespace lexer {/////////////////////////////////////////////////////////////////////////////////  The following numbers are the array sizes of the token regex's which we//  need to specify to make the CW compiler happy (at least up to V9.5).#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0#define INIT_DATA_SIZE              175#else#define INIT_DATA_SIZE              158#endif#define INIT_DATA_CPP_SIZE          15#define INIT_DATA_PP_NUMBER_SIZE    2///////////////////////////////////////////////////////////////////////////////// //  encapsulation of the boost::spirit::classic::slex based cpp lexer//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////  The following lexer_base class was necessary to workaround a CodeWarrior //  bug (at least up to CW V9.5).template <typename IteratorT, typename PositionT>class lexer_base :   public boost::spirit::classic::lexer<        boost::wave::util::position_iterator<IteratorT, PositionT> >{protected:    typedef boost::wave::util::position_iterator<IteratorT, PositionT>         iterator_type;    typedef typename std::iterator_traits<IteratorT>::value_type  char_type;    typedef boost::spirit::classic::lexer<iterator_type> base_type;    lexer_base();    // initialization data (regular expressions for the token definitions)    struct lexer_data {        token_id tokenid;                       // token data        char_type const *tokenregex;            // associated token to match        typename base_type::callback_t tokencb; // associated callback function        unsigned int lexerstate;                // valid for lexer state    };};///////////////////////////////////////////////////////////////////////////////template <typename IteratorT, typename PositionT>class lexer :   public lexer_base<IteratorT, PositionT>{public:    typedef boost::wave::cpplexer::slex_token<PositionT>  token_type;        void init_dfa(boost::wave::language_support language);// get time of last compilation    static std::time_t get_compilation_time()         { return compilation_time.get_time(); }// helper for calculation of the time of last compilation    static boost::wave::util::time_conversion_helper compilation_time;private:    typedef lexer_base<IteratorT, PositionT> base_type;    static typename base_type::lexer_data const init_data[INIT_DATA_SIZE];          // common patterns    static typename base_type::lexer_data const init_data_cpp[INIT_DATA_CPP_SIZE];  // C++ only patterns    static typename base_type::lexer_data const init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE];  // pp-number only patterns};/////////////////////////////////////////////////////////////////////////////////  data required for initialization of the lexer (token definitions)#define OR                  "|"#define Q(c)                "\\" c#define TRI(c)              Q("?") Q("?") c// definition of some sub-token regexps to simplify the regex definitions#define BLANK               "[ \\t]"#define CCOMMENT            \    Q("/") Q("*") "[^*]*" Q("*") "+" "(" "[^/*][^*]*" Q("*") "+" ")*" Q("/")        #define PPSPACE             "(" BLANK OR CCOMMENT ")*"#define OCTALDIGIT          "[0-7]"#define DIGIT               "[0-9]"#define HEXDIGIT            "[0-9a-fA-F]"#define OPTSIGN             "[-+]?"#define EXPSTART            "[eE]" "[-+]"#define EXPONENT            "(" "[eE]" OPTSIGN "[0-9]+" ")"#define NONDIGIT            "[a-zA-Z_]"#define INTEGER             \    "(" "(0x|0X)" HEXDIGIT "+" OR "0" OCTALDIGIT "*" OR "[1-9]" DIGIT "*" ")"            #define INTEGER_SUFFIX      "(" "[uU][lL]?|[lL][uU]?" ")"#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0#define LONGINTEGER_SUFFIX  "(" "[uU]" "(" "[lL][lL]" ")" OR \                                "(" "[lL][lL]" ")" "[uU]" "?" OR \                                "i64" \                            ")" #else#define LONGINTEGER_SUFFIX  "(" "[uU]" "(" "[lL][lL]" ")" OR \                            "(" "[lL][lL]" ")" "[uU]" "?" ")"#endif#define FLOAT_SUFFIX        "(" "[fF][lL]?" OR "[lL][fF]?" ")"#define CHAR_SPEC           "L?"#define BACKSLASH           "(" Q("\\") OR TRI(Q("/")) ")"#define ESCAPESEQ           "(" BACKSLASH "(" \                                "[abfnrtv?'\"]" OR \                                BACKSLASH OR \                                "x" HEXDIGIT "+" OR \                                OCTALDIGIT OCTALDIGIT "?" OCTALDIGIT "?" \                            "))"#define HEXQUAD             "(" HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT ")"#define UNIVERSALCHAR       "(" BACKSLASH "(" \                                "u" HEXQUAD OR \                                "U" HEXQUAD HEXQUAD \                            "))" #define POUNDDEF            "(" "#" OR TRI("=") OR Q("%:") ")"#define NEWLINEDEF          "(" "\n" OR "\r" OR "\r\n" ")"#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0#define INCLUDEDEF          "(include|include_next)"#else#define INCLUDEDEF          "include"#endif#define PP_NUMBERDEF        Q(".") "?" DIGIT "(" DIGIT OR NONDIGIT OR EXPSTART OR Q(".") ")*"/////////////////////////////////////////////////////////////////////////////////  lexer state constants#define LEXER_STATE_NORMAL  0#define LEXER_STATE_PP      1#define NUM_LEXER_STATES    1//  helper for initializing token data#define TOKEN_DATA(id, regex)                                                 \        { T_##id, regex, 0, LEXER_STATE_NORMAL }                              \    /**/#define TOKEN_DATA_EX(id, regex, callback)                                    \        { T_##id, regex, callback, LEXER_STATE_NORMAL }                       \    /**////////////////////////////////////////////////////////////////////////////////// common C++/C99 token definitionstemplate <typename IteratorT, typename PositionT>typename lexer_base<IteratorT, PositionT>::lexer_data const lexer<IteratorT, PositionT>::init_data[INIT_DATA_SIZE] = {    TOKEN_DATA(AND, "&"),    TOKEN_DATA(ANDAND, "&&"),    TOKEN_DATA(ASSIGN, "="),    TOKEN_DATA(ANDASSIGN, "&="),    TOKEN_DATA(OR, Q("|")),    TOKEN_DATA(OR_TRIGRAPH, TRI("!")),    TOKEN_DATA(ORASSIGN, Q("|=")),    TOKEN_DATA(ORASSIGN_TRIGRAPH, TRI("!=")),    TOKEN_DATA(XOR, Q("^")),    TOKEN_DATA(XOR_TRIGRAPH, TRI("'")),    TOKEN_DATA(XORASSIGN, Q("^=")),    TOKEN_DATA(XORASSIGN_TRIGRAPH, TRI("'=")),    TOKEN_DATA(COMMA, ","),    TOKEN_DATA(COLON, ":"),    TOKEN_DATA(DIVIDEASSIGN, Q("/=")),    TOKEN_DATA(DIVIDE, Q("/")),    TOKEN_DATA(DOT, Q(".")),    TOKEN_DATA(ELLIPSIS, Q(".") Q(".") Q(".")),    TOKEN_DATA(EQUAL, "=="),    TOKEN_DATA(GREATER, ">"),    TOKEN_DATA(GREATEREQUAL, ">="),    TOKEN_DATA(LEFTBRACE, Q("{")),    TOKEN_DATA(LEFTBRACE_ALT, "<" Q("%")),    TOKEN_DATA(LEFTBRACE_TRIGRAPH, TRI("<")),    TOKEN_DATA(LESS, "<"),    TOKEN_DATA(LESSEQUAL, "<="),    TOKEN_DATA(LEFTPAREN, Q("(")),    TOKEN_DATA(LEFTBRACKET, Q("[")),    TOKEN_DATA(LEFTBRACKET_ALT, "<:"),    TOKEN_DATA(LEFTBRACKET_TRIGRAPH, TRI(Q("("))),    TOKEN_DATA(MINUS, Q("-")),    TOKEN_DATA(MINUSASSIGN, Q("-=")),    TOKEN_DATA(MINUSMINUS, Q("-") Q("-")),    TOKEN_DATA(PERCENT, Q("%")),    TOKEN_DATA(PERCENTASSIGN, Q("%=")),    TOKEN_DATA(NOT, "!"),    TOKEN_DATA(NOTEQUAL, "!="),    TOKEN_DATA(OROR, Q("|") Q("|")),    TOKEN_DATA(OROR_TRIGRAPH, TRI("!") Q("|") OR Q("|") TRI("!") OR TRI("!") TRI("!")),    TOKEN_DATA(PLUS, Q("+")),    TOKEN_DATA(PLUSASSIGN, Q("+=")),    TOKEN_DATA(PLUSPLUS, Q("+") Q("+")),    TOKEN_DATA(ARROW, Q("->")),    TOKEN_DATA(QUESTION_MARK, Q("?")),    TOKEN_DATA(RIGHTBRACE, Q("}")),    TOKEN_DATA(RIGHTBRACE_ALT, Q("%>")),    TOKEN_DATA(RIGHTBRACE_TRIGRAPH, TRI(">")),    TOKEN_DATA(RIGHTPAREN, Q(")")),    TOKEN_DATA(RIGHTBRACKET, Q("]")),    TOKEN_DATA(RIGHTBRACKET_ALT, ":>"),    TOKEN_DATA(RIGHTBRACKET_TRIGRAPH, TRI(Q(")"))),    TOKEN_DATA(SEMICOLON, ";"),    TOKEN_DATA(SHIFTLEFT, "<<"),    TOKEN_DATA(SHIFTLEFTASSIGN, "<<="),    TOKEN_DATA(SHIFTRIGHT, ">>"),    TOKEN_DATA(SHIFTRIGHTASSIGN, ">>="),    TOKEN_DATA(STAR, Q("*")),    TOKEN_DATA(COMPL, Q("~")),    TOKEN_DATA(COMPL_TRIGRAPH, TRI("-")),    TOKEN_DATA(STARASSIGN, Q("*=")),    TOKEN_DATA(ASM, "asm"),    TOKEN_DATA(AUTO, "auto"),    TOKEN_DATA(BOOL, "bool"),    TOKEN_DATA(FALSE, "false"),    TOKEN_DATA(TRUE, "true"),    TOKEN_DATA(BREAK, "break"),    TOKEN_DATA(CASE, "case"),    TOKEN_DATA(CATCH, "catch"),    TOKEN_DATA(CHAR, "char"),    TOKEN_DATA(CLASS, "class"),    TOKEN_DATA(CONST, "const"),    TOKEN_DATA(CONSTCAST, "const_cast"),    TOKEN_DATA(CONTINUE, "continue"),    TOKEN_DATA(DEFAULT, "default"),    TOKEN_DATA(DELETE, "delete"),    TOKEN_DATA(DO, "do"),    TOKEN_DATA(DOUBLE, "double"),    TOKEN_DATA(DYNAMICCAST, "dynamic_cast"),    TOKEN_DATA(ELSE, "else"),    TOKEN_DATA(ENUM, "enum"),    TOKEN_DATA(EXPLICIT, "explicit"),    TOKEN_DATA(EXPORT, "export"),    TOKEN_DATA(EXTERN, "extern"),    TOKEN_DATA(FLOAT, "float"),    TOKEN_DATA(FOR, "for"),    TOKEN_DATA(FRIEND, "friend"),    TOKEN_DATA(GOTO, "goto"),    TOKEN_DATA(IF, "if"),    TOKEN_DATA(INLINE, "inline"),    TOKEN_DATA(INT, "int"),    TOKEN_DATA(LONG, "long"),    TOKEN_DATA(MUTABLE, "mutable"),    TOKEN_DATA(NAMESPACE, "namespace"),    TOKEN_DATA(NEW, "new"),    TOKEN_DATA(OPERATOR, "operator"),    TOKEN_DATA(PRIVATE, "private"),    TOKEN_DATA(PROTECTED, "protected"),    TOKEN_DATA(PUBLIC, "public"),    TOKEN_DATA(REGISTER, "register"),    TOKEN_DATA(REINTERPRETCAST, "reinterpret_cast"),    TOKEN_DATA(RETURN, "return"),    TOKEN_DATA(SHORT, "short"),    TOKEN_DATA(SIGNED, "signed"),    TOKEN_DATA(SIZEOF, "sizeof"),    TOKEN_DATA(STATIC, "static"),    TOKEN_DATA(STATICCAST, "static_cast"),    TOKEN_DATA(STRUCT, "struct"),    TOKEN_DATA(SWITCH, "switch"),    TOKEN_DATA(TEMPLATE, "template"),    TOKEN_DATA(THIS, "this"),    TOKEN_DATA(THROW, "throw"),    TOKEN_DATA(TRY, "try"),    TOKEN_DATA(TYPEDEF, "typedef"),    TOKEN_DATA(TYPEID, "typeid"),    TOKEN_DATA(TYPENAME, "typename"),    TOKEN_DATA(UNION, "union"),    TOKEN_DATA(UNSIGNED, "unsigned"),    TOKEN_DATA(USING, "using"),    TOKEN_DATA(VIRTUAL, "virtual"),    TOKEN_DATA(VOID, "void"),    TOKEN_DATA(VOLATILE, "volatile"),    TOKEN_DATA(WCHART, "wchar_t"),    TOKEN_DATA(WHILE, "while"),    TOKEN_DATA(PP_DEFINE, POUNDDEF PPSPACE "define"),    TOKEN_DATA(PP_IF, POUNDDEF PPSPACE "if"),    TOKEN_DATA(PP_IFDEF, POUNDDEF PPSPACE "ifdef"),    TOKEN_DATA(PP_IFNDEF, POUNDDEF PPSPACE "ifndef"),    TOKEN_DATA(PP_ELSE, POUNDDEF PPSPACE "else"),    TOKEN_DATA(PP_ELIF, POUNDDEF PPSPACE "elif"),    TOKEN_DATA(PP_ENDIF, POUNDDEF PPSPACE "endif"),    TOKEN_DATA(PP_ERROR, POUNDDEF PPSPACE "error"),    TOKEN_DATA(PP_QHEADER, POUNDDEF PPSPACE \        INCLUDEDEF PPSPACE Q("\"") "[^\\n\\r\"]+" Q("\"")),    TOKEN_DATA(PP_HHEADER, POUNDDEF PPSPACE \        INCLUDEDEF PPSPACE "<" "[^\\n\\r>]+" ">"),    TOKEN_DATA(PP_INCLUDE, POUNDDEF PPSPACE \        INCLUDEDEF PPSPACE),    TOKEN_DATA(PP_LINE, POUNDDEF PPSPACE "line"),    TOKEN_DATA(PP_PRAGMA, POUNDDEF PPSPACE "pragma"),    TOKEN_DATA(PP_UNDEF, POUNDDEF PPSPACE "undef"),    TOKEN_DATA(PP_WARNING, POUNDDEF PPSPACE "warning"),#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0    TOKEN_DATA(MSEXT_INT8, "__int8"),    TOKEN_DATA(MSEXT_INT16, "__int16"),    TOKEN_DATA(MSEXT_INT32, "__int32"),    TOKEN_DATA(MSEXT_INT64, "__int64"),    TOKEN_DATA(MSEXT_BASED, "_?" "_based"),    TOKEN_DATA(MSEXT_DECLSPEC, "_?" "_declspec"),    TOKEN_DATA(MSEXT_CDECL, "_?" "_cdecl"),    TOKEN_DATA(MSEXT_FASTCALL, "_?" "_fastcall"),    TOKEN_DATA(MSEXT_STDCALL, "_?" "_stdcall"),    TOKEN_DATA(MSEXT_TRY , "__try"),    TOKEN_DATA(MSEXT_EXCEPT, "__except"),    TOKEN_DATA(MSEXT_FINALLY, "__finally"),    TOKEN_DATA(MSEXT_LEAVE, "__leave"),    TOKEN_DATA(MSEXT_INLINE, "_?" "_inline"),    TOKEN_DATA(MSEXT_ASM, "_?" "_asm"),    TOKEN_DATA(MSEXT_PP_REGION, POUNDDEF PPSPACE "region"),    TOKEN_DATA(MSEXT_PP_ENDREGION, POUNDDEF PPSPACE "endregion"),#endif // BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0//  TOKEN_DATA(OCTALINT, "0" OCTALDIGIT "*" INTEGER_SUFFIX "?"),//  TOKEN_DATA(DECIMALINT, "[1-9]" DIGIT "*" INTEGER_SUFFIX "?"),//  TOKEN_DATA(HEXAINT, "(0x|0X)" HEXDIGIT "+" INTEGER_SUFFIX "?"),    TOKEN_DATA(LONGINTLIT, INTEGER LONGINTEGER_SUFFIX),    TOKEN_DATA(INTLIT, INTEGER INTEGER_SUFFIX "?"),    TOKEN_DATA(FLOATLIT,         "(" DIGIT "*" Q(".") DIGIT "+" OR DIGIT "+" Q(".") ")"         EXPONENT "?" FLOAT_SUFFIX "?" OR        DIGIT "+" EXPONENT FLOAT_SUFFIX "?"),#if BOOST_WAVE_USE_STRICT_LEXER != 0    TOKEN_DATA(IDENTIFIER, "([a-zA-Z_]" OR UNIVERSALCHAR ")([a-zA-Z0-9_]" OR UNIVERSALCHAR ")*"),#else    TOKEN_DATA(IDENTIFIER, "([a-zA-Z_$]" OR UNIVERSALCHAR ")([a-zA-Z0-9_$]" OR UNIVERSALCHAR ")*"),#endif    TOKEN_DATA(CCOMMENT, CCOMMENT),    TOKEN_DATA(CPPCOMMENT, Q("/") Q("/[^\\n\\r]*") NEWLINEDEF ),    TOKEN_DATA(CHARLIT, CHAR_SPEC "'"                 "(" ESCAPESEQ OR "[^\\n\\r']" OR UNIVERSALCHAR ")+" "'"),    TOKEN_DATA(STRINGLIT, CHAR_SPEC Q("\"")                 "(" ESCAPESEQ OR "[^\\n\\r\"]" OR UNIVERSALCHAR ")*" Q("\"")),    TOKEN_DATA(SPACE, "[ \t\v\f]+"),//    TOKEN_DATA(SPACE2, "[\\v\\f]+"),    TOKEN_DATA(CONTLINE, "\\" "\\n"),     TOKEN_DATA(NEWLINE, NEWLINEDEF),    TOKEN_DATA(POUND_POUND, "##"),    TOKEN_DATA(POUND_POUND_ALT, Q("%:") Q("%:")),

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?