📄 xlex_lexer.hpp
字号:
/*============================================================================= Boost.Wave: A Standard compliant C++ preprocessor library Xpressive based C++ lexer http://www.boost.org/ Copyright (c) 2001-2008 Hartmut Kaiser. Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)=============================================================================*/#if !defined(XLEX_LEXER_HPP)#define XLEX_LEXER_HPP#include <string>#include <cstdio>#include <cstdarg>#if defined(BOOST_SPIRIT_DEBUG)#include <iostream>#endif // defined(BOOST_SPIRIT_DEBUG)#include <boost/concept_check.hpp>#include <boost/assert.hpp>#include <boost/spirit/include/classic_core.hpp>#include <boost/wave/token_ids.hpp>#include <boost/wave/language_support.hpp>#include <boost/wave/util/file_position.hpp>#include <boost/wave/cpplexer/validate_universal_char.hpp>#include <boost/wave/cpplexer/cpplexer_exceptions.hpp>#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0#include <boost/wave/cpplexer/detect_include_guards.hpp>#endif#include <boost/wave/cpplexer/cpp_lex_interface.hpp>// reuse the default token type #include "../xlex_iterator.hpp"// include the xpressive headers#include "xpressive_lexer.hpp"///////////////////////////////////////////////////////////////////////////////namespace boost {namespace wave {namespace cpplexer {namespace xlex {namespace lexer {///////////////////////////////////////////////////////////////////////////////// // encapsulation of the xpressive based C++ lexer/////////////////////////////////////////////////////////////////////////////////template < typename Iterator, typename Position = boost::wave::util::file_position_type>class lexer {public: typedef char char_type; typedef boost::wave::cpplexer::lex_token<Position> token_type; typedef typename token_type::string_type string_type; lexer(Iterator const &first, Iterator const &last, Position const &pos, boost::wave::language_support language); ~lexer() {} boost::wave::cpplexer::lex_token<Position> get(); void set_position(Position const &pos) { // set position has to change the file name and line number only filename = pos.get_file(); line = pos.get_line(); }#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 bool has_include_guards(std::string& guard_name) const { return guards.detected(guard_name); }#endifprivate: typedef xpressive_lexer<Iterator, token_id> lexer_type; typedef typename lexer_type::callback_type callback_type; lexer_type xlexer; Iterator first; Iterator last; string_type filename; int line; bool at_eof; boost::wave::language_support language;// initialization data (regular expressions for the token definitions) struct lexer_data { token_id tokenid; // token data char_type const *tokenregex; // associated token to match callback_type tokencb; // associated callback function }; static lexer_data const init_data[]; // common patterns static lexer_data const init_data_cpp[]; // C++ only patterns#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 boost::wave::cpplexer::include_guards<token_type> guards;#endif};///////////////////////////////////////////////////////////////////////////////// helper for initializing token data#define TOKEN_DATA(id, regex) \ { id, regex, 0 }#define TOKEN_DATA_EX(id, regex, callback) \ { id, regex, callback }///////////////////////////////////////////////////////////////////////////////// data required for initialization of the lexer (token definitions)#define OR "|"#define Q(c) "\\" c#define TRI(c) Q("?") Q("?") c// definition of some subtoken regexps to simplify the regex definitions#define BLANK "[ \t]"#define CCOMMENT Q("/") Q("*") ".*?" Q("*") Q("/") #define PPSPACE "(" BLANK OR CCOMMENT ")*"#define OCTALDIGIT "[0-7]"#define DIGIT "[0-9]"#define HEXDIGIT "[0-9a-fA-F]"#define SIGN "[-+]?"#define EXPONENT "(" "[eE]" SIGN "[0-9]+" ")"#define INTEGER "(" \ "(0x|0X)" HEXDIGIT "+" OR \ "0" OCTALDIGIT "*" OR \ "[1-9]" DIGIT "*" \ ")" #define INTEGER_SUFFIX "(" "[uU][lL]?|[lL][uU]?" ")"#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0#define LONGINTEGER_SUFFIX "(" "[uU]" "(" "[lL][lL]" ")" OR \ "(" "[lL][lL]" ")" "[uU]" "?" OR \ "i64" \ ")" #else#define LONGINTEGER_SUFFIX "(" "[uU]" "(" "[lL][lL]" ")" OR \ "(" "[lL][lL]" ")" "[uU]" "?" ")"#endif#define FLOAT_SUFFIX "(" "[fF][lL]?|[lL][fF]?" ")"#define CHAR_SPEC "L?"#define BACKSLASH "(" Q("\\") OR TRI(Q("/")) ")"#define ESCAPESEQ BACKSLASH "(" \ "[abfnrtv?'\"]" OR \ BACKSLASH OR \ "x" HEXDIGIT "+" OR \ OCTALDIGIT OCTALDIGIT "?" OCTALDIGIT "?" \ ")"#define HEXQUAD HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT #define UNIVERSALCHAR BACKSLASH "(" \ "u" HEXQUAD OR \ "U" HEXQUAD HEXQUAD \ ")" #define POUNDDEF "(" "#" OR TRI("=") OR Q("%:") ")"#define NEWLINEDEF "(" "\n" OR "\r\n" OR "\r" ")"#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0#define INCLUDEDEF "(include_next|include)"#else#define INCLUDEDEF "include"#endif///////////////////////////////////////////////////////////////////////////////// common C++/C99 token definitionstemplate <typename Iterator, typename Position>typename lexer<Iterator, Position>::lexer_data const lexer<Iterator, Position>::init_data[] = { TOKEN_DATA(T_CCOMMENT, CCOMMENT), TOKEN_DATA(T_CPPCOMMENT, Q("/") Q("/.*?") NEWLINEDEF ), TOKEN_DATA(T_CHARLIT, CHAR_SPEC "'" "(" ESCAPESEQ OR "[^\n\r']" OR UNIVERSALCHAR ")+" "'"), TOKEN_DATA(T_STRINGLIT, CHAR_SPEC Q("\"") "(" ESCAPESEQ OR "[^\n\r\"]" OR UNIVERSALCHAR ")*" Q("\"")), TOKEN_DATA(T_ANDAND, "&&"), TOKEN_DATA(T_ANDASSIGN, "&="), TOKEN_DATA(T_AND, "&"), TOKEN_DATA(T_EQUAL, "=="), TOKEN_DATA(T_ASSIGN, "="), TOKEN_DATA(T_ORASSIGN, Q("|=")), TOKEN_DATA(T_ORASSIGN_TRIGRAPH, TRI("!=")), TOKEN_DATA(T_OROR, Q("|") Q("|")), TOKEN_DATA(T_OROR_TRIGRAPH, TRI("!") Q("|") OR Q("|") TRI("!") OR TRI("!") TRI("!")), TOKEN_DATA(T_OR, Q("|")), TOKEN_DATA(T_OR_TRIGRAPH, TRI("!")), TOKEN_DATA(T_XORASSIGN, Q("^=")), TOKEN_DATA(T_XORASSIGN_TRIGRAPH, TRI("'=")), TOKEN_DATA(T_XOR, Q("^")), TOKEN_DATA(T_XOR_TRIGRAPH, TRI("'")), TOKEN_DATA(T_COMMA, ","), TOKEN_DATA(T_RIGHTBRACKET_ALT, ":>"), TOKEN_DATA(T_COLON, ":"), TOKEN_DATA(T_DIVIDEASSIGN, Q("/=")), TOKEN_DATA(T_DIVIDE, Q("/")), TOKEN_DATA(T_ELLIPSIS, Q(".") Q(".") Q(".")), TOKEN_DATA(T_SHIFTRIGHTASSIGN, ">>="), TOKEN_DATA(T_SHIFTRIGHT, ">>"), TOKEN_DATA(T_GREATEREQUAL, ">="), TOKEN_DATA(T_GREATER, ">"), TOKEN_DATA(T_LEFTBRACE, Q("{")), TOKEN_DATA(T_SHIFTLEFTASSIGN, "<<="), TOKEN_DATA(T_SHIFTLEFT, "<<"), TOKEN_DATA(T_LEFTBRACE_ALT, "<" Q("%")), TOKEN_DATA(T_LESSEQUAL, "<="), TOKEN_DATA(T_LEFTBRACKET_ALT, "<:"), TOKEN_DATA(T_LESS, "<"), TOKEN_DATA(T_LEFTBRACE_TRIGRAPH, TRI("<")), TOKEN_DATA(T_LEFTPAREN, Q("(")), TOKEN_DATA(T_LEFTBRACKET, Q("[")), TOKEN_DATA(T_LEFTBRACKET_TRIGRAPH, TRI(Q("("))), TOKEN_DATA(T_MINUSMINUS, Q("-") Q("-")), TOKEN_DATA(T_MINUSASSIGN, Q("-=")), TOKEN_DATA(T_ARROW, Q("->")), TOKEN_DATA(T_MINUS, Q("-")), TOKEN_DATA(T_POUND_POUND_ALT, Q("%:") Q("%:")), TOKEN_DATA(T_PERCENTASSIGN, Q("%=")), TOKEN_DATA(T_RIGHTBRACE_ALT, Q("%>")), TOKEN_DATA(T_POUND_ALT, Q("%:")), TOKEN_DATA(T_PERCENT, Q("%")), TOKEN_DATA(T_NOTEQUAL, "!="), TOKEN_DATA(T_NOT, "!"), TOKEN_DATA(T_PLUSASSIGN, Q("+=")), TOKEN_DATA(T_PLUSPLUS, Q("+") Q("+")), TOKEN_DATA(T_PLUS, Q("+")), TOKEN_DATA(T_RIGHTBRACE, Q("}")), TOKEN_DATA(T_RIGHTBRACE_TRIGRAPH, TRI(">")), TOKEN_DATA(T_RIGHTPAREN, Q(")")), TOKEN_DATA(T_RIGHTBRACKET, Q("]")), TOKEN_DATA(T_RIGHTBRACKET_TRIGRAPH, TRI(Q(")"))), TOKEN_DATA(T_SEMICOLON, ";"), TOKEN_DATA(T_STARASSIGN, Q("*=")), TOKEN_DATA(T_STAR, Q("*")), TOKEN_DATA(T_COMPL, Q("~")), TOKEN_DATA(T_COMPL_TRIGRAPH, TRI("-")), TOKEN_DATA(T_ASM, "asm"), TOKEN_DATA(T_AUTO, "auto"), TOKEN_DATA(T_BOOL, "bool"), TOKEN_DATA(T_FALSE, "false"), TOKEN_DATA(T_TRUE, "true"), TOKEN_DATA(T_BREAK, "break"), TOKEN_DATA(T_CASE, "case"), TOKEN_DATA(T_CATCH, "catch"), TOKEN_DATA(T_CHAR, "char"), TOKEN_DATA(T_CLASS, "class"), TOKEN_DATA(T_CONSTCAST, "const_cast"), TOKEN_DATA(T_CONST, "const"), TOKEN_DATA(T_CONTINUE, "continue"), TOKEN_DATA(T_DEFAULT, "default"), TOKEN_DATA(T_DELETE, "delete"), TOKEN_DATA(T_DOUBLE, "double"), TOKEN_DATA(T_DO, "do"), TOKEN_DATA(T_DYNAMICCAST, "dynamic_cast"), TOKEN_DATA(T_ELSE, "else"), TOKEN_DATA(T_ENUM, "enum"), TOKEN_DATA(T_EXPLICIT, "explicit"), TOKEN_DATA(T_EXPORT, "export"), TOKEN_DATA(T_EXTERN, "extern"), TOKEN_DATA(T_FLOAT, "float"), TOKEN_DATA(T_FOR, "for"), TOKEN_DATA(T_FRIEND, "friend"), TOKEN_DATA(T_GOTO, "goto"), TOKEN_DATA(T_IF, "if"), TOKEN_DATA(T_INLINE, "inline"), TOKEN_DATA(T_INT, "int"), TOKEN_DATA(T_LONG, "long"), TOKEN_DATA(T_MUTABLE, "mutable"), TOKEN_DATA(T_NAMESPACE, "namespace"), TOKEN_DATA(T_NEW, "new"), TOKEN_DATA(T_OPERATOR, "operator"), TOKEN_DATA(T_PRIVATE, "private"), TOKEN_DATA(T_PROTECTED, "protected"), TOKEN_DATA(T_PUBLIC, "public"), TOKEN_DATA(T_REGISTER, "register"), TOKEN_DATA(T_REINTERPRETCAST, "reinterpret_cast"), TOKEN_DATA(T_RETURN, "return"), TOKEN_DATA(T_SHORT, "short"), TOKEN_DATA(T_SIGNED, "signed"), TOKEN_DATA(T_SIZEOF, "sizeof"),
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -