📄 lexertl_lexer.hpp
字号:
/*============================================================================= Boost.Wave: A Standard compliant C++ preprocessor library http://www.boost.org/ Copyright (c) 2001-2008 Hartmut Kaiser. Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)=============================================================================*/#if !defined(BOOST_WAVE_LEXERTL_LEXER_HPP_INCLUDED)#define BOOST_WAVE_LEXERTL_LEXER_HPP_INCLUDED#include <fstream>#include <boost/iterator/iterator_traits.hpp>#include <boost/wave/wave_config.hpp>#include <boost/wave/language_support.hpp>#include <boost/wave/token_ids.hpp>#include <boost/wave/util/time_conversion_helper.hpp>#include <boost/wave/cpplexer/validate_universal_char.hpp>#include <boost/wave/cpplexer/convert_trigraphs.hpp>#include <boost/wave/cpplexer/cpplexer_exceptions.hpp>#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0#include <boost/wave/cpplexer/detect_include_guards.hpp>#endif#include "wave_lexertl_config.hpp"#include "../lexertl_iterator.hpp"#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES != 0#include "wave_lexertl_tables.hpp"#else#include "lexertl/generator.hpp"#include "lexertl/rules.hpp"#include "lexertl/state_machine.hpp"#include "lexertl/consts.h"#include "lexertl/examples/serialise.hpp"#if BOOST_WAVE_LEXERTL_GENERATE_CPP_CODE != 0#include "lexertl/examples/cpp_code.hpp"#endif#endif///////////////////////////////////////////////////////////////////////////////namespace boost { namespace wave { namespace cpplexer { namespace lexertl {#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0///////////////////////////////////////////////////////////////////////////////// The following numbers are the array sizes of the token regex's which we// need to specify to make the CW compiler happy (at least up to V9.5).#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0#define INIT_DATA_SIZE 176#else#define INIT_DATA_SIZE 159#endif#define INIT_DATA_CPP_SIZE 15#define INIT_DATA_PP_NUMBER_SIZE 2#define INIT_MACRO_DATA_SIZE 27#endif // #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0// this is just a hack to have a unique token id not otherwise used by Wave#define T_ANYCTRL T_LAST_TOKEN_ID///////////////////////////////////////////////////////////////////////////////namespace lexer{///////////////////////////////////////////////////////////////////////////////// this is the wrapper for the lexertl lexer librarytemplate <typename Iterator, typename Position>class lexertl {private: typedef BOOST_WAVE_STRINGTYPE string_type; typedef typename boost::detail::iterator_traits<Iterator>::value_type char_type;public: wave::token_id next_token(Iterator &first, Iterator const &last, string_type& token_value); #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES != 0 lexertl() {} void init_dfa(wave::language_support lang, Position const& pos, bool force_reinit = false) {} bool is_initialized() const { return true; }#else lexertl() : has_compiled_dfa_(false) {} bool init_dfa(wave::language_support lang, Position const& pos, bool force_reinit = false); bool is_initialized() const { return has_compiled_dfa_; } // get time of last compilation static std::time_t get_compilation_time() { return compilation_time.get_time(); } bool load (istream& instrm); bool save (ostream& outstrm); private: ::lexertl::state_machine state_machine_; bool has_compiled_dfa_; // initialization data (regular expressions for the token definitions) struct lexer_macro_data { char_type const *name; // macro name char_type const *macro; // associated macro definition }; static lexer_macro_data const init_macro_data[INIT_MACRO_DATA_SIZE]; // macro patterns struct lexer_data { token_id tokenid; // token data char_type const *tokenregex; // associated token to match }; static lexer_data const init_data[INIT_DATA_SIZE]; // common patterns static lexer_data const init_data_cpp[INIT_DATA_CPP_SIZE]; // C++ only patterns static lexer_data const init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE]; // pp-number only patterns// helper for calculation of the time of last compilation static boost::wave::util::time_conversion_helper compilation_time;#endif // #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0};#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0///////////////////////////////////////////////////////////////////////////////// get time of last compilation of this filetemplate <typename IteratorT, typename PositionT>boost::wave::util::time_conversion_helper lexertl<IteratorT, PositionT>::compilation_time(__DATE__ " " __TIME__);///////////////////////////////////////////////////////////////////////////////// token regex definitions// helper for initializing token data and macro definitions#define Q(c) "\\" c#define TRI(c) "{TRI}" c#define OR "|"#define MACRO_DATA(name, macro) { name, macro }#define TOKEN_DATA(id, regex) { id, regex }// lexertl macro definitionstemplate <typename Iterator, typename Position>typename lexertl<Iterator, Position>::lexer_macro_data const lexertl<Iterator, Position>::init_macro_data[INIT_MACRO_DATA_SIZE] = { MACRO_DATA("ANY", "[\t\v\f\r\n\\040-\\377]"), MACRO_DATA("ANYCTRL", "[\\000-\\037]"), MACRO_DATA("TRI", "\\?\\?"), MACRO_DATA("BLANK", "[ \t\v\f]"), MACRO_DATA("CCOMMENT", "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"), MACRO_DATA("PPSPACE", "(" "{BLANK}" OR "{CCOMMENT}" ")*"), MACRO_DATA("OCTALDIGIT", "[0-7]"), MACRO_DATA("DIGIT", "[0-9]"), MACRO_DATA("HEXDIGIT", "[0-9a-fA-F]"), MACRO_DATA("OPTSIGN", "[-+]?"), MACRO_DATA("EXPSTART", "[eE][-+]"), MACRO_DATA("EXPONENT", "([eE]{OPTSIGN}{DIGIT}+)"), MACRO_DATA("NONDIGIT", "[a-zA-Z_]"), MACRO_DATA("INTEGER", "(" "(0x|0X){HEXDIGIT}+" OR "0{OCTALDIGIT}*" OR "[1-9]{DIGIT}*" ")"), MACRO_DATA("INTEGER_SUFFIX", "(" "[uU][lL]?" OR "[lL][uU]?" ")"),#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0 MACRO_DATA("LONGINTEGER_SUFFIX", "([uU]([lL][lL])|([lL][lL])[uU]?|i64)"),#else MACRO_DATA("LONGINTEGER_SUFFIX", "([uU]([lL][lL])|([lL][lL])[uU]?)"),#endif MACRO_DATA("FLOAT_SUFFIX", "(" "[fF][lL]?" OR "[lL][fF]?" ")"), MACRO_DATA("CHAR_SPEC", "L?"), MACRO_DATA("BACKSLASH", "(" Q("\\") OR TRI(Q("/")) ")"), MACRO_DATA("ESCAPESEQ", "{BACKSLASH}([abfnrtv?'\"]|{BACKSLASH}|x{HEXDIGIT}+|{OCTALDIGIT}{1,3})"), MACRO_DATA("HEXQUAD", "{HEXDIGIT}{4}"), MACRO_DATA("UNIVERSALCHAR", "{BACKSLASH}(u{HEXQUAD}|U{HEXQUAD}{2})"), MACRO_DATA("POUNDDEF", "(" "#" OR TRI("=") OR Q("%:") ")"), MACRO_DATA("NEWLINEDEF", "(" "\\n" OR "\\r" OR "\\r\\n" ")"),#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0 MACRO_DATA("INCLUDEDEF", "(include|include_next)"),#else MACRO_DATA("INCLUDEDEF", "include"),#endif MACRO_DATA("PP_NUMBERDEF", "\\.?{DIGIT}({DIGIT}|{NONDIGIT}|{EXPSTART}|\\.)*"), MACRO_DATA(NULL, NULL) // should be the last entry};// common C++/C99 token definitionstemplate <typename Iterator, typename Position>typename lexertl<Iterator, Position>::lexer_data const lexertl<Iterator, Position>::init_data[INIT_DATA_SIZE] = { TOKEN_DATA(T_AND, "&"), TOKEN_DATA(T_ANDAND, "&&"), TOKEN_DATA(T_ASSIGN, "="), TOKEN_DATA(T_ANDASSIGN, "&="), TOKEN_DATA(T_OR, Q("|")), TOKEN_DATA(T_OR_TRIGRAPH, "{TRI}!"), TOKEN_DATA(T_ORASSIGN, Q("|=")), TOKEN_DATA(T_ORASSIGN_TRIGRAPH, "{TRI}!="), TOKEN_DATA(T_XOR, Q("^")), TOKEN_DATA(T_XOR_TRIGRAPH, "{TRI}'"), TOKEN_DATA(T_XORASSIGN, Q("^=")), TOKEN_DATA(T_XORASSIGN_TRIGRAPH, "{TRI}'="), TOKEN_DATA(T_COMMA, ","), TOKEN_DATA(T_COLON, ":"), TOKEN_DATA(T_DIVIDEASSIGN, Q("/=")), TOKEN_DATA(T_DIVIDE, Q("/")), TOKEN_DATA(T_DOT, Q(".")), TOKEN_DATA(T_ELLIPSIS, Q(".") "{3}"), TOKEN_DATA(T_EQUAL, "=="), TOKEN_DATA(T_GREATER, ">"), TOKEN_DATA(T_GREATEREQUAL, ">="), TOKEN_DATA(T_LEFTBRACE, Q("{")), TOKEN_DATA(T_LEFTBRACE_ALT, "<" Q("%")), TOKEN_DATA(T_LEFTBRACE_TRIGRAPH, "{TRI}<"), TOKEN_DATA(T_LESS, "<"), TOKEN_DATA(T_LESSEQUAL, "<="), TOKEN_DATA(T_LEFTPAREN, Q("(")), TOKEN_DATA(T_LEFTBRACKET, Q("[")), TOKEN_DATA(T_LEFTBRACKET_ALT, "<:"), TOKEN_DATA(T_LEFTBRACKET_TRIGRAPH, "{TRI}" Q("(")), TOKEN_DATA(T_MINUS, Q("-")), TOKEN_DATA(T_MINUSASSIGN, Q("-=")), TOKEN_DATA(T_MINUSMINUS, Q("-") "{2}"), TOKEN_DATA(T_PERCENT, Q("%")), TOKEN_DATA(T_PERCENTASSIGN, Q("%=")), TOKEN_DATA(T_NOT, "!"), TOKEN_DATA(T_NOTEQUAL, "!="), TOKEN_DATA(T_OROR, Q("|") "{2}"), TOKEN_DATA(T_OROR_TRIGRAPH, "{TRI}!\\||\\|{TRI}!|{TRI}!{TRI}!"), TOKEN_DATA(T_PLUS, Q("+")), TOKEN_DATA(T_PLUSASSIGN, Q("+=")), TOKEN_DATA(T_PLUSPLUS, Q("+") "{2}"), TOKEN_DATA(T_ARROW, Q("->")), TOKEN_DATA(T_QUESTION_MARK, Q("?")), TOKEN_DATA(T_RIGHTBRACE, Q("}")), TOKEN_DATA(T_RIGHTBRACE_ALT, Q("%>")), TOKEN_DATA(T_RIGHTBRACE_TRIGRAPH, "{TRI}>"), TOKEN_DATA(T_RIGHTPAREN, Q(")")), TOKEN_DATA(T_RIGHTBRACKET, Q("]")), TOKEN_DATA(T_RIGHTBRACKET_ALT, ":>"), TOKEN_DATA(T_RIGHTBRACKET_TRIGRAPH, "{TRI}" Q(")")), TOKEN_DATA(T_SEMICOLON, ";"), TOKEN_DATA(T_SHIFTLEFT, "<<"), TOKEN_DATA(T_SHIFTLEFTASSIGN, "<<="), TOKEN_DATA(T_SHIFTRIGHT, ">>"), TOKEN_DATA(T_SHIFTRIGHTASSIGN, ">>="), TOKEN_DATA(T_STAR, Q("*")), TOKEN_DATA(T_COMPL, Q("~")), TOKEN_DATA(T_COMPL_TRIGRAPH, "{TRI}-"), TOKEN_DATA(T_STARASSIGN, Q("*=")), TOKEN_DATA(T_ASM, "asm"), TOKEN_DATA(T_AUTO, "auto"), TOKEN_DATA(T_BOOL, "bool"), TOKEN_DATA(T_FALSE, "false"), TOKEN_DATA(T_TRUE, "true"), TOKEN_DATA(T_BREAK, "break"), TOKEN_DATA(T_CASE, "case"), TOKEN_DATA(T_CATCH, "catch"), TOKEN_DATA(T_CHAR, "char"), TOKEN_DATA(T_CLASS, "class"), TOKEN_DATA(T_CONST, "const"), TOKEN_DATA(T_CONSTCAST, "const_cast"), TOKEN_DATA(T_CONTINUE, "continue"), TOKEN_DATA(T_DEFAULT, "default"), TOKEN_DATA(T_DELETE, "delete"), TOKEN_DATA(T_DO, "do"), TOKEN_DATA(T_DOUBLE, "double"), TOKEN_DATA(T_DYNAMICCAST, "dynamic_cast"), TOKEN_DATA(T_ELSE, "else"), TOKEN_DATA(T_ENUM, "enum"), TOKEN_DATA(T_EXPLICIT, "explicit"),
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -