📄 lexertl_lexer.hpp
字号:
TOKEN_DATA(T_EXPORT, "export"), TOKEN_DATA(T_EXTERN, "extern"), TOKEN_DATA(T_FLOAT, "float"), TOKEN_DATA(T_FOR, "for"), TOKEN_DATA(T_FRIEND, "friend"), TOKEN_DATA(T_GOTO, "goto"), TOKEN_DATA(T_IF, "if"), TOKEN_DATA(T_INLINE, "inline"), TOKEN_DATA(T_INT, "int"), TOKEN_DATA(T_LONG, "long"), TOKEN_DATA(T_MUTABLE, "mutable"), TOKEN_DATA(T_NAMESPACE, "namespace"), TOKEN_DATA(T_NEW, "new"), TOKEN_DATA(T_OPERATOR, "operator"), TOKEN_DATA(T_PRIVATE, "private"), TOKEN_DATA(T_PROTECTED, "protected"), TOKEN_DATA(T_PUBLIC, "public"), TOKEN_DATA(T_REGISTER, "register"), TOKEN_DATA(T_REINTERPRETCAST, "reinterpret_cast"), TOKEN_DATA(T_RETURN, "return"), TOKEN_DATA(T_SHORT, "short"), TOKEN_DATA(T_SIGNED, "signed"), TOKEN_DATA(T_SIZEOF, "sizeof"), TOKEN_DATA(T_STATIC, "static"), TOKEN_DATA(T_STATICCAST, "static_cast"), TOKEN_DATA(T_STRUCT, "struct"), TOKEN_DATA(T_SWITCH, "switch"), TOKEN_DATA(T_TEMPLATE, "template"), TOKEN_DATA(T_THIS, "this"), TOKEN_DATA(T_THROW, "throw"), TOKEN_DATA(T_TRY, "try"), TOKEN_DATA(T_TYPEDEF, "typedef"), TOKEN_DATA(T_TYPEID, "typeid"), TOKEN_DATA(T_TYPENAME, "typename"), TOKEN_DATA(T_UNION, "union"), TOKEN_DATA(T_UNSIGNED, "unsigned"), TOKEN_DATA(T_USING, "using"), TOKEN_DATA(T_VIRTUAL, "virtual"), TOKEN_DATA(T_VOID, "void"), TOKEN_DATA(T_VOLATILE, "volatile"), TOKEN_DATA(T_WCHART, "wchar_t"), TOKEN_DATA(T_WHILE, "while"), TOKEN_DATA(T_PP_DEFINE, "{POUNDDEF}{PPSPACE}define"), TOKEN_DATA(T_PP_IF, "{POUNDDEF}{PPSPACE}if"), TOKEN_DATA(T_PP_IFDEF, "{POUNDDEF}{PPSPACE}ifdef"), TOKEN_DATA(T_PP_IFNDEF, "{POUNDDEF}{PPSPACE}ifndef"), TOKEN_DATA(T_PP_ELSE, "{POUNDDEF}{PPSPACE}else"), TOKEN_DATA(T_PP_ELIF, "{POUNDDEF}{PPSPACE}elif"), TOKEN_DATA(T_PP_ENDIF, "{POUNDDEF}{PPSPACE}endif"), TOKEN_DATA(T_PP_ERROR, "{POUNDDEF}{PPSPACE}error"), TOKEN_DATA(T_PP_QHEADER, "{POUNDDEF}{PPSPACE}{INCLUDEDEF}{PPSPACE}" Q("\"") "[^\\n\\r\"]+" Q("\"")), TOKEN_DATA(T_PP_HHEADER, "{POUNDDEF}{PPSPACE}{INCLUDEDEF}{PPSPACE}" "<" "[^\\n\\r>]+" ">"), TOKEN_DATA(T_PP_INCLUDE, "{POUNDDEF}{PPSPACE}{INCLUDEDEF}{PPSPACE}"), TOKEN_DATA(T_PP_LINE, "{POUNDDEF}{PPSPACE}line"), TOKEN_DATA(T_PP_PRAGMA, "{POUNDDEF}{PPSPACE}pragma"), TOKEN_DATA(T_PP_UNDEF, "{POUNDDEF}{PPSPACE}undef"), TOKEN_DATA(T_PP_WARNING, "{POUNDDEF}{PPSPACE}warning"),#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0 TOKEN_DATA(T_MSEXT_INT8, "__int8"), TOKEN_DATA(T_MSEXT_INT16, "__int16"), TOKEN_DATA(T_MSEXT_INT32, "__int32"), TOKEN_DATA(T_MSEXT_INT64, "__int64"), TOKEN_DATA(T_MSEXT_BASED, "_?" "_based"), TOKEN_DATA(T_MSEXT_DECLSPEC, "_?" "_declspec"), TOKEN_DATA(T_MSEXT_CDECL, "_?" "_cdecl"), TOKEN_DATA(T_MSEXT_FASTCALL, "_?" "_fastcall"), TOKEN_DATA(T_MSEXT_STDCALL, "_?" "_stdcall"), TOKEN_DATA(T_MSEXT_TRY , "__try"), TOKEN_DATA(T_MSEXT_EXCEPT, "__except"), TOKEN_DATA(T_MSEXT_FINALLY, "__finally"), TOKEN_DATA(T_MSEXT_LEAVE, "__leave"), TOKEN_DATA(T_MSEXT_INLINE, "_?" "_inline"), TOKEN_DATA(T_MSEXT_ASM, "_?" "_asm"), TOKEN_DATA(T_MSEXT_PP_REGION, "{POUNDDEF}{PPSPACE}region"), TOKEN_DATA(T_MSEXT_PP_ENDREGION, "{POUNDDEF}{PPSPACE}endregion"),#endif // BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0 TOKEN_DATA(T_LONGINTLIT, "{INTEGER}{LONGINTEGER_SUFFIX}"), TOKEN_DATA(T_INTLIT, "{INTEGER}{INTEGER_SUFFIX}?"), TOKEN_DATA(T_FLOATLIT, "(" "{DIGIT}*" Q(".") "{DIGIT}+" OR "{DIGIT}+" Q(".") "){EXPONENT}?{FLOAT_SUFFIX}?" OR "{DIGIT}+{EXPONENT}{FLOAT_SUFFIX}?"),#if BOOST_WAVE_USE_STRICT_LEXER != 0 TOKEN_DATA(T_IDENTIFIER, "(" "{NONDIGIT}" OR "{UNIVERSALCHAR}" ")" "(" "{NONDIGIT}" OR "{DIGIT}" OR "{UNIVERSALCHAR}" ")*"),#else TOKEN_DATA(T_IDENTIFIER, "(" "{NONDIGIT}" OR Q("$") OR "{UNIVERSALCHAR}" ")" "(" "{NONDIGIT}" OR Q("$") OR "{DIGIT}" OR "{UNIVERSALCHAR}" ")*"),#endif TOKEN_DATA(T_CCOMMENT, "{CCOMMENT}"), TOKEN_DATA(T_CPPCOMMENT, Q("/") Q("/[^\\n\\r]*") "{NEWLINEDEF}" ), TOKEN_DATA(T_CHARLIT, "{CHAR_SPEC}" "'" "({ESCAPESEQ}|[^\\n\\r']|{UNIVERSALCHAR})+" "'"), TOKEN_DATA(T_STRINGLIT, "{CHAR_SPEC}" Q("\"") "({ESCAPESEQ}|[^\\n\\r\"]|{UNIVERSALCHAR})*" Q("\"")), TOKEN_DATA(T_SPACE, "{BLANK}+"), TOKEN_DATA(T_CONTLINE, Q("\\") "\\n"), TOKEN_DATA(T_NEWLINE, "{NEWLINEDEF}"), TOKEN_DATA(T_POUND_POUND, "##"), TOKEN_DATA(T_POUND_POUND_ALT, Q("%:") Q("%:")), TOKEN_DATA(T_POUND_POUND_TRIGRAPH, "({TRI}=){2}"), TOKEN_DATA(T_POUND, "#"), TOKEN_DATA(T_POUND_ALT, Q("%:")), TOKEN_DATA(T_POUND_TRIGRAPH, "{TRI}="), TOKEN_DATA(T_ANY_TRIGRAPH, "{TRI}\\/"), TOKEN_DATA(T_ANY, "{ANY}"), TOKEN_DATA(T_ANYCTRL, "{ANYCTRL}"), // this should be the last recognized token { token_id(0) } // this should be the last entry};// C++ only token definitionstemplate <typename Iterator, typename Position>typename lexertl<Iterator, Position>::lexer_data const lexertl<Iterator, Position>::init_data_cpp[INIT_DATA_CPP_SIZE] = { TOKEN_DATA(T_AND_ALT, "bitand"), TOKEN_DATA(T_ANDASSIGN_ALT, "and_eq"), TOKEN_DATA(T_ANDAND_ALT, "and"), TOKEN_DATA(T_OR_ALT, "bitor"), TOKEN_DATA(T_ORASSIGN_ALT, "or_eq"), TOKEN_DATA(T_OROR_ALT, "or"), TOKEN_DATA(T_XORASSIGN_ALT, "xor_eq"), TOKEN_DATA(T_XOR_ALT, "xor"), TOKEN_DATA(T_NOTEQUAL_ALT, "not_eq"), TOKEN_DATA(T_NOT_ALT, "not"), TOKEN_DATA(T_COMPL_ALT, "compl"),#if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0 TOKEN_DATA(T_IMPORT, "import"),#endif TOKEN_DATA(T_ARROWSTAR, Q("->") Q("*")), TOKEN_DATA(T_DOTSTAR, Q(".") Q("*")), TOKEN_DATA(T_COLON_COLON, "::"), { token_id(0) } // this should be the last entry};// pp-number specific token definitionstemplate <typename Iterator, typename Position>typename lexertl<Iterator, Position>::lexer_data const lexertl<Iterator, Position>::init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE] = { TOKEN_DATA(T_PP_NUMBER, "{PP_NUMBERDEF}"), { token_id(0) } // this should be the last entry};#undef MACRO_DATA#undef TOKEN_DATA#undef OR#undef TRI#undef Q///////////////////////////////////////////////////////////////////////////////// initialize lexertl lexer from C++ token regex'stemplate <typename Iterator, typename Position>inline boollexertl<Iterator, Position>::init_dfa(wave::language_support lang, Position const& pos, bool force_reinit){ if (has_compiled_dfa_) return true;std::ifstream dfa_in("wave_lexertl_lexer.dfa", std::ios::in|std::ios::binary); if (force_reinit || !dfa_in.is_open() || !load (dfa_in)) { dfa_in.close(); state_machine_.clear(); // register macro definitions ::lexertl::rules rules; for (int k = 0; NULL != init_macro_data[k].name; ++k) { rules.add_macro(init_macro_data[k].name, init_macro_data[k].macro); } // if pp-numbers should be preferred, insert the corresponding rule first if (wave::need_prefer_pp_numbers(lang)) { for (int j = 0; 0 != init_data_pp_number[j].tokenid; ++j) { rules.add(init_data_pp_number[j].tokenregex, init_data_pp_number[j].tokenid); } } // if in C99 mode, some of the keywords are not valid if (!wave::need_c99(lang)) { for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) { rules.add(init_data_cpp[j].tokenregex, init_data_cpp[j].tokenid); } } for (int i = 0; 0 != init_data[i].tokenid; ++i) { rules.add(init_data[i].tokenregex, init_data[i].tokenid); } // generate minimized DFA try { ::lexertl::generator::build (rules, state_machine_); ::lexertl::generator::minimise_dfa (state_machine_); } catch (std::runtime_error const& e) { string_type msg("lexertl initialization error: "); msg += e.what(); BOOST_WAVE_LEXER_THROW(wave::cpplexer::lexing_exception, unexpected_error, msg.c_str(), pos.get_line(), pos.get_column(), pos.get_file().c_str()); return false; } std::ofstream dfa_out ("wave_lexertl_lexer.dfa", std::ios::out|std::ios::binary|std::ios::trunc); if (dfa_out.is_open()) save (dfa_out); } has_compiled_dfa_ = true; return true;}#endif // BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0///////////////////////////////////////////////////////////////////////////////// return next token from the input streamtemplate <typename Iterator, typename Position>inline wave::token_id lexertl<Iterator, Position>::next_token(Iterator &first, Iterator const &last, string_type& token_value){#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0 size_t const* const lookup = &state_machine_._lookup[0]->front (); size_t const dfa_alphabet = state_machine_._dfa_alphabet[0]; size_t const* dfa = &state_machine_._dfa[0]->front(); size_t const* ptr = dfa + dfa_alphabet + ::lexertl::dfa_offset;#else const std::size_t *ptr = dfa + dfa_offset;#endif // BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0 Iterator curr = first; Iterator end_token = first; bool end_state = (*ptr != 0); size_t id = *(ptr + 1); while (curr != last) { size_t const state = ptr[lookup[*curr]]; if (0 == state) break; ++curr;#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0 ptr = &dfa[state * (dfa_alphabet + ::lexertl::dfa_offset)];#else ptr = &dfa[state * dfa_offset];#endif // BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0 if (0 != *ptr) { end_state = true; id = *(ptr + 1); end_token = curr; } } if (end_state) { if (T_ANY == id) { id = TOKEN_FROM_ID(*first, UnknownTokenType); } // return longest match string_type str(first, end_token); token_value.swap(str);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -