cpp_slex_lexer.hpp
来自「Boost provides free peer-reviewed portab」· HPP 代码 · 共 769 行 · 第 1/2 页
HPP
769 行
TOKEN_DATA(POUND_POUND_TRIGRAPH, TRI("=") TRI("=")), TOKEN_DATA(POUND, "#"), TOKEN_DATA(POUND_ALT, Q("%:")), TOKEN_DATA(POUND_TRIGRAPH, TRI("=")), TOKEN_DATA(ANY_TRIGRAPH, TRI(Q("/"))), TOKEN_DATA(ANY, "."), // this should be the last recognized token { token_id(0) } // this should be the last entry};///////////////////////////////////////////////////////////////////////////////// C++ only token definitionstemplate <typename IteratorT, typename PositionT>typename lexer_base<IteratorT, PositionT>::lexer_data const lexer<IteratorT, PositionT>::init_data_cpp[INIT_DATA_CPP_SIZE] = { TOKEN_DATA(AND_ALT, "bitand"), TOKEN_DATA(ANDASSIGN_ALT, "and_eq"), TOKEN_DATA(ANDAND_ALT, "and"), TOKEN_DATA(OR_ALT, "bitor"), TOKEN_DATA(ORASSIGN_ALT, "or_eq"), TOKEN_DATA(OROR_ALT, "or"), TOKEN_DATA(XORASSIGN_ALT, "xor_eq"), TOKEN_DATA(XOR_ALT, "xor"), TOKEN_DATA(NOTEQUAL_ALT, "not_eq"), TOKEN_DATA(NOT_ALT, "not"), TOKEN_DATA(COMPL_ALT, "compl"),#if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0 TOKEN_DATA(IMPORT, "import"),#endif TOKEN_DATA(ARROWSTAR, Q("->") Q("*")), TOKEN_DATA(DOTSTAR, Q(".") Q("*")), TOKEN_DATA(COLON_COLON, "::"), { token_id(0) } // this should be the last entry};///////////////////////////////////////////////////////////////////////////////// C++ only token definitionstemplate <typename IteratorT, typename PositionT>typename lexer_base<IteratorT, PositionT>::lexer_data const lexer<IteratorT, PositionT>::init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE] = { TOKEN_DATA(PP_NUMBER, PP_NUMBERDEF), { token_id(0) } // this should be the last entry};///////////////////////////////////////////////////////////////////////////////// undefine macros, required for regular expression definitions#undef INCLUDEDEF#undef POUNDDEF#undef CCOMMENT#undef PPSPACE#undef DIGIT#undef OCTALDIGIT#undef HEXDIGIT#undef NONDIGIT#undef OPTSIGN#undef EXPSTART#undef EXPONENT#undef LONGINTEGER_SUFFIX#undef INTEGER_SUFFIX#undef INTEGER#undef FLOAT_SUFFIX#undef CHAR_SPEC#undef BACKSLASH #undef ESCAPESEQ #undef HEXQUAD #undef UNIVERSALCHAR#undef PP_NUMBERDEF#undef Q#undef TRI#undef OR#undef TOKEN_DATA#undef TOKEN_DATA_EX///////////////////////////////////////////////////////////////////////////////// initialize cpp lexer with token datatemplate <typename IteratorT, typename PositionT>inlinelexer_base<IteratorT, PositionT>::lexer_base() : base_type(NUM_LEXER_STATES){}template <typename IteratorT, typename PositionT>inline voidlexer<IteratorT, PositionT>::init_dfa(boost::wave::language_support lang){ if (this->has_compiled_dfa()) return;// if pp-numbers should be preferred, insert the corresponding rule first if (boost::wave::need_prefer_pp_numbers(lang)) { for (int j = 0; 0 != init_data_pp_number[j].tokenid; ++j) { this->register_regex(init_data_pp_number[j].tokenregex, init_data_pp_number[j].tokenid, init_data_pp_number[j].tokencb, init_data_pp_number[j].lexerstate); } } // if in C99 mode, some of the keywords are not valid if (!boost::wave::need_c99(lang)) { for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) { this->register_regex(init_data_cpp[j].tokenregex, init_data_cpp[j].tokenid, init_data_cpp[j].tokencb, init_data_cpp[j].lexerstate); } } for (int i = 0; 0 != init_data[i].tokenid; ++i) { this->register_regex(init_data[i].tokenregex, init_data[i].tokenid, init_data[i].tokencb, init_data[i].lexerstate); }}///////////////////////////////////////////////////////////////////////////////// get time of last compilation of this filetemplate <typename IteratorT, typename PositionT>boost::wave::util::time_conversion_helper lexer<IteratorT, PositionT>::compilation_time(__DATE__ " " __TIME__);///////////////////////////////////////////////////////////////////////////////} // namespace lexer///////////////////////////////////////////////////////////////////////////////// template <typename IteratorT, typename PositionT>inline void init_lexer (lexer::lexer<IteratorT, PositionT> &lexer, boost::wave::language_support language, bool force_reinit = false){ if (lexer.has_compiled_dfa()) return; // nothing to do using std::ifstream; using std::ofstream; using std::ios; using std::cerr; using std::endl; ifstream dfa_in("wave_slex_lexer.dfa", ios::in|ios::binary); lexer.init_dfa(language); if (force_reinit || !dfa_in.is_open() || !lexer.load (dfa_in, (long)lexer.get_compilation_time())) {#if defined(BOOST_SPIRIT_DEBUG) cerr << "Compiling regular expressions for slex ...";#endif // defined(BOOST_SPIRIT_DEBUG) dfa_in.close(); lexer.create_dfa(); ofstream dfa_out ("wave_slex_lexer.dfa", ios::out|ios::binary|ios::trunc); if (dfa_out.is_open()) lexer.save (dfa_out, (long)lexer.get_compilation_time());#if defined(BOOST_SPIRIT_DEBUG) cerr << " Done." << endl;#endif // defined(BOOST_SPIRIT_DEBUG) }}///////////////////////////////////////////////////////////////////////////////// // lex_functor/////////////////////////////////////////////////////////////////////////////////template <typename IteratorT, typename PositionT = wave::util::file_position_type>class slex_functor : public slex_input_interface< typename lexer::lexer<IteratorT, PositionT>::token_type >{public: typedef boost::wave::util::position_iterator<IteratorT, PositionT> iterator_type; typedef typename std::iterator_traits<IteratorT>::value_type char_type; typedef BOOST_WAVE_STRINGTYPE string_type; typedef typename lexer::lexer<IteratorT, PositionT>::token_type token_type; slex_functor(IteratorT const &first_, IteratorT const &last_, PositionT const &pos_, boost::wave::language_support language_) : first(first_, last_, pos_), language(language_), at_eof(false) { // initialize lexer dfa tables init_lexer(lexer, language_); } virtual ~slex_functor() {}// get the next token from the input stream token_type& get(token_type& result) { if (!at_eof) { do { // generate and return the next token std::string value; PositionT pos = first.get_position(); // begin of token position token_id id = token_id(lexer.next_token(first, last, &value)); if ((token_id)(-1) == id) id = T_EOF; // end of input reached string_type token_val(value.c_str()); if (T_CONTLINE != id) { // The cast should avoid spurious warnings about missing case labels // for the other token ids's. switch (static_cast<unsigned int>(id)) { case T_IDENTIFIER: // test identifier characters for validity (throws if // invalid chars found) if (!boost::wave::need_no_character_validation(language)) { using boost::wave::cpplexer::impl::validate_identifier_name; validate_identifier_name(token_val, pos.get_line(), pos.get_column(), pos.get_file()); } break; case T_STRINGLIT: case T_CHARLIT: // test literal characters for validity (throws if invalid // chars found) if (boost::wave::need_convert_trigraphs(language)) { using boost::wave::cpplexer::impl::convert_trigraphs; token_val = convert_trigraphs(token_val); } if (!boost::wave::need_no_character_validation(language)) { using boost::wave::cpplexer::impl::validate_literal; validate_literal(token_val, pos.get_line(), pos.get_column(), pos.get_file()); } break; case T_LONGINTLIT: // supported in C99 and long_long mode if (!boost::wave::need_long_long(language)) { // syntax error: not allowed in C++ mode BOOST_WAVE_LEXER_THROW( boost::wave::cpplexer::lexing_exception, invalid_long_long_literal, value.c_str(), pos.get_line(), pos.get_column(), pos.get_file().c_str()); } break;#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0 case T_PP_HHEADER: case T_PP_QHEADER: case T_PP_INCLUDE: // convert to the corresponding ..._next token, if appropriate { // Skip '#' and whitespace and see whether we find an // 'include_next' here. typename string_type::size_type start = value.find("include"); if (0 == value.compare(start, 12, "include_next", 12)) id = token_id(id | AltTokenType); break; }#endif // BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0 case T_EOF: // T_EOF is returned as a valid token, the next call will // return T_EOI, i.e. the actual end of input at_eof = true; token_val.clear(); break; case T_OR_TRIGRAPH: case T_XOR_TRIGRAPH: case T_LEFTBRACE_TRIGRAPH: case T_RIGHTBRACE_TRIGRAPH: case T_LEFTBRACKET_TRIGRAPH: case T_RIGHTBRACKET_TRIGRAPH: case T_COMPL_TRIGRAPH: case T_POUND_TRIGRAPH: case T_ANY_TRIGRAPH: if (boost::wave::need_convert_trigraphs(language)) { using boost::wave::cpplexer::impl::convert_trigraph; token_val = convert_trigraph(token_val); } break; } result = token_type(id, token_val, pos);#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 return guards.detect_guard(result);#else return result;#endif } // skip the T_CONTLINE token } while (true); } return result = token_type(); // return T_EOI } void set_position(PositionT const &pos) { // set position has to change the file name and line number only first.get_position().set_file(pos.get_file()); first.get_position().set_line(pos.get_line()); }#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 bool has_include_guards(std::string& guard_name) const { return guards.detected(guard_name); }#endifprivate: iterator_type first; iterator_type last; boost::wave::language_support language; static lexer::lexer<IteratorT, PositionT> lexer; // needed only once bool at_eof;#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 include_guards<token_type> guards;#endif};template <typename IteratorT, typename PositionT>lexer::lexer<IteratorT, PositionT> slex_functor<IteratorT, PositionT>::lexer;/////////////////////////////////////////////////////////////////////////////////// The 'new_lexer' function allows the opaque generation of a new lexer object.// It is coupled to the iterator type to allow to decouple the lexer/iterator // configurations at compile time.//// This function is declared inside the cpp_slex_token.hpp file, which is // referenced by the source file calling the lexer and the source file, which// instantiates the lex_functor. But is is defined here, so it will be // instantiated only while compiling the source file, which instantiates the // lex_functor. While the cpp_slex_token.hpp file may be included everywhere,// this file (cpp_slex_lexer.hpp) should be included only once. This allows// to decouple the lexer interface from the lexer implementation and reduces // compilation time.////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // The new_lexer_gen<>::new_lexer function (declared in cpp_slex_token.hpp)// should be defined inline, if the lex_functor shouldn't be instantiated // separately from the lex_iterator.//// Separate (explicit) instantiation helps to reduce compilation time./////////////////////////////////////////////////////////////////////////////////#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0#define BOOST_WAVE_SLEX_NEW_LEXER_INLINE#else#define BOOST_WAVE_SLEX_NEW_LEXER_INLINE inline#endif template <typename IteratorT, typename PositionT>BOOST_WAVE_SLEX_NEW_LEXER_INLINElex_input_interface<slex_token<PositionT> > *new_lexer_gen<IteratorT, PositionT>::new_lexer(IteratorT const &first, IteratorT const &last, PositionT const &pos, boost::wave::language_support language){ return new slex_functor<IteratorT, PositionT>(first, last, pos, language);}#undef BOOST_WAVE_SLEX_NEW_LEXER_INLINE///////////////////////////////////////////////////////////////////////////////} // namespace slex} // namespace cpplexer} // namespace wave} // namespace boost #endif // !defined(SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED)
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?