📄 cpp_re2c_lexer.hpp
字号:
/*============================================================================= Boost.Wave: A Standard compliant C++ preprocessor library Re2C based C++ lexer http://www.boost.org/ Copyright (c) 2001-2008 Hartmut Kaiser. Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)=============================================================================*/#if !defined(CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)#define CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED#include <string>#include <cstdio>#include <cstdarg>#if defined(BOOST_SPIRIT_DEBUG)#include <iostream>#endif // defined(BOOST_SPIRIT_DEBUG)#include <boost/concept_check.hpp>#include <boost/assert.hpp>#include <boost/spirit/include/classic_core.hpp>#include <boost/wave/wave_config.hpp>#include <boost/wave/language_support.hpp>#include <boost/wave/token_ids.hpp>#include <boost/wave/util/file_position.hpp>#include <boost/wave/cpplexer/validate_universal_char.hpp>#include <boost/wave/cpplexer/cpplexer_exceptions.hpp>#include <boost/wave/cpplexer/token_cache.hpp>#include <boost/wave/cpplexer/convert_trigraphs.hpp>#include <boost/wave/cpplexer/cpp_lex_interface.hpp>#include <boost/wave/cpplexer/re2clex/scanner.hpp>#include <boost/wave/cpplexer/re2clex/cpp_re.hpp>#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0#include <boost/wave/cpplexer/detect_include_guards.hpp>#endif#include <boost/wave/cpplexer/cpp_lex_interface_generator.hpp>// this must occur after all of the includes and before any code appears#ifdef BOOST_HAS_ABI_HEADERS#include BOOST_ABI_PREFIX#endif///////////////////////////////////////////////////////////////////////////////namespace boost {namespace wave {namespace cpplexer {namespace re2clex {///////////////////////////////////////////////////////////////////////////////// // encapsulation of the re2c based cpp lexer/////////////////////////////////////////////////////////////////////////////////template <typename IteratorT, typename PositionT = boost::wave::util::file_position_type>class lexer {public: typedef lex_token<PositionT> token_type; typedef typename token_type::string_type string_type; lexer(IteratorT const &first, IteratorT const &last, PositionT const &pos, boost::wave::language_support language_); ~lexer(); lex_token<PositionT>& get(lex_token<PositionT>&); void set_position(PositionT const &pos) { // set position has to change the file name and line number only filename = pos.get_file(); scanner.line = pos.get_line();// scanner.column = scanner.curr_column = pos.get_column(); scanner.file_name = filename.c_str(); }#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 bool has_include_guards(std::string& guard_name) const { return guards.detected(guard_name); }#endif // error reporting from the re2c generated lexer static int report_error(Scanner const* s, int code, char const *, ...);private: static char const *tok_names[]; Scanner scanner; string_type filename; string_type value; bool at_eof; boost::wave::language_support language;#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 include_guards<token_type> guards;#endif static token_cache<string_type> const cache;};///////////////////////////////////////////////////////////////////////////////// initialize cpp lexer template <typename IteratorT, typename PositionT>inlinelexer<IteratorT, PositionT>::lexer(IteratorT const &first, IteratorT const &last, PositionT const &pos, boost::wave::language_support language_) : filename(pos.get_file()), at_eof(false), language(language_){ using namespace std; // some systems have memset in std memset(&scanner, '\0', sizeof(Scanner)); scanner.eol_offsets = aq_create(); if (first != last) { scanner.first = scanner.act = (uchar *)&(*first); scanner.last = scanner.first + std::distance(first, last); } scanner.line = pos.get_line(); scanner.column = scanner.curr_column = pos.get_column(); scanner.error_proc = report_error; scanner.file_name = filename.c_str(); #if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0 scanner.enable_ms_extensions = true;#else scanner.enable_ms_extensions = false;#endif#if BOOST_WAVE_SUPPORT_VARIADICS_PLACEMARKERS != 0 scanner.act_in_c99_mode = boost::wave::need_c99(language_);#endif#if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0 scanner.enable_import_keyword = !boost::wave::need_c99(language_);#else scanner.enable_import_keyword = false;#endif scanner.detect_pp_numbers = boost::wave::need_prefer_pp_numbers(language_); scanner.single_line_only = boost::wave::need_single_line(language_);}template <typename IteratorT, typename PositionT>inlinelexer<IteratorT, PositionT>::~lexer() { using namespace std; // some systems have free in std aq_terminate(scanner.eol_offsets); free(scanner.bot);}///////////////////////////////////////////////////////////////////////////////// get the next token from the input streamtemplate <typename IteratorT, typename PositionT>inline lex_token<PositionT>&lexer<IteratorT, PositionT>::get(lex_token<PositionT>& result){ if (at_eof) return result = lex_token<PositionT>(); // return T_EOI unsigned int actline = scanner.line; token_id id = token_id(scan(&scanner)); switch (static_cast<unsigned int>(id)) { case T_IDENTIFIER: // test identifier characters for validity (throws if invalid chars found) value = string_type((char const *)scanner.tok, scanner.cur-scanner.tok); if (!boost::wave::need_no_character_validation(language)) impl::validate_identifier_name(value, actline, scanner.column, filename); break; case T_STRINGLIT: case T_CHARLIT: // test literal characters for validity (throws if invalid chars found) value = string_type((char const *)scanner.tok, scanner.cur-scanner.tok); if (boost::wave::need_convert_trigraphs(language)) value = impl::convert_trigraphs(value); if (!boost::wave::need_no_character_validation(language)) impl::validate_literal(value, actline, scanner.column, filename); break;#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0 case T_PP_HHEADER: case T_PP_QHEADER: case T_PP_INCLUDE: // convert to the corresponding ..._next token, if appropriate { value = string_type((char const *)scanner.tok, scanner.cur-scanner.tok); // Skip '#' and whitespace and see whether we find an 'include_next' here. typename string_type::size_type start = value.find("include"); if (value.compare(start, 12, "include_next", 12) == 0) id = token_id(id | AltTokenType); break; }#endif case T_LONGINTLIT: // supported in C99 and long_long mode value = string_type((char const *)scanner.tok, scanner.cur-scanner.tok); if (!boost::wave::need_long_long(language)) { // syntax error: not allowed in C++ mode BOOST_WAVE_LEXER_THROW(lexing_exception, invalid_long_long_literal, value.c_str(), actline, scanner.column, filename.c_str()); } break; case T_OCTALINT: case T_DECIMALINT: case T_HEXAINT: case T_INTLIT: case T_FLOATLIT: case T_FIXEDPOINTLIT: case T_CCOMMENT: case T_CPPCOMMENT: case T_SPACE: case T_SPACE2: case T_ANY: case T_PP_NUMBER: value = string_type((char const *)scanner.tok, scanner.cur-scanner.tok); break; case T_EOF: // T_EOF is returned as a valid token, the next call will return T_EOI, // i.e. the actual end of input at_eof = true; value.clear(); break; case T_OR_TRIGRAPH: case T_XOR_TRIGRAPH: case T_LEFTBRACE_TRIGRAPH: case T_RIGHTBRACE_TRIGRAPH: case T_LEFTBRACKET_TRIGRAPH: case T_RIGHTBRACKET_TRIGRAPH: case T_COMPL_TRIGRAPH: case T_POUND_TRIGRAPH: if (boost::wave::need_convert_trigraphs(language)) { value = cache.get_token_value(BASEID_FROM_TOKEN(id)); } else { value = string_type((char const *)scanner.tok, scanner.cur-scanner.tok); } break; case T_ANY_TRIGRAPH: if (boost::wave::need_convert_trigraphs(language)) { value = impl::convert_trigraph( string_type((char const *)scanner.tok)); } else { value = string_type((char const *)scanner.tok, scanner.cur-scanner.tok); } break; default: if (CATEGORY_FROM_TOKEN(id) != EXTCATEGORY_FROM_TOKEN(id) || IS_CATEGORY(id, UnknownTokenType)) { value = string_type((char const *)scanner.tok, scanner.cur-scanner.tok); } else { value = cache.get_token_value(id); } break; } // std::cerr << boost::wave::get_token_name(id) << ": " << value << std::endl; // the re2c lexer reports the new line number for newline tokens result = token_type(id, value, PositionT(filename, actline, scanner.column));#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 return guards.detect_guard(result);#else return result;#endif}template <typename IteratorT, typename PositionT>inline int lexer<IteratorT, PositionT>::report_error(Scanner const *s, int errcode, char const *msg, ...){ BOOST_ASSERT(0 != s); BOOST_ASSERT(0 != msg); using namespace std; // some system have vsprintf in namespace std char buffer[200]; // should be large enough va_list params; va_start(params, msg); vsprintf(buffer, msg, params); va_end(params); BOOST_WAVE_LEXER_THROW_VAR(lexing_exception, errcode, buffer, s->line, s->column, s->file_name);// BOOST_UNREACHABLE_RETURN(0); return 0;}///////////////////////////////////////////////////////////////////////////////// // lex_functor// /////////////////////////////////////////////////////////////////////////////// template <typename IteratorT, typename PositionT = boost::wave::util::file_position_type>class lex_functor : public lex_input_interface_generator< typename lexer<IteratorT, PositionT>::token_type >{ public: typedef typename lexer<IteratorT, PositionT>::token_type token_type; lex_functor(IteratorT const &first, IteratorT const &last, PositionT const &pos, boost::wave::language_support language) : re2c_lexer(first, last, pos, language) {} virtual ~lex_functor() {} // get the next token from the input stream token_type& get(token_type& result) { return re2c_lexer.get(result); } void set_position(PositionT const &pos) { re2c_lexer.set_position(pos); }#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0 bool has_include_guards(std::string& guard_name) const { return re2c_lexer.has_include_guards(guard_name); }#endif private: lexer<IteratorT, PositionT> re2c_lexer;};///////////////////////////////////////////////////////////////////////////////template <typename IteratorT, typename PositionT>token_cache<typename lexer<IteratorT, PositionT>::string_type> const lexer<IteratorT, PositionT>::cache = token_cache<typename lexer<IteratorT, PositionT>::string_type>(); } // namespace re2clex///////////////////////////////////////////////////////////////////////////////// // The new_lexer_gen<>::new_lexer function (declared in cpp_lex_interface.hpp)// should be defined inline, if the lex_functor shouldn't be instantiated // separately from the lex_iterator.//// Separate (explicit) instantiation helps to reduce compilation time./////////////////////////////////////////////////////////////////////////////////#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0#define BOOST_WAVE_RE2C_NEW_LEXER_INLINE#else#define BOOST_WAVE_RE2C_NEW_LEXER_INLINE inline#endif /////////////////////////////////////////////////////////////////////////////////// The 'new_lexer' function allows the opaque generation of a new lexer object.// It is coupled to the iterator type to allow to decouple the lexer/iterator // configurations at compile time.//// This function is declared inside the cpp_lex_token.hpp file, which is // referenced by the source file calling the lexer and the source file, which// instantiates the lex_functor. But is is defined here, so it will be // instantiated only while compiling the source file, which instantiates the // lex_functor. While the cpp_re2c_token.hpp file may be included everywhere,// this file (cpp_re2c_lexer.hpp) should be included only once. This allows// to decouple the lexer interface from the lexer implementation and reduces // compilation time./////////////////////////////////////////////////////////////////////////////////template <typename IteratorT, typename PositionT>BOOST_WAVE_RE2C_NEW_LEXER_INLINElex_input_interface<lex_token<PositionT> > *new_lexer_gen<IteratorT, PositionT>::new_lexer(IteratorT const &first, IteratorT const &last, PositionT const &pos, boost::wave::language_support language){ using re2clex::lex_functor; return new lex_functor<IteratorT, PositionT>(first, last, pos, language);}#undef BOOST_WAVE_RE2C_NEW_LEXER_INLINE///////////////////////////////////////////////////////////////////////////////} // namespace cpplexer} // namespace wave} // namespace boost // the suffix header occurs after all of the code#ifdef BOOST_HAS_ABI_HEADERS#include BOOST_ABI_SUFFIX#endif#endif // !defined(CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -