cpp_slex_lexer.hpp

来自「Boost provides free peer-reviewed portab」· HPP 代码 · 共 769 行 · 第 1/2 页

HPP
769
字号
    // (tail of a token table whose beginning lies outside of this excerpt; 
    //  init_dfa() below iterates a table named init_data and registers its
    //  entries last)
    TOKEN_DATA(POUND_POUND_TRIGRAPH, TRI("=") TRI("=")),
    TOKEN_DATA(POUND, "#"),
    TOKEN_DATA(POUND_ALT, Q("%:")),
    TOKEN_DATA(POUND_TRIGRAPH, TRI("=")),
    TOKEN_DATA(ANY_TRIGRAPH, TRI(Q("/"))),
    TOKEN_DATA(ANY, "."),     // this should be the last recognized token
    { token_id(0) }           // this should be the last entry
};

///////////////////////////////////////////////////////////////////////////////
// C++ only token definitions
//
// This table is registered by init_dfa() only if need_c99(lang) is false.
// Note that the longer spellings ("and_eq", "or_eq", "xor_eq", "not_eq") are 
// listed before the spellings they start with ("and", "or", "xor", "not").
// The entry with token_id(0) terminates the table; init_dfa() stops there.
template <typename IteratorT, typename PositionT>
typename lexer_base<IteratorT, PositionT>::lexer_data const 
lexer<IteratorT, PositionT>::init_data_cpp[INIT_DATA_CPP_SIZE] = 
{
    TOKEN_DATA(AND_ALT, "bitand"),
    TOKEN_DATA(ANDASSIGN_ALT, "and_eq"),
    TOKEN_DATA(ANDAND_ALT, "and"),
    TOKEN_DATA(OR_ALT, "bitor"),
    TOKEN_DATA(ORASSIGN_ALT, "or_eq"),
    TOKEN_DATA(OROR_ALT, "or"),
    TOKEN_DATA(XORASSIGN_ALT, "xor_eq"),
    TOKEN_DATA(XOR_ALT, "xor"),
    TOKEN_DATA(NOTEQUAL_ALT, "not_eq"),
    TOKEN_DATA(NOT_ALT, "not"),
    TOKEN_DATA(COMPL_ALT, "compl"),
#if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0
    TOKEN_DATA(IMPORT, "import"),
#endif
    TOKEN_DATA(ARROWSTAR, Q("->") Q("*")),
    TOKEN_DATA(DOTSTAR, Q(".") Q("*")),
    TOKEN_DATA(COLON_COLON, "::"),
    { token_id(0) }       // this should be the last entry
};

///////////////////////////////////////////////////////////////////////////////
// pp-number token definition
//
// This table is registered by init_dfa() before all other tables, but only 
// if need_prefer_pp_numbers(lang) is true.
template <typename IteratorT, typename PositionT>
typename lexer_base<IteratorT, PositionT>::lexer_data const 
lexer<IteratorT, PositionT>::init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE] = 
{
    TOKEN_DATA(PP_NUMBER, PP_NUMBERDEF),
    { token_id(0) }       // this should be the last entry
};

///////////////////////////////////////////////////////////////////////////////
//  undefine macros, required for regular expression definitions
#undef INCLUDEDEF
#undef POUNDDEF
#undef CCOMMENT
#undef PPSPACE
#undef DIGIT
#undef OCTALDIGIT
#undef HEXDIGIT
#undef NONDIGIT
#undef OPTSIGN
#undef EXPSTART
#undef EXPONENT
#undef LONGINTEGER_SUFFIX
#undef INTEGER_SUFFIX
#undef INTEGER
#undef FLOAT_SUFFIX
#undef CHAR_SPEC
#undef BACKSLASH    
#undef ESCAPESEQ    
#undef HEXQUAD      
#undef UNIVERSALCHAR
#undef PP_NUMBERDEF

#undef Q
#undef TRI
#undef OR

#undef TOKEN_DATA
#undef TOKEN_DATA_EX

///////////////////////////////////////////////////////////////////////////////
// initialize cpp lexer with token data
//
// The constructor only forwards NUM_LEXER_STATES to the base class; the 
// token data itself is registered later by lexer::init_dfa().
template <typename IteratorT, typename PositionT>
inline
lexer_base<IteratorT, PositionT>::lexer_base() 
:   base_type(NUM_LEXER_STATES)
{
}

///////////////////////////////////////////////////////////////////////////////
// Register the token regular expressions for the given language mode.
//
// Does nothing if has_compiled_dfa() reports true. Otherwise the tables are 
// registered in this order:
//   1. init_data_pp_number (only if need_prefer_pp_numbers(lang))
//   2. init_data_cpp       (only if !need_c99(lang))
//   3. init_data           (always)
// Every table is walked up to (not including) its first entry with a 
// tokenid of 0.
template <typename IteratorT, typename PositionT>
inline void
lexer<IteratorT, PositionT>::init_dfa(boost::wave::language_support lang)
{
    if (this->has_compiled_dfa())
        return;

// if pp-numbers should be preferred, insert the corresponding rule first
    if (boost::wave::need_prefer_pp_numbers(lang)) {
        for (int j = 0; 0 != init_data_pp_number[j].tokenid; ++j) {
            this->register_regex(init_data_pp_number[j].tokenregex, 
                init_data_pp_number[j].tokenid, init_data_pp_number[j].tokencb, 
                init_data_pp_number[j].lexerstate);
        }
    }
    
    // if in C99 mode, some of the keywords are not valid    
    if (!boost::wave::need_c99(lang)) {
        for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) {
            this->register_regex(init_data_cpp[j].tokenregex, 
                init_data_cpp[j].tokenid, init_data_cpp[j].tokencb, 
                init_data_cpp[j].lexerstate);
        }
    }
    
    // the common token definitions are registered in any case
    for (int i = 0; 0 != init_data[i].tokenid; ++i) {
        this->register_regex(init_data[i].tokenregex, init_data[i].tokenid, 
            init_data[i].tokencb, init_data[i].lexerstate);
    }
}

///////////////////////////////////////////////////////////////////////////////
// get time of last compilation of this file
//
// The helper is constructed from the __DATE__ and __TIME__ strings of the 
// translation unit instantiating this template.
template <typename IteratorT, typename PositionT>
boost::wave::util::time_conversion_helper 
    lexer<IteratorT, PositionT>::compilation_time(__DATE__ " " __TIME__);

///////////////////////////////////////////////////////////////////////////////
}   // namespace lexer
namespace lexer/////////////////////////////////////////////////////////////////////////////////  template <typename IteratorT, typename PositionT>inline void init_lexer (lexer::lexer<IteratorT, PositionT> &lexer,     boost::wave::language_support language, bool force_reinit = false){    if (lexer.has_compiled_dfa())        return;     // nothing to do            using std::ifstream;    using std::ofstream;    using std::ios;    using std::cerr;    using std::endl;    ifstream dfa_in("wave_slex_lexer.dfa", ios::in|ios::binary);    lexer.init_dfa(language);    if (force_reinit || !dfa_in.is_open() ||        !lexer.load (dfa_in, (long)lexer.get_compilation_time()))    {#if defined(BOOST_SPIRIT_DEBUG)        cerr << "Compiling regular expressions for slex ...";#endif // defined(BOOST_SPIRIT_DEBUG)        dfa_in.close();        lexer.create_dfa();    ofstream dfa_out ("wave_slex_lexer.dfa", ios::out|ios::binary|ios::trunc);        if (dfa_out.is_open())            lexer.save (dfa_out, (long)lexer.get_compilation_time());#if defined(BOOST_SPIRIT_DEBUG)        cerr << " Done." 
<< endl;#endif // defined(BOOST_SPIRIT_DEBUG)    }}/////////////////////////////////////////////////////////////////////////////////  //  lex_functor/////////////////////////////////////////////////////////////////////////////////template <typename IteratorT, typename PositionT = wave::util::file_position_type>class slex_functor :   public slex_input_interface<        typename lexer::lexer<IteratorT, PositionT>::token_type    >{public:    typedef boost::wave::util::position_iterator<IteratorT, PositionT>          iterator_type;    typedef typename std::iterator_traits<IteratorT>::value_type    char_type;    typedef BOOST_WAVE_STRINGTYPE                                   string_type;    typedef typename lexer::lexer<IteratorT, PositionT>::token_type token_type;    slex_functor(IteratorT const &first_, IteratorT const &last_,             PositionT const &pos_, boost::wave::language_support language_)    :   first(first_, last_, pos_), language(language_), at_eof(false)    {        // initialize lexer dfa tables        init_lexer(lexer, language_);      }    virtual ~slex_functor() {}// get the next token from the input stream    token_type& get(token_type& result)    {        if (!at_eof) {            do {            // generate and return the next token            std::string value;            PositionT pos = first.get_position();   // begin of token position            token_id id = token_id(lexer.next_token(first, last, &value));                if ((token_id)(-1) == id)                    id = T_EOF;     // end of input reached            string_type token_val(value.c_str());                            if (T_CONTLINE != id) {                //  The cast should avoid spurious warnings about missing case labels                 //  for the other token ids's.                    
switch (static_cast<unsigned int>(id)) {                       case T_IDENTIFIER:                    // test identifier characters for validity (throws if                     // invalid chars found)                        if (!boost::wave::need_no_character_validation(language)) {                            using boost::wave::cpplexer::impl::validate_identifier_name;                            validate_identifier_name(token_val,                                 pos.get_line(), pos.get_column(), pos.get_file());                         }                        break;                    case T_STRINGLIT:                    case T_CHARLIT:                    // test literal characters for validity (throws if invalid                     // chars found)                        if (boost::wave::need_convert_trigraphs(language)) {                            using boost::wave::cpplexer::impl::convert_trigraphs;                            token_val = convert_trigraphs(token_val);                         }                        if (!boost::wave::need_no_character_validation(language)) {                            using boost::wave::cpplexer::impl::validate_literal;                            validate_literal(token_val,                                 pos.get_line(), pos.get_column(), pos.get_file());                         }                        break;                                            case T_LONGINTLIT:  // supported in C99 and long_long mode                        if (!boost::wave::need_long_long(language)) {                        // syntax error: not allowed in C++ mode                            BOOST_WAVE_LEXER_THROW(                                boost::wave::cpplexer::lexing_exception,                                 invalid_long_long_literal, value.c_str(),                                 pos.get_line(), pos.get_column(),                                 pos.get_file().c_str());                        }                        break;#if 
BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0                    case T_PP_HHEADER:                    case T_PP_QHEADER:                    case T_PP_INCLUDE:                    // convert to the corresponding ..._next token, if appropriate                        {                        // Skip '#' and whitespace and see whether we find an                         // 'include_next' here.                            typename string_type::size_type start = value.find("include");                            if (0 == value.compare(start, 12, "include_next", 12))                                id = token_id(id | AltTokenType);                            break;                        }#endif // BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0                    case T_EOF:                    // T_EOF is returned as a valid token, the next call will                     // return T_EOI, i.e. the actual end of input                        at_eof = true;                        token_val.clear();                        break;                                            case T_OR_TRIGRAPH:                    case T_XOR_TRIGRAPH:                    case T_LEFTBRACE_TRIGRAPH:                    case T_RIGHTBRACE_TRIGRAPH:                    case T_LEFTBRACKET_TRIGRAPH:                    case T_RIGHTBRACKET_TRIGRAPH:                    case T_COMPL_TRIGRAPH:                    case T_POUND_TRIGRAPH:                    case T_ANY_TRIGRAPH:                        if (boost::wave::need_convert_trigraphs(language))                        {                            using boost::wave::cpplexer::impl::convert_trigraph;                            token_val = convert_trigraph(token_val);                        }                        break;                    }                    result = token_type(id, token_val, pos);#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0                    return guards.detect_guard(result);#else                    return result;#endif                }            // skip the T_CONTLINE 
token            } while (true);        }        return result = token_type();   // return T_EOI    }    void set_position(PositionT const &pos)     {         // set position has to change the file name and line number only        first.get_position().set_file(pos.get_file());         first.get_position().set_line(pos.get_line());     }#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0    bool has_include_guards(std::string& guard_name) const         { return guards.detected(guard_name); }#endifprivate:    iterator_type first;    iterator_type last;    boost::wave::language_support language;    static lexer::lexer<IteratorT, PositionT> lexer;   // needed only once        bool at_eof;#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0    include_guards<token_type> guards;#endif};template <typename IteratorT, typename PositionT>lexer::lexer<IteratorT, PositionT> slex_functor<IteratorT, PositionT>::lexer;///////////////////////////////////////////////////////////////////////////////////  The 'new_lexer' function allows the opaque generation of a new lexer object.//  It is coupled to the iterator type to allow to decouple the lexer/iterator //  configurations at compile time.////  This function is declared inside the cpp_slex_token.hpp file, which is //  referenced by the source file calling the lexer and the source file, which//  instantiates the lex_functor. But is is defined here, so it will be //  instantiated only while compiling the source file, which instantiates the //  lex_functor. While the cpp_slex_token.hpp file may be included everywhere,//  this file (cpp_slex_lexer.hpp) should be included only once. 
This allows//  to decouple the lexer interface from the lexer implementation and reduces //  compilation time.//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////  //  The new_lexer_gen<>::new_lexer function (declared in cpp_slex_token.hpp)//  should be defined inline, if the lex_functor shouldn't be instantiated //  separately from the lex_iterator.////  Separate (explicit) instantiation helps to reduce compilation time./////////////////////////////////////////////////////////////////////////////////#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0#define BOOST_WAVE_SLEX_NEW_LEXER_INLINE#else#define BOOST_WAVE_SLEX_NEW_LEXER_INLINE inline#endif template <typename IteratorT, typename PositionT>BOOST_WAVE_SLEX_NEW_LEXER_INLINElex_input_interface<slex_token<PositionT> > *new_lexer_gen<IteratorT, PositionT>::new_lexer(IteratorT const &first,    IteratorT const &last, PositionT const &pos,     boost::wave::language_support language){    return new slex_functor<IteratorT, PositionT>(first, last, pos,         language);}#undef BOOST_WAVE_SLEX_NEW_LEXER_INLINE///////////////////////////////////////////////////////////////////////////////}   // namespace slex}   // namespace cpplexer}   // namespace wave}   // namespace boost     #endif // !defined(SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED)

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?