📄 cpp_slex_lexer.hpp

📁 C++的一个好库。。。现在很流行
💻 HPP
📖 第 1 页 / 共 2 页
字号:
上一页 12
                "(" ESCAPESEQ OR "[^\\n\\r']" OR UNIVERSALCHAR ")+" "'"),
    TOKEN_DATA(STRINGLIT, CHAR_SPEC Q("\"") 
                "(" ESCAPESEQ OR "[^\\n\\r\"]" OR UNIVERSALCHAR ")*" Q("\"")),
    TOKEN_DATA(SPACE, BLANK "+"),
    TOKEN_DATA(SPACE2, "[\\v\\f]+"),
    TOKEN_DATA(CONTLINE, Q("\\") "\\n"), 
    TOKEN_DATA(NEWLINE, NEWLINEDEF),
    TOKEN_DATA(POUND_POUND, "##"),
    TOKEN_DATA(POUND_POUND_ALT, Q("%:") Q("%:")),
    TOKEN_DATA(POUND_POUND_TRIGRAPH, TRI("=") TRI("=")),
    TOKEN_DATA(POUND, "#"),
    TOKEN_DATA(POUND_ALT, Q("%:")),
    TOKEN_DATA(POUND_TRIGRAPH, TRI("=")),
    TOKEN_DATA(ANY, "."),
    TOKEN_DATA(ANY_TRIGRAPH, TRI(Q("/"))),
    { token_id(0) }       // this should be the last entry
};

///////////////////////////////////////////////////////////////////////////////
// C++ only token definitions
template <typename IteratorT, typename PositionT>
typename lexer_base<IteratorT, PositionT>::lexer_data const 
lexer<IteratorT, PositionT>::init_data_cpp[INIT_DATA_CPP_SIZE] = 
{
    TOKEN_DATA(AND_ALT, "bitand"),
    TOKEN_DATA(ANDASSIGN_ALT, "and_eq"),
    TOKEN_DATA(ANDAND_ALT, "and"),
    TOKEN_DATA(OR_ALT, "bitor"),
    TOKEN_DATA(ORASSIGN_ALT, "or_eq"),
    TOKEN_DATA(OROR_ALT, "or"),
    TOKEN_DATA(XORASSIGN_ALT, "xor_eq"),
    TOKEN_DATA(XOR_ALT, "xor"),
    TOKEN_DATA(NOTEQUAL_ALT, "not_eq"),
    TOKEN_DATA(NOT_ALT, "not"),
    TOKEN_DATA(COMPL_ALT, "compl"),
    TOKEN_DATA(ARROWSTAR, Q("->") Q("*")),
    TOKEN_DATA(DOTSTAR, Q(".") Q("*")),
    TOKEN_DATA(COLON_COLON, "::"),
    { token_id(0) }       // this should be the last entry
};

///////////////////////////////////////////////////////////////////////////////
//  undefine macros, required for regular expression definitions
#undef INCLUDEDEF
#undef POUNDDEF
#undef CCOMMENT
#undef PPSPACE
#undef DIGIT
#undef OCTALDIGIT
#undef HEXDIGIT
#undef SIGN
#undef EXPONENT
#undef LONGINTEGER_SUFFIX
#undef INTEGER_SUFFIX
#undef INTEGER
#undef FLOAT_SUFFIX
#undef CHAR_SPEC
#undef BACKSLASH    
#undef ESCAPESEQ    
#undef HEXQUAD      
#undef UNIVERSALCHAR

#undef Q
#undef TRI
#undef OR

#undef TOKEN_DATA
#undef TOKEN_DATA_EX

///////////////////////////////////////////////////////////////////////////////
// initialize cpp lexer with token data
template <typename IteratorT, typename PositionT>
inline
lexer_base<IteratorT, PositionT>::lexer_base() 
:   base_type(NUM_LEXER_STATES)
{
}

template <typename IteratorT, typename PositionT>
inline void
lexer<IteratorT, PositionT>::init_dfa(boost::wave::language_support lang)
{
    if (this->has_compiled_dfa())
        return;
        
// if in C99 mode, some of the keywords are not valid    
    if (!boost::wave::need_c99(lang)) {
        for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) {
            this->register_regex(init_data_cpp[j].tokenregex, 
                init_data_cpp[j].tokenid, init_data_cpp[j].tokencb, 
                init_data_cpp[j].lexerstate);
        }
    }
    
    for (int i = 0; 0 != init_data[i].tokenid; ++i) {
        this->register_regex(init_data[i].tokenregex, init_data[i].tokenid, 
            init_data[i].tokencb, init_data[i].lexerstate);
    }
}

///////////////////////////////////////////////////////////////////////////////
// get time of last compilation of this file
template <typename IteratorT, typename PositionT>
boost::wave::util::time_conversion_helper 
    lexer<IteratorT, PositionT>::compilation_time(__DATE__ " " __TIME__);

///////////////////////////////////////////////////////////////////////////////
}   // namespace lexer

///////////////////////////////////////////////////////////////////////////////
//  
template <typename IteratorT, typename PositionT>
inline void 
init_lexer (lexer::lexer<IteratorT, PositionT> &lexer, 
    boost::wave::language_support language, bool force_reinit = false)
{
    if (lexer.has_compiled_dfa())
        return;     // nothing to do
        
    using std::ifstream;
    using std::ofstream;
    using std::ios;
    using std::cerr;
    using std::endl;
    
ifstream dfa_in("wave_slex_lexer.dfa", ios::in|ios::binary);

    lexer.init_dfa(language);
    if (force_reinit || !dfa_in.is_open() ||
        !lexer.load (dfa_in, (long)lexer.get_compilation_time()))
    {
#if defined(BOOST_SPIRIT_DEBUG)
        cerr << "Compiling regular expressions for slex ...";
#endif // defined(BOOST_SPIRIT_DEBUG)

        dfa_in.close();
        lexer.create_dfa();

    ofstream dfa_out ("wave_slex_lexer.dfa", ios::out|ios::binary|ios::trunc);

        if (dfa_out.is_open())
            lexer.save (dfa_out, (long)lexer.get_compilation_time());

#if defined(BOOST_SPIRIT_DEBUG)
        cerr << " Done." << endl;
#endif // defined(BOOST_SPIRIT_DEBUG)
    }
}

///////////////////////////////////////////////////////////////////////////////
//  
//  lex_functor
//
///////////////////////////////////////////////////////////////////////////////

template <typename IteratorT, typename PositionT = wave::util::file_position_type>
class slex_functor 
:   public slex_input_interface<
        typename lexer::lexer<IteratorT, PositionT>::token_type
    >
{
public:

    typedef boost::wave::util::position_iterator<IteratorT, PositionT>
          iterator_type;
    typedef typename std::iterator_traits<IteratorT>::value_type    char_type;
    typedef BOOST_WAVE_STRINGTYPE                                   string_type;
    typedef typename lexer::lexer<IteratorT, PositionT>::token_type token_type;

    slex_functor(IteratorT const &first_, IteratorT const &last_, 
            PositionT const &pos_, boost::wave::language_support language)
    :   first(first_, last_, pos_), language(language), at_eof(false)
    {
        // initialize lexer dfa tables
        init_lexer(lexer, language);  
    }
    virtual ~slex_functor() {}

// get the next token from the input stream
    token_type get()
    {
        token_type token;

        if (!at_eof) {
            do {
            // generate and return the next token
            std::string value;
            PositionT pos = first.get_position();   // begin of token position
            token_id id = token_id(lexer.next_token(first, last, &value));

                if ((token_id)(-1) == id)
                    id = T_EOF;     // end of input reached

            string_type token_val(value.c_str());
            
                if (T_CONTLINE != id) {
                //  The cast should avoid spurious warnings about missing case labels 
                //  for the other token ids's.
                    switch (static_cast<unsigned int>(id)) {   
                    case T_IDENTIFIER:
                    // test identifier characters for validity (throws if 
                    // invalid chars found)
                        if (!(language & support_option_no_character_validation)) {
                            using boost::wave::cpplexer::impl::validate_identifier_name;
                            validate_identifier_name(token_val, 
                                pos.get_line(), pos.get_column(), pos.get_file()); 
                        }
                        break;

                    case T_STRINGLIT:
                    case T_CHARLIT:
                    // test literal characters for validity (throws if invalid 
                    // chars found)
                        if (language & support_option_convert_trigraphs) {
                            using boost::wave::cpplexer::impl::convert_trigraphs;
                            token_val = convert_trigraphs(token_val, 
                                pos.get_line(), pos.get_column(), pos.get_file()); 
                        }
                        if (!(language & support_option_no_character_validation)) {
                            using boost::wave::cpplexer::impl::validate_literal;
                            validate_literal(token_val, 
                                pos.get_line(), pos.get_column(), pos.get_file()); 
                        }
                        break;
                        
                    case T_LONGINTLIT:  // supported in C99 and long_long mode
                        if (!boost::wave::need_long_long(language)) {
                        // syntax error: not allowed in C++ mode
                            BOOST_WAVE_LEXER_THROW(
                                boost::wave::cpplexer::lexing_exception, 
                                invalid_long_long_literal, value.c_str(), 
                                pos.get_line(), pos.get_column(), 
                                pos.get_file().c_str());
                        }
                        break;

#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
                    case T_PP_HHEADER:
                    case T_PP_QHEADER:
                    case T_PP_INCLUDE:
                    // convert to the corresponding ..._next token, if appropriate
                        {
                        // Skip '#' and whitespace and see whether we find an 
                        // 'include_next' here.
                            typename string_type::size_type start = value.find("include");
                            if (0 == value.compare(start, 12, "include_next", 12))
                                id = token_id(id | AltTokenType);
                            break;
                        }
#endif // BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0

                    case T_EOF:
                    // T_EOF is returned as a valid token, the next call will 
                    // return T_EOI, i.e. the actual end of input
                        at_eof = true;
                        token_val.clear();
                        break;
                        
                    case T_OR_TRIGRAPH:
                    case T_XOR_TRIGRAPH:
                    case T_LEFTBRACE_TRIGRAPH:
                    case T_RIGHTBRACE_TRIGRAPH:
                    case T_LEFTBRACKET_TRIGRAPH:
                    case T_RIGHTBRACKET_TRIGRAPH:
                    case T_COMPL_TRIGRAPH:
                    case T_POUND_TRIGRAPH:
                    case T_ANY_TRIGRAPH:
                        if (language & support_option_convert_trigraphs)
                        {
                            using boost::wave::cpplexer::impl::convert_trigraph;
                            token_val = convert_trigraph(
                                token_val, pos.get_line(), pos.get_column(), 
                                pos.get_file());
                        }
                        break;
                    }
                    return token_type(id, token_val, pos);
                }
            
            // skip the T_CONTLINE token
            } while (true);
        }
        return token;       // return T_EOI
    }
    void set_position(PositionT const &pos) 
    { 
        // set position has to change the file name and line number only
        first.get_position().set_file(pos.get_file()); 
        first.get_position().set_line(pos.get_line()); 
    }
    
private:
    iterator_type first;
    iterator_type last;
    boost::wave::language_support language;
    static lexer::lexer<IteratorT, PositionT> lexer;   // needed only once
    
    bool at_eof;
};

template <typename IteratorT, typename PositionT>
lexer::lexer<IteratorT, PositionT> slex_functor<IteratorT, PositionT>::lexer;

///////////////////////////////////////////////////////////////////////////////
//
//  The 'new_lexer' function allows the opaque generation of a new lexer object.
//  It is coupled to the iterator type to allow to decouple the lexer/iterator 
//  configurations at compile time.
//
//  This function is declared inside the cpp_slex_token.hpp file, which is 
//  referenced by the source file calling the lexer and the source file, which
//  instantiates the lex_functor. But is is defined here, so it will be 
//  instantiated only while compiling the source file, which instantiates the 
//  lex_functor. While the cpp_slex_token.hpp file may be included everywhere,
//  this file (cpp_slex_lexer.hpp) should be included only once. This allows
//  to decouple the lexer interface from the lexer implementation and reduces 
//  compilation time.
//
///////////////////////////////////////////////////////////////////////////////

///////////////////////////////////////////////////////////////////////////////
//  
//  The new_lexer_gen<>::new_lexer function (declared in cpp_slex_token.hpp)
//  should be defined inline, if the lex_functor shouldn't be instantiated 
//  separately from the lex_iterator.
//
//  Separate (explicit) instantiation helps to reduce compilation time.
//
///////////////////////////////////////////////////////////////////////////////

#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0
#define BOOST_WAVE_SLEX_NEW_LEXER_INLINE
#else
#define BOOST_WAVE_SLEX_NEW_LEXER_INLINE inline
#endif 

template <typename IteratorT, typename PositionT>
BOOST_WAVE_SLEX_NEW_LEXER_INLINE
slex_input_interface<slex_token<PositionT> > *
new_lexer_gen<IteratorT, PositionT>::new_lexer(IteratorT const &first,
    IteratorT const &last, PositionT const &pos, 
    boost::wave::language_support language)
{
    return new slex_functor<IteratorT, PositionT>(first, last, pos, 
        language);
}

#undef BOOST_WAVE_SLEX_NEW_LEXER_INLINE

///////////////////////////////////////////////////////////////////////////////
}   // namespace slex
}   // namespace cpplexer
}   // namespace wave
}   // namespace boost
     
#endif // !defined(SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED)
上一页 12
💿 文件大小 21075 K
👤 上传用户 lyyfengyutongzh
📂 所属分类中间件编程
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -