example3.cpp

来自「Boost provides free peer-reviewed portable C++ source libraries」· C++ 代码 · 共 163 行

CPP
163
字号
//  Copyright (c) 2001-2008 Hartmut Kaiser//  Copyright (c) 2001-2007 Joel de Guzman// //  Distributed under the Boost Software License, Version 1.0. (See accompanying //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)//  This example shows how to create a simple lexer recognizing a couple of //  different tokens and how to use this with a grammar. This example has a //  heavily backtracking grammar which makes it a candidate for lexer based //  parsing (all tokens are scanned and generated only once, even if //  backtracking is required) which speeds up the overall parsing process //  considerably, out-weighting the overhead needed for setting up the lexer.////  Additionally, this example demonstrates, how to define a token set usable //  as the skip parser during parsing, allowing to define several tokens to be //  ignored.////  This example recognizes couplets, which are sequences of numbers enclosed //  in matching pairs of parenthesis. See the comments below to for details//  and examples.// #define BOOST_SPIRIT_LEXERTL_DEBUG// #define BOOST_SPIRIT_DEBUG#include <boost/config/warning_disable.hpp>#include <boost/spirit/include/qi.hpp>#include <boost/spirit/include/lex_lexer_lexertl.hpp>#include <iostream>#include <fstream>#include <string>#include "example.hpp"using namespace boost::spirit;using namespace boost::spirit::qi;using namespace boost::spirit::lex;/////////////////////////////////////////////////////////////////////////////////  Token definition///////////////////////////////////////////////////////////////////////////////template <typename Lexer>struct example3_tokens : lexer_def<Lexer>{    typedef typename Lexer::token_set token_set;        template <typename Self>    void def (Self& self)    {        // define the tokens to match        ellipses = "\\.\\.\\.";        number = "[0-9]+";                // define the whitespace to ignore (spaces, tabs, newlines and C-style         // comments)        white_space             =   
token_def<>("[ \\t\\n]+")               // whitespace            |   "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"   // C style comments            ;                // associate the tokens and the token set with the lexer        self = ellipses | '(' | ')' | number;        self("WS") = white_space;    }        // these tokens expose the iterator_range of the matched input sequence    token_def<> ellipses, identifier, number;    token_set white_space;};/////////////////////////////////////////////////////////////////////////////////  Grammar definition///////////////////////////////////////////////////////////////////////////////template <typename Iterator, typename Lexer>struct example3_grammar   : grammar<Iterator, in_state_skipper<typename Lexer::token_set> >{    template <typename TokenDef>    example3_grammar(TokenDef const& tok)      : example3_grammar::base_type(start)    {        start             =  +(couplet | tok.ellipses)            ;        //  A couplet matches nested left and right parenthesis.        //  For example:        //    (1) (1 2) (1 2 3) ...        //    ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ...        //    (((1))) ...        couplet            =   tok.number            |   '(' >> +couplet >> ')'            ;        BOOST_SPIRIT_DEBUG_NODE(start);        BOOST_SPIRIT_DEBUG_NODE(couplet);    }    typedef typename Lexer::token_set token_set;    rule<Iterator, in_state_skipper<token_set> > start, couplet;};///////////////////////////////////////////////////////////////////////////////int main(){    // iterator type used to expose the underlying input stream    typedef std::string::iterator base_iterator_type;    // This is the token type to return from the lexer iterator    typedef lexertl_token<base_iterator_type> token_type;    // This is the lexer type to use to tokenize the input.    // Here we use the lexertl based lexer engine.    
typedef lexertl_lexer<token_type> lexer_type;    // This is the token definition type (derived from the given lexer type).    typedef example3_tokens<lexer_type> example3_tokens;    // this is the iterator type exposed by the lexer     typedef lexer<example3_tokens>::iterator_type iterator_type;    // this is the type of the grammar to parse    typedef example3_grammar<iterator_type, lexer_type> example3_grammar;    // now we use the types defined above to create the lexer and grammar    // object instances needed to invoke the parsing process    example3_tokens tokens;                         // Our token definition    example3_grammar calc(tokens);                  // Our grammar definition    lexer<example3_tokens> lex(tokens);             // Our lexer    std::string str (read_from_file("example3.input"));    // At this point we generate the iterator pair used to expose the    // tokenized input stream.    std::string::iterator it = str.begin();    iterator_type iter = lex.begin(it, str.end());    iterator_type end = lex.end();    // Parsing is done based on the the token stream, not the character     // stream read from the input.    // Note, how we use the token_set defined above as the skip parser.    std::string ws("WS");    bool r = phrase_parse(iter, end, calc, in_state(ws)[tokens.white_space]);    if (r && iter == end)    {        std::cout << "-------------------------\n";        std::cout << "Parsing succeeded\n";        std::cout << "-------------------------\n";    }    else    {        std::cout << "-------------------------\n";        std::cout << "Parsing failed\n";        std::cout << "-------------------------\n";    }    std::cout << "Bye... :-) \n\n";    return 0;}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?