example2.cpp
来自「Boost provides free peer-reviewed portab」· C++ 代码 · 共 171 行
CPP
171 行
// Copyright (c) 2001-2008 Hartmut Kaiser// Copyright (c) 2001-2007 Joel de Guzman// // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)// This example shows how to create a simple lexer recognizing a couple of // different tokens and how to use this with a grammar. This example has a // heavily backtracking grammar which makes it a candidate for lexer based // parsing (all tokens are scanned and generated only once, even if // backtracking is required) which speeds up the overall parsing process // considerably, out-weighting the overhead needed for setting up the lexer.// Additionally it demonstrates how to use one of the defined tokens as a // parser component in the grammar.//// The grammar recognizes a simple input structure: any number of English // simple sentences (statements, questions and commands) are recognized and// are being counted separately.// #define BOOST_SPIRIT_DEBUG // #define BOOST_SPIRIT_LEXERTL_DEBUG#include <boost/config/warning_disable.hpp>#include <boost/spirit/include/qi.hpp>#include <boost/spirit/include/lex_lexer_lexertl.hpp>#include <boost/spirit/include/phoenix_operator.hpp>#include <iostream>#include <fstream>#include <string>#include "example.hpp"using namespace boost::spirit;using namespace boost::spirit::qi;using namespace boost::spirit::lex;using boost::phoenix::ref;///////////////////////////////////////////////////////////////////////////////// Token definition///////////////////////////////////////////////////////////////////////////////template <typename Lexer>struct example2_tokens : lexer_def<Lexer>{ template <typename Self> void def (Self& self) { // A 'word' is comprised of one or more letters and an optional // apostrophe. If it contains an apostrophe, there may only be one and // the apostrophe must be preceded and succeeded by at least 1 letter. // For example, "I'm" and "doesn't" meet the definition of 'word' we // define below. word = "[a-zA-Z]+('[a-zA-Z]+)?"; // associate the tokens and the token set with the lexer self = token_def<>(',') | '!' | '.' | '?' | ' ' | '\n' | word; } token_def<> word;};///////////////////////////////////////////////////////////////////////////////// Grammar definition///////////////////////////////////////////////////////////////////////////////template <typename Iterator>struct example2_grammar : grammar<Iterator>{ template <typename TokenDef> example2_grammar(TokenDef const& tok) : example2_grammar::base_type(story), paragraphs(0), commands(0), questions(0), statements(0) { story = +paragraph ; paragraph = ( +( command [ ++ref(commands) ] | question [ ++ref(questions) ] | statement [ ++ref(statements) ] ) >> *char_(' ') >> +char_('\n') ) [ ++ref(paragraphs) ] ; command = +(tok.word | ' ' | ',') >> '!' ; question = +(tok.word | ' ' | ',') >> '?' ; statement = +(tok.word | ' ' | ',') >> '.' ; BOOST_SPIRIT_DEBUG_NODE(story); BOOST_SPIRIT_DEBUG_NODE(paragraph); BOOST_SPIRIT_DEBUG_NODE(command); BOOST_SPIRIT_DEBUG_NODE(question); BOOST_SPIRIT_DEBUG_NODE(statement); } rule<Iterator> story, paragraph, command, question, statement; int paragraphs, commands, questions, statements;};///////////////////////////////////////////////////////////////////////////////int main(){ // iterator type used to expose the underlying input stream typedef std::string::iterator base_iterator_type; // This is the token type to return from the lexer iterator typedef lexertl_token<base_iterator_type> token_type; // This is the lexer type to use to tokenize the input. // Here we use the lexertl based lexer engine. typedef lexertl_lexer<token_type> lexer_type; // This is the token definition type (derived from the given lexer type). typedef example2_tokens<lexer_type> example2_tokens; // this is the iterator type exposed by the lexer typedef lexer<example2_tokens>::iterator_type iterator_type; // this is the type of the grammar to parse typedef example2_grammar<iterator_type> example2_grammar; // now we use the types defined above to create the lexer and grammar // object instances needed to invoke the parsing process example2_tokens tokens; // Our token definition example2_grammar calc(tokens); // Our grammar definition lexer<example2_tokens> lex(tokens); // Our lexer std::string str (read_from_file("example2.input")); // At this point we generate the iterator pair used to expose the // tokenized input stream. std::string::iterator it = str.begin(); iterator_type iter = lex.begin(it, str.end()); iterator_type end = lex.end(); // Parsing is done based on the the token stream, not the character // stream read from the input. bool r = parse(iter, end, calc); if (r && iter == end) { std::cout << "-------------------------\n"; std::cout << "Parsing succeeded\n"; std::cout << "There were " << calc.commands << " commands, " << calc.questions << " questions, and " << calc.statements << " statements.\n"; std::cout << "-------------------------\n"; } else { std::cout << "-------------------------\n"; std::cout << "Parsing failed\n"; std::cout << "-------------------------\n"; } std::cout << "Bye... :-) \n\n"; return 0;}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?