⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 parser.hpp

📁 Boost provides free peer-reviewed portable C++ source libraries. We emphasize libraries that work well with the C++ Standard Library.
💻 HPP
📖 第 1 页 / 共 2 页
字号:
// parser.hpp
// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef BOOST_LEXER_PARSER_HPP
#define BOOST_LEXER_PARSER_HPP

#include <assert.h>
#include "tree/end_node.hpp"
#include "tree/iteration_node.hpp"
#include "tree/leaf_node.hpp"
#include "../runtime_error.hpp"
#include "tree/selection_node.hpp"
#include "tree/sequence_node.hpp"
#include "../size_t.hpp"
#include "tokeniser/re_tokeniser.hpp"

namespace boost
{
namespace lexer
{
namespace detail
{
// Turns the text of a single regex into a syntax tree of nodes.
// All members visible here are static; the class only groups the parsing
// routines and the typedefs they share for a given character type.
template<typename CharT>
class basic_parser
{
public:
    // Tokeniser that splits the regex text into tokens.
    typedef basic_re_tokeniser<CharT> tokeniser;
    typedef typename tokeniser::string string;
    // Maps a name to the root of an already parsed tree (see macro ()).
    typedef std::map<string, const node *> macro_map;
    // Container in which every node allocated during parsing is recorded.
    typedef node::node_ptr_vector node_ptr_vector;
    typedef typename tokeniser::num_token token;

/*
    General principles of regex parsing:
    - Every regex is a sequence of sub-regexes.
    - Regexes consist of operands and operators
    - All operators decompose to sequence, selection ('|') and iteration ('*')
    - Regex tokens are stored on the stack.
    - When a complete sequence of regex tokens is on the stack it is processed.

Grammar:

<REGEX>      -> <OREXP>
<OREXP>      -> <SEQUENCE> | <OREXP>'|'<SEQUENCE>
<SEQUENCE>   -> <SUB>
<SUB>        -> <EXPRESSION> | <SUB><EXPRESSION>
<EXPRESSION> -> <REPEAT>
<REPEAT>     -> charset | macro | '('<REGEX>')' | <REPEAT><DUPLICATE>
<DUPLICATE>  -> '?' | '*' | '+' | '{n[,[m]]}'
*/

    // Parses the regex in [start_, end_) and returns the root of its tree.
    //
    // start_, end_        - range of characters holding the regex.
    // id_                 - id stored in the end node appended to the tree;
    //                       0 means no end node is appended (macro case).
    // dfa_state_          - second value stored in the appended end node.
    // case_sensitive_, dot_not_newline_, locale_
    //                     - forwarded unchanged to the tokeniser state.
    // node_ptr_vector_    - receives the end/sequence nodes created here and
    //                       is handed on to reduce ().
    // macromap_           - handed on to reduce () for macro tokens.
    // map_                - handed to tokeniser::next () on every call.
    // seen_BOL_assertion_, seen_EOL_assertion_
    //                     - set to true when the tokeniser state reports the
    //                       corresponding flag; never reset to false here.
    //
    // Throws runtime_error when the precedence lookup yields anything other
    // than '<', '=' or '>' (syntax error) and when no tree node was produced
    // (empty rule).
    static node *parse (const CharT *start_, const CharT * const end_,
        const std::size_t id_, const std::size_t dfa_state_,
        const bool case_sensitive_, const bool dot_not_newline_,
        const std::locale &locale_, node_ptr_vector &node_ptr_vector_,
        const macro_map &macromap_, typename tokeniser::token_map &map_,
        bool &seen_BOL_assertion_, bool &seen_EOL_assertion_)
    {
        node *root_ = 0;
        state state_ (start_, end_, case_sensitive_, locale_,
            dot_not_newline_);
        token lhs_token_;
        token rhs_token_;
        token_stack token_stack_;
        tree_node_stack tree_node_stack_;
        char action_ = 0;

        // Seed the stack with a default constructed token, then read the
        // first real token as the look-ahead.
        token_stack_.push (rhs_token_);
        tokeniser::next (state_, map_, rhs_token_);

        do
        {
            // Compare the token on top of the stack with the look-ahead.
            lhs_token_ = token_stack_.top ();
            action_ = lhs_token_.precedence (rhs_token_._type);

            switch (action_)
            {
            case '<':
            case '=':
                // Shift: stack the look-ahead and fetch the next token.
                token_stack_.push (rhs_token_);
                tokeniser::next (state_, map_, rhs_token_);
                break;
            case '>':
                // Reduce: the look-ahead is left untouched and is compared
                // against the new stack top on the next iteration.
                reduce (token_stack_, macromap_, node_ptr_vector_,
                    tree_node_stack_);
                break;
            default:
                // No relation between the two tokens: report both and the
                // tokeniser's current index.
                std::ostringstream ss_;

                ss_ << "A syntax error occurred: '" <<
                    lhs_token_.precedence_string () <<
                    "' against '" << rhs_token_.precedence_string () <<
                    "' at index " << state_._index << ".";
                throw runtime_error (ss_.str ().c_str ());
                break;
            }
        } while (!token_stack_.empty ());

        if (tree_node_stack_.empty ())
        {
            throw runtime_error ("Empty rules are not allowed.");
        }

        // After a successful parse exactly one tree remains.
        assert (tree_node_stack_.size () == 1);

        node *lhs_node_ = tree_node_stack_.top ();

        tree_node_stack_.pop ();

        if (id_ == 0)
        {
            // Macros have no end state...
            root_ = lhs_node_;
        }
        else
        {
            // A null slot is appended before each allocation and then
            // overwritten with the new node - presumably so that a failing
            // push_back cannot leak a freshly allocated node; confirm against
            // node_ptr_vector's ownership rules.
            node_ptr_vector_->push_back (0);

            node *rhs_node_ = new end_node (id_, dfa_state_);

            node_ptr_vector_->back () = rhs_node_;
            node_ptr_vector_->push_back (0);
            // The returned root is the parsed tree followed by the end node.
            node_ptr_vector_->back () = new sequence_node (lhs_node_, rhs_node_);
            root_ = node_ptr_vector_->back ();
        }

        // Done this way as bug in VC++ 6 prevents |= operator working
        // properly!
        if (state_._seen_BOL_assertion) seen_BOL_assertion_ = true;
        if (state_._seen_EOL_assertion) seen_EOL_assertion_ = true;

        return root_;
    }

private:
    typedef typename tokeniser::state state;
    // Stack of tokens used both for the parse stack and for a handle.
    typedef std::stack<token> token_stack;
    // Stack of (sub)trees built so far.
    typedef node::node_stack tree_node_stack;

    // Performs one reduction.
    //
    // Tokens are popped from token_stack_ into handle_ for as long as the
    // new stack top relates to the popped token with '='. The token popped
    // last (rhs_, which is also handle_.top ()) then selects the action:
    // either a replacement token is pushed back onto token_stack_ directly,
    // or a helper is called with the handle and the stacks.
    //
    // Throws runtime_error for a token type with no case below.
    static void reduce (token_stack &token_stack_,
        const macro_map &macromap_, node_ptr_vector &node_vector_ptr_,
        tree_node_stack &tree_node_stack_)
    {
        typename tokeniser::num_token lhs_;
        typename tokeniser::num_token rhs_;
        token_stack handle_;
        char action_ = 0;

        // Collect the handle.
        do
        {
            rhs_ = token_stack_.top ();
            token_stack_.pop ();
            handle_.push (rhs_);

            if (!token_stack_.empty ())
            {
                lhs_ = token_stack_.top ();
                action_ = lhs_.precedence (rhs_._type);
            }
        } while (!token_stack_.empty () && action_ == '=');

        // The handle either consumed the whole stack or stops at a '<'.
        assert (token_stack_.empty () || action_ == '<');

        switch (rhs_._type)
        {
        case token::BEGIN:
            // finished processing so exit
            break;
        case token::REGEX:
            // finished parsing, nothing to do
            break;
        case token::OREXP:
            orexp (handle_, token_stack_, node_vector_ptr_, tree_node_stack_);
            break;
        case token::SEQUENCE:
            // <OREXP> -> <SEQUENCE>
            token_stack_.push (token::OREXP);
            break;
        case token::SUB:
            sub (handle_, token_stack_, node_vector_ptr_, tree_node_stack_);
            break;
        case token::EXPRESSION:
            // <SUB> -> <EXPRESSION>
            token_stack_.push (token::SUB);
            break;
        case token::REPEAT:
            repeat (handle_, token_stack_);
            break;
        case token::CHARSET:
            charset (handle_, token_stack_, node_vector_ptr_,
                tree_node_stack_);
            break;
        case token::MACRO:
            macro (handle_, token_stack_, macromap_, node_vector_ptr_,
                tree_node_stack_);
            break;
        case token::OPENPAREN:
            openparen (handle_, token_stack_);
            break;
        // Each duplication operator below comes as a pair of token types;
        // the helper is told via its first argument whether the first type
        // of the pair was seen, and DUP is pushed afterwards in every case.
        case token::OPT:
        case token::AOPT:
            optional (rhs_._type == token::OPT, node_vector_ptr_,
                tree_node_stack_);
            token_stack_.push (token::DUP);
            break;
        case token::ZEROORMORE:
        case token::AZEROORMORE:
            zero_or_more (rhs_._type == token::ZEROORMORE, node_vector_ptr_,
                tree_node_stack_);
            token_stack_.push (token::DUP);
            break;
        case token::ONEORMORE:
        case token::AONEORMORE:
            one_or_more (rhs_._type == token::ONEORMORE, node_vector_ptr_,
                tree_node_stack_);
            token_stack_.push (token::DUP);
            break;
        case token::REPEATN:
        case token::AREPEATN:
            // The repeat token itself is passed on as well.
            repeatn (rhs_._type == token::REPEATN, handle_.top (),
                node_vector_ptr_, tree_node_stack_);
            token_stack_.push (token::DUP);
            break;
        default:
            throw runtime_error
                ("Internal error regex_parser::reduce");
            break;
        }
    }

    // Reduces a handle whose top token is OREXP.
    //
    // A one token handle is replaced by REGEX (<REGEX> -> <OREXP>).
    // A three token handle is expected to read OREXP, OR, SEQUENCE from the
    // top down; perform_or () is called and OREXP is pushed back
    // (<OREXP> -> <OREXP>'|'<SEQUENCE>).
    static void orexp (token_stack &handle_, token_stack &token_stack_,
        node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
    {
        assert (handle_.top ()._type == token::OREXP &&
            (handle_.size () == 1 || handle_.size () == 3));

        if (handle_.size () == 1)
        {
            token_stack_.push (token::REGEX);
        }
        else
        {
            handle_.pop ();
            assert (handle_.top ()._type == token::OR);
            handle_.pop ();
            assert (handle_.top ()._type == token::SEQUENCE);
            perform_or (node_ptr_vector_, tree_node_stack_);
            token_stack_.push (token::OREXP);
        }
    }

    // Reduces a handle whose top token is SUB.
    // A one token handle is replaced by SEQUENCE (<SEQUENCE> -> <SUB>).
    static void sub (token_stack &handle_, token_stack &token_stack_,
        node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
    {
        // NOTE(review): '&&' binds tighter than '||', so this checks
        // (top is SUB && size == 1) || size == 2, i.e. the token type is not
        // checked for a two token handle. orexp () parenthesises its size
        // test - confirm whether the same grouping was intended here.
        assert (handle_.top ()._type == token::SUB &&
            handle_.size () == 1 || handle_.size () == 2);

        if (handle_.size () == 1)
        {
            token_stack_.push (token::SEQUENCE);
        }
        else

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -