⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lexer.hpp

📁 C++的一个好库。。。现在很流行
💻 HPP
📖 第 1 页 / 共 5 页
字号:
                node* top_node = new cat_node(newnode, cnode);
                newnode = top_node;
            }

            // or together the various parts
            if (savednode)
            {
                node* top_node = new or_node(savednode, newnode);
                savednode = top_node;
            }
            else
            {
                savednode = newnode;
            }
        }


        if (first_time)
        {
            stack.push(savednode);
        }
        else
        {
            node* top = stack.top();
            stack.pop();

            node* newtop = new or_node(top, savednode);
            stack.push(newtop);
        }
    }
} // namespace ccl_utils

template <typename ScannerT>
class make_char
{
    typedef typename ScannerT::iterator_t iterator_type;

public:
    typedef
        typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
        char_t;

    make_char(std::stack<node*>& the_stack)
        : m_stack(the_stack)
        {}

    void operator()(iterator_type const& first, iterator_type const& last) const
    {
        const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
            escape_char_parser<lex_escapes, char_t>();
        char_t the_char;
        iterator_type first_ = first;
        ScannerT scan(first_, last);
        lex_escape_ch[assign(the_char)].parse(scan);
        node* newnode = ccl_utils::create_mb_node_seq(the_char);
        m_stack.push(newnode);
    }

    std::stack<node*>& m_stack;
};


// Semantic action for a bracketed character class such as [abc], [a-z],
// [^0-9] or [[:space:]x].
//
// The matched text is walked once, every member character is collected in
// a range_run, the set is negated when the class starts with '^', and the
// result is handed to ccl_utils::create_nodes together with the node stack.
template <typename ScannerT>
class make_ccl
{
    typedef typename ScannerT::iterator_t iterator_type;

public:
    typedef
        typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
        char_t;

    make_ccl(std::stack<node*>& the_stack)
        : m_stack(the_stack)
        {}

    // Returns true when the NUL-terminated `str` is a prefix of the
    // character sequence [first, last). `first` is taken by value, so the
    // caller's iterator is not advanced.
    static bool is_equal_to_string(iterator_type first,
        iterator_type const & last, const char* str)
    {
        while (first != last &&*str &&*first ==*str)
        {
            ++first;
            ++str;
        }
        // Every character of str was matched iff we stopped on its NUL.
        return*str == 0;
    }

    // Adds to `rr` each of the values 0..255 that `parser` accepts
    // according to parser.test(), one single-character range at a time.
    template <typename ParserT>
    static void fill_ccl(utility::impl::range_run<char_t>& rr, const ParserT& parser)
    {
        for (int i = 0; i < 256; ++i)
        {
            if (parser.test(static_cast<char_t>(uchar(i))))
                rr.set(utility::impl::range<char_t>(char_t(i), char_t(i)));
        }
    }

    // first_/last delimit the matched class text; *first_ must be '['.
    void operator()(iterator_type const& first_, iterator_type const& last) const
    {
        BOOST_ASSERT(*first_ == '[');

        iterator_type first = first_;
        ++first; // skip over '['
        bool negated_ccl = false;
        if (*first == '^')
        {
            negated_ccl = true;
            ++first;
        }

        // Accumulates every character that belongs to the class.
        utility::impl::range_run<char_t> rr;
        while (first != last &&*first != ']')
        {
            if (*first == '[') // it's a ccl_expr like [:space:]
            {
                // check for [:space:], etc.
                if (is_equal_to_string(first, last, "[:alnum:]"))
                {
                    fill_ccl(rr, alnum_p);
                }
                else if (is_equal_to_string(first, last, "[:alpha:]"))
                {
                    fill_ccl(rr, alpha_p);
                }
                else if (is_equal_to_string(first, last, "[:blank:]"))
                {
                    fill_ccl(rr, blank_p);
                }
                else if (is_equal_to_string(first, last, "[:cntrl:]"))
                {
                    fill_ccl(rr, cntrl_p);
                }
                else if (is_equal_to_string(first, last, "[:digit:]"))
                {
                    fill_ccl(rr, digit_p);
                }
                else if (is_equal_to_string(first, last, "[:graph:]"))
                {
                    fill_ccl(rr, graph_p);
                }
                else if (is_equal_to_string(first, last, "[:lower:]"))
                {
                    fill_ccl(rr, lower_p);
                }
                else if (is_equal_to_string(first, last, "[:print:]"))
                {
                    fill_ccl(rr, print_p);
                }
                else if (is_equal_to_string(first, last, "[:punct:]"))
                {
                    fill_ccl(rr, punct_p);
                }
                else if (is_equal_to_string(first, last, "[:space:]"))
                {
                    fill_ccl(rr, space_p);
                }
                else if (is_equal_to_string(first, last, "[:upper:]"))
                {
                    fill_ccl(rr, upper_p);
                }
                else if (is_equal_to_string(first, last, "[:xdigit:]"))
                {
                    fill_ccl(rr, xdigit_p);
                }
                // this can't happen, because it's parsed before we get here.
                //else
                //    throw bad_regex();

                // Advance past the character class expression
                // (up to and including the ']' that closes "[:name:]").
                while (first != last &&*first != ']')
                    ++first;
                BOOST_ASSERT(*first == ']');
                ++first;
            }
            else {
                const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
                    escape_char_parser<lex_escapes, char_t>();

                // NOTE(review): nothing in this branch advances `first`
                // directly, so the enclosing loop only makes progress if
                // the scanner moves `first` itself while parsing -
                // presumably ScannerT holds the iterator by reference;
                // confirm against the scanner definition.
                char_t c1;
                ScannerT scan(first, last);
                lex_escape_ch[assign(c1)].parse(scan);
                if (*scan.first == '-') // insert a range
                {
                    ++scan.first;
                    char_t c2;
                    lex_escape_ch[assign(c2)].parse(scan);
                    // Only checked where BOOST_ASSERT is active; a range
                    // with c1 >= c2 is otherwise passed on unchanged.
                    BOOST_ASSERT(c1 < c2); // Throw exception?
                    rr.set(utility::impl::range<char_t>(c1, c2));
                }
                else // insert 1 char
                {
                    rr.set(utility::impl::range<char_t>(c1, c1));
                }
            }
        }

        // A leading '^' selects the complement of the collected set.
        if (negated_ccl)
        {
            rr = ccl_utils::negate_range_run(rr);
        }

        ccl_utils::create_nodes(rr, m_stack);
    }

    // Stack of partially built nodes shared with the other actions.
    std::stack<node*>& m_stack;
};

// Semantic action for the '.' metacharacter.
//
// Builds the set of every character except '\n' and passes it to
// ccl_utils::create_nodes together with the node stack.
template <typename ScannerT>
class make_any_char
{
    typedef typename ScannerT::iterator_t iterator_type;

public:
    typedef
        typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
        char_t;

    // Stack of partially built nodes shared with the other actions.
    std::stack<node*>& m_stack;

    make_any_char(std::stack<node*>& the_stack)
        : m_stack(the_stack)
        {}

    // Invoked with the matched character, which must be '.'.
    void operator()(const char_t c) const
    {
        BOOST_ASSERT(c == '.');
        do_any_char();
    }

    // Creates the nodes for "any character but newline".
    void do_any_char() const
    {
        // A plain local instead of a function-local static: the set is
        // rebuilt from scratch on every call anyway, so a static only added
        // mutable state shared by all callers (and a data race if two
        // threads build lexers at the same time).
        utility::impl::range_run<char_t> rr;
        rr.set(full_range<char_t>());
        char_t newline = '\n';
        rr.clear(utility::impl::range<char_t>(newline, newline));

        ccl_utils::create_nodes(rr, m_stack);
    }
};

// Semantic action for a double-quoted string literal inside a regular
// expression.
//
// Every character between the quotes (escape sequences are handled by the
// lex-style escape_char_parser) becomes a node via
// ccl_utils::create_mb_node_seq; the nodes are chained left to right with
// cat_node and the resulting tree is pushed onto the node stack.
template <typename ScannerT>
class make_string
{
    typedef typename ScannerT::iterator_t iterator_type;

public:
    typedef
        typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
        char_t;

    // Stack of partially built nodes shared with the other actions.
    std::stack<node*>& m_stack;

    make_string(std::stack<node*>& the_stack)
        : m_stack(the_stack)
        {}

    // first/last delimit the matched text; *first must be the opening '"'.
    // Throws bad_regex when there is nothing between the quotes.
    void operator()(iterator_type const& first, iterator_type const& last) const
    {
        BOOST_ASSERT(*first == '"');

        iterator_type first_ = first;
        ScannerT scan(first_, last);
        ++scan.first; // skip over '"'

        // empty string not allowed; the end check comes first so that the
        // iterator is never dereferenced at the end of the input
        if (scan.first == scan.last || *scan.first == '"')
        {
            throw bad_regex();
        }

        const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
            escape_char_parser<lex_escapes, char_t>();

        // The first character seeds the tree ...
        char_t c;
        lex_escape_ch[assign(c)].parse(scan);
        node* top_node = ccl_utils::create_mb_node_seq(c);

        // ... and each further one is concatenated on the right. Test for
        // the end of the input before looking at the current character.
        while (scan.first != scan.last && *scan.first != '"')
        {
            lex_escape_ch[assign(c)].parse(scan);
            node* cur_node = ccl_utils::create_mb_node_seq(c);
            top_node = new cat_node(top_node, cur_node);
        }
        m_stack.push(top_node);
    }
};

// Builds the concatenation of `num` copies of the tree rooted at `n`.
//
// `n` itself is used as the first copy; every further copy comes from
// n->clone() and is appended on the right with a new cat_node. When
// num <= 1 no node is created and `n` is returned unchanged.
inline
node* repeat_node(node* n, int num)
{
    node* chain = n;
    for (int remaining = num; remaining > 1; --remaining)
    {
        node* const copy = n->clone();
        chain = new cat_node(chain, copy);
    }
    return chain;
}

// Wraps `n` in an alternation with a fresh epsilon_node:
// returns or_node(n, epsilon_node).
inline
node* optional_node(node* n)
{
    node* const empty_branch = new epsilon_node();
    return new or_node(n, empty_branch);
}

// Semantic action for a repetition that starts with '{' followed by a
// single count (presumably the "{n}" form - the closing part is not
// examined here).
//
// Replaces the node on top of the stack with `count` concatenated copies
// of it (see repeat_node).
template <typename ScannerT>
class make_rep1
{
    typedef typename ScannerT::iterator_t iterator_type;

public:
    typedef
        typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
        char_t;

    // Stack of partially built nodes shared with the other actions.
    std::stack<node*>& m_stack;

    make_rep1(std::stack<node*>& the_stack)
        : m_stack(the_stack)
        {}

    // first/last delimit the matched text; *first must be '{'.
    // Throws bad_regex when the count is zero.
    void operator()(iterator_type const& first, iterator_type const& last) const
    {
        BOOST_ASSERT(*first == '{');

        iterator_type first_ = first;
        ScannerT scan(first_, last);
        ++scan.first; // skip over '{'

        // Starts at 0 so that the check below never reads an indeterminate
        // value if the parser does not assign a count; that case is then
        // rejected like an explicit zero.
        unsigned int count = 0;
        uint_p[assign(count)].parse(scan);
        if (count == 0)
            throw bad_regex();

        node* top_node = m_stack.top();
        m_stack.pop();
        top_node = repeat_node(top_node, count);
        m_stack.push(top_node);
    }
};

// Semantic action for a repetition that starts with '{' followed by a
// count and is open-ended (presumably the "{n,}" form - the text after
// the count is not examined here).
//
// Replaces the node X on top of the stack with `count` concatenated copies
// of X followed by star_node(clone of X).
template <typename ScannerT>
class make_rep2
{
    typedef typename ScannerT::iterator_t iterator_type;

public:
    typedef
        typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
        char_t;

    // Stack of partially built nodes shared with the other actions.
    std::stack<node*>& m_stack;

    make_rep2(std::stack<node*>& the_stack)
        : m_stack(the_stack)
        {}

    // first/last delimit the matched text; *first must be '{'.
    // Throws bad_regex when the count is zero.
    void operator()(iterator_type const& first, iterator_type const& last) const
    {
        BOOST_ASSERT(*first == '{');

        iterator_type first_ = first;
        ScannerT scan (first_, last);
        ++scan.first; // skip over '{'

        // Starts at 0 so that the check below never reads an indeterminate
        // value if the parser does not assign a count; that case is then
        // rejected like an explicit zero.
        unsigned int count = 0;
        uint_p[assign(count)].parse(scan);
        if (count == 0)
            throw bad_regex();

        node* top_node = m_stack.top();
        m_stack.pop();
        // The mandatory copies, then "zero or more" of a fresh clone.
        top_node = new cat_node(repeat_node(top_node, count),
                new star_node(top_node->clone()));
        m_stack.push(top_node);

    }
};

// Semantic action for a bounded repetition with two counts, "{n,m".
//
// Replaces the node X on top of the stack with count1 concatenated copies
// of X followed by (count2 - count1) concatenated optional copies of X,
// where an optional copy is or_node(clone, epsilon_node) as built by
// optional_node.
template <typename ScannerT>
class make_rep3
{
    typedef typename ScannerT::iterator_t iterator_type;

public:
    typedef
        typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
        char_t;

    // Stack of partially built nodes shared with the other actions.
    std::stack<node*>& m_stack;

    make_rep3(std::stack<node*>& the_stack)
        : m_stack(the_stack)
        {}

    // first/last delimit the matched text; *first must be '{'.
    // Throws bad_regex when the first count is zero or when the second
    // count is not strictly greater than the first.
    void operator()(iterator_type const& first, iterator_type const& last) const
    {
        BOOST_ASSERT(*first == '{');

        iterator_type first_ = first;
        ScannerT scan(first_, last);
        ++scan.first; // skip over '{'

        // Both counts start at 0 so that the checks below never read an
        // indeterminate value if the parser does not assign them; such a
        // count is then rejected by the same checks.
        unsigned int count1 = 0;
        unsigned int count2 = 0;
        uint_p[assign(count1)].parse(scan);
        if (count1 == 0)
            throw bad_regex();

        ++scan.first; // skip over ','

        uint_p[assign(count2)].parse(scan);
        // NOTE(review): equal bounds (e.g. {2,2}) are rejected as well -
        // confirm that this is intended.
        if (count2 <= count1)
            throw bad_regex();

        node* top_node = m_stack.top();
        m_stack.pop();
        // The mandatory part first ...
        node* repeats = repeat_node(top_node, count1);
        // ... then the optional tail, built from a clone of the operand.
        top_node = new cat_node(repeats,
                repeat_node(optional_node(top_node->clone()),
                    count2 - count1));

        m_stack.push(top_node);
    }
};

///////////////////////////////////////////////////////////////////////////////
//
//  Lexer grammar
//
//      Defines the grammar, which mandates the syntax of the understood
//      lexeme definitions passed to lexer::register_regex.
//
///////////////////////////////////////////////////////////////////////////////
class lexer_grammar : public boost::spirit::grammar<lexer_grammar>
{
public:
    lexer_grammar(std::stack<node*> &node_stack_)
    : node_stack(node_stack_) {}

    template <typename ScannerT>
    struct definition
    {
        typedef rule<ScannerT> rule_t;
        typedef typename ScannerT::iterator_t iterator_type;
        typedef
            typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
            char_t;

        rule_t regex, re, series, singleton, singleton2, fullccl, ccl, string,
            escseq, ccl_char;
        symbols<> ccl_expr;

        definition(lexer_grammar const &self)
        {
            regex =
                    re >> !('/' >> re) >> !ch_p('$')
                ;

            re =
                    series
                >>*( ('|' >> series)[make_or<ScannerT>(self.node_stack)] )
                ;

            series =
                    singleton
                >>*( singleton[make_concat<ScannerT>(self.node_stack)] )
                ;

            singleton =
                    ch_p('.')[make_any_char<ScannerT>(self.node_stack)]
                    >>  singleton2
                |   fullccl
                    >>  singleton2
                |   ('"' >> string >> '"')
                    [
                        make_string<ScannerT>(self.node_stack)
                    ]
                    >>  singleton2
                |   '(' >> re >> ')'
                    >>  singleton2
                |   ((anychar_p - chset<>("/|*+?.(){}\\")) | escseq)
                    [
                        make_char<ScannerT>(self.node_stack)
                    ]
                    >>  singleton2
                ;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -