欢迎来到虫虫下载站 | 资源下载 资源专辑 关于我们
虫虫下载站

lexer.hpp

Boost provides free peer-reviewed portable C++ source libraries. We emphasize libraries that work well with the C++ Standard Library.
HPP
第 1 页 / 共 5 页
字号:
        : m_stack(the_stack)        {}    void operator()(iterator_type const& first, iterator_type const& last) const    {        const escape_char_parser<lex_escapes, char_t> lex_escape_ch =            escape_char_parser<lex_escapes, char_t>();        char_t the_char;        iterator_type first_ = first;        ScannerT scan(first_, last);        lex_escape_ch[assign(the_char)].parse(scan);        node* newnode = ccl_utils::create_mb_node_seq(the_char);        m_stack.push(newnode);    }    std::stack<node*>& m_stack;};template <typename ScannerT>class make_ccl{    typedef typename ScannerT::iterator_t iterator_type;public:    typedef        typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type        char_t;    make_ccl(std::stack<node*>& the_stack)        : m_stack(the_stack)        {}    static bool is_equal_to_string(iterator_type first,        iterator_type const & last, const char* str)    {        while (first != last &&*str &&*first ==*str)        {            ++first;            ++str;        }        return*str == 0;    }    template <typename ParserT>    static void fill_ccl(utility::impl::range_run<char_t>& rr, const ParserT& parser)    {        for (int i = 0; i < 256; ++i)        {            if (parser.test(static_cast<char_t>(uchar(i))))                rr.set(utility::impl::range<char_t>(char_t(i), char_t(i)));        }    }    void operator()(iterator_type const& first_, iterator_type const& last) const    {        BOOST_ASSERT(*first_ == '[');        iterator_type first = first_;        ++first; // skip over '['        bool negated_ccl = false;        if (*first == '^')        {            negated_ccl = true;            ++first;        }        utility::impl::range_run<char_t> rr;        while (first != last &&*first != ']')        {            if (*first == '[') // it's a ccl_expr like [:space:]            {                // check for [:space:], etc.                
if (is_equal_to_string(first, last, "[:alnum:]"))                {                    fill_ccl(rr, alnum_p);                }                else if (is_equal_to_string(first, last, "[:alpha:]"))                {                    fill_ccl(rr, alpha_p);                }                else if (is_equal_to_string(first, last, "[:blank:]"))                {                    fill_ccl(rr, blank_p);                }                else if (is_equal_to_string(first, last, "[:cntrl:]"))                {                    fill_ccl(rr, cntrl_p);                }                else if (is_equal_to_string(first, last, "[:digit:]"))                {                    fill_ccl(rr, digit_p);                }                else if (is_equal_to_string(first, last, "[:graph:]"))                {                    fill_ccl(rr, graph_p);                }                else if (is_equal_to_string(first, last, "[:lower:]"))                {                    fill_ccl(rr, lower_p);                }                else if (is_equal_to_string(first, last, "[:print:]"))                {                    fill_ccl(rr, print_p);                }                else if (is_equal_to_string(first, last, "[:punct:]"))                {                    fill_ccl(rr, punct_p);                }                else if (is_equal_to_string(first, last, "[:space:]"))                {                    fill_ccl(rr, space_p);                }                else if (is_equal_to_string(first, last, "[:upper:]"))                {                    fill_ccl(rr, upper_p);                }                else if (is_equal_to_string(first, last, "[:xdigit:]"))                {                    fill_ccl(rr, xdigit_p);                }                // this can't happen, because it's parsed before we get here.                
//else                //    throw bad_regex();                // Advance past the character class expression                while (first != last &&*first != ']')                    ++first;                BOOST_ASSERT(*first == ']');                ++first;            }            else {                const escape_char_parser<lex_escapes, char_t> lex_escape_ch =                    escape_char_parser<lex_escapes, char_t>();                char_t c1;                ScannerT scan(first, last);                lex_escape_ch[assign(c1)].parse(scan);                if (*scan.first == '-') // insert a range                {                    ++scan.first;                    char_t c2;                    lex_escape_ch[assign(c2)].parse(scan);                    BOOST_ASSERT(c1 < c2); // Throw exception?                    rr.set(utility::impl::range<char_t>(c1, c2));                }                else // insert 1 char                {                    rr.set(utility::impl::range<char_t>(c1, c1));                }            }        }        if (negated_ccl)        {            rr = ccl_utils::negate_range_run(rr);        }        ccl_utils::create_nodes(rr, m_stack);    }    std::stack<node*>& m_stack;};template <typename ScannerT>class make_any_char{    typedef typename ScannerT::iterator_t iterator_type;public:    typedef        typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type        char_t;    std::stack<node*>& m_stack;    make_any_char(std::stack<node*>& the_stack)        : m_stack(the_stack)        {}    void operator()(const char_t c) const    {        BOOST_ASSERT(c == '.');        do_any_char();    }    void do_any_char() const    {        static utility::impl::range_run<char_t> rr;        rr.set(full_range<char_t>());        char_t newline = '\n';        rr.clear(utility::impl::range<char_t>(newline, newline));        ccl_utils::create_nodes(rr, m_stack);    }};template <typename ScannerT>class make_string{    typedef typename 
ScannerT::iterator_t iterator_type;public:    typedef        typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type        char_t;    std::stack<node*>& m_stack;    make_string(std::stack<node*>& the_stack)        : m_stack(the_stack)        {}    void operator()(iterator_type const& first, iterator_type const& last) const    {        BOOST_ASSERT(*first == '"');        iterator_type first_ = first;        ScannerT scan(first_, last);        ++scan.first; // skip over '"'        // empty string not allowed        if (*scan.first == '"')        {            boost::throw_exception(bad_regex());        }        const escape_char_parser<lex_escapes, char_t> lex_escape_ch =            escape_char_parser<lex_escapes, char_t>();        char_t c;        lex_escape_ch[assign(c)].parse(scan);        node* top_node = ccl_utils::create_mb_node_seq(c);        while (*scan.first != '"' && scan.first != scan.last)        {            lex_escape_ch[assign(c)].parse(scan);            node* cur_node = ccl_utils::create_mb_node_seq(c);            top_node = new cat_node(top_node, cur_node);        }        m_stack.push(top_node);    }};inlinenode* repeat_node(node* n, int num){    node* list_of_nodes = n;    for (int i = 1; i < num; ++i)    {        list_of_nodes = new cat_node(list_of_nodes, n->clone());    }    return list_of_nodes;}inlinenode* optional_node(node* n){    return new or_node(n, new epsilon_node());}template <typename ScannerT>class make_rep1{    typedef typename ScannerT::iterator_t iterator_type;public:    typedef        typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type        char_t;    std::stack<node*>& m_stack;    make_rep1(std::stack<node*>& the_stack)        : m_stack(the_stack)        {}    void operator()(iterator_type const& first, iterator_type const& last) const    {        BOOST_ASSERT(*first == '{');        iterator_type first_ = first;        ScannerT scan(first_, last);        ++scan.first; // skip over '{'       
 unsigned int count;        uint_p[assign(count)].parse(scan);        if (count == 0)            boost::throw_exception(bad_regex());        node* top_node = m_stack.top();        m_stack.pop();        top_node = repeat_node(top_node, count);        m_stack.push(top_node);    }};template <typename ScannerT>class make_rep2{    typedef typename ScannerT::iterator_t iterator_type;public:    typedef        typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type        char_t;    std::stack<node*>& m_stack;    make_rep2(std::stack<node*>& the_stack)        : m_stack(the_stack)        {}    void operator()(iterator_type const& first, iterator_type const& last) const    {        BOOST_ASSERT(*first == '{');        iterator_type first_ = first;        ScannerT scan (first_, last);        ++scan.first; // skip over '{'        unsigned int count;        uint_p[assign(count)].parse(scan);        if (count == 0)            boost::throw_exception(bad_regex());        node* top_node = m_stack.top();        m_stack.pop();        top_node = new cat_node(repeat_node(top_node, count),                new star_node(top_node->clone()));        m_stack.push(top_node);    }};template <typename ScannerT>class make_rep3{    typedef typename ScannerT::iterator_t iterator_type;public:    typedef        typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type        char_t;    std::stack<node*>& m_stack;    make_rep3(std::stack<node*>& the_stack)        : m_stack(the_stack)        {}    void operator()(iterator_type const& first, iterator_type const& last) const    {        BOOST_ASSERT(*first == '{');        iterator_type first_ = first;        ScannerT scan(first_, last);        ++scan.first; // skip over '{'        unsigned int count1, count2;        uint_p[assign(count1)].parse(scan);        if (count1 == 0)            boost::throw_exception(bad_regex());        ++scan.first; // skip over ','        uint_p[assign(count2)].parse(scan);        if (count2 <= 
count1)            boost::throw_exception(bad_regex());        node* top_node = m_stack.top();        m_stack.pop();        node* repeats = repeat_node(top_node, count1);        top_node = new cat_node(repeats,                repeat_node(optional_node(top_node->clone()),                    count2 - count1));        m_stack.push(top_node);    }};///////////////////////////////////////////////////////////////////////////////////  Lexer grammar////      Defines the grammar, which mandates the syntax of the understood//      lexeme definitions passed to lexer::register_regex./////////////////////////////////////////////////////////////////////////////////class lexer_grammar : public boost::spirit::classic::grammar<lexer_grammar>{public:    lexer_grammar(std::stack<node*> &node_stack_)    : node_stack(node_stack_) {}    template <typename ScannerT>    struct definition    {        typedef rule<ScannerT> rule_t;        typedef typename ScannerT::iterator_t iterator_type;        typedef            typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type            char_t;        rule_t regex, re, series, singleton, singleton2, fullccl, ccl, string,            escseq, ccl_char;        symbols<> ccl_expr;        definition(lexer_grammar const &self)        {            regex =                    re >> !('/' >> re) >> !ch_p('$')                ;            re =                    series                >>*( ('|' >> series)[make_or<ScannerT>(self.node_stack)] )                ;            series =                    singleton                >>*( singleton[make_concat<ScannerT>(self.node_stack)] )                ;            singleton =                    ch_p('.')[make_any_char<ScannerT>(self.node_stack)]                    >>  singleton2                |   fullccl                    >>  singleton2                |   ('"' >> string >> '"')                    [                        make_string<ScannerT>(self.node_stack)                    ]                    
>>  singleton2                |   '(' >> re >> ')'                    >>  singleton2                |   ((anychar_p - chset<>("/|*+?.(){}\\")) | escseq)                    [                        make_char<ScannerT>(self.node_stack)                    ]                    >>  singleton2                ;            singleton2 =                    ch_p('*')[make_star<ScannerT>(self.node_stack)]                    >> singleton2                |   ch_p('+')[make_plus<ScannerT>(self.node_stack)]                    >> singleton2                |   ch_p('?')[make_optional<ScannerT>(self.node_stack)]                    >> singleton2                |   ('{' >> uint_p >> '}')                    [                        make_rep1<ScannerT>(self.node_stack)                    ]                    >>  singleton2                |   ('{' >> uint_p >> ',' >> '}')                    [                        make_rep2<ScannerT>(self.node_stack)                    ]                    >>  singleton2                |   ('{' >> uint_p >> ',' >> uint_p >> '}')                    [                        make_rep3<ScannerT>(self.node_stack)                    ]                    >> singleton2                |   epsilon_p                ;            fullccl =                    ('[' >> !ch_p('^') >> ccl >> ']')                    [                        make_ccl<ScannerT>(self.node_stack)                    ]                ;            ccl =                   *(ccl_expr | (ccl_char >> !('-' >> ccl_char)))                ;            ccl_char =                    ( (anychar_p - chset<>("\\\n]")) | escseq )                ;            ccl_expr =                    "[:alnum:]",                    "[:alpha:]",                    "[:blank:]",                    "[:cntrl:]",                    "[:digit:]",                    "[:graph:]",                    "[:lower:]",                    "[:print:]",                    "[:punct:]",                    "[:space:]",                    
"[:upper:]",                    "[:xdigit:]"                ;            string =

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -