lexer.hpp
字号:
: m_stack(the_stack) {} void operator()(iterator_type const& first, iterator_type const& last) const { const escape_char_parser<lex_escapes, char_t> lex_escape_ch = escape_char_parser<lex_escapes, char_t>(); char_t the_char; iterator_type first_ = first; ScannerT scan(first_, last); lex_escape_ch[assign(the_char)].parse(scan); node* newnode = ccl_utils::create_mb_node_seq(the_char); m_stack.push(newnode); } std::stack<node*>& m_stack;};template <typename ScannerT>class make_ccl{ typedef typename ScannerT::iterator_t iterator_type;public: typedef typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type char_t; make_ccl(std::stack<node*>& the_stack) : m_stack(the_stack) {} static bool is_equal_to_string(iterator_type first, iterator_type const & last, const char* str) { while (first != last &&*str &&*first ==*str) { ++first; ++str; } return*str == 0; } template <typename ParserT> static void fill_ccl(utility::impl::range_run<char_t>& rr, const ParserT& parser) { for (int i = 0; i < 256; ++i) { if (parser.test(static_cast<char_t>(uchar(i)))) rr.set(utility::impl::range<char_t>(char_t(i), char_t(i))); } } void operator()(iterator_type const& first_, iterator_type const& last) const { BOOST_ASSERT(*first_ == '['); iterator_type first = first_; ++first; // skip over '[' bool negated_ccl = false; if (*first == '^') { negated_ccl = true; ++first; } utility::impl::range_run<char_t> rr; while (first != last &&*first != ']') { if (*first == '[') // it's a ccl_expr like [:space:] { // check for [:space:], etc. if (is_equal_to_string(first, last, "[:alnum:]")) { fill_ccl(rr, alnum_p); } else if (is_equal_to_string(first, last, "[:alpha:]")) { fill_ccl(rr, alpha_p); } else if (is_equal_to_string(first, last, "[:blank:]")) { fill_ccl(rr, blank_p); } else if (is_equal_to_string(first, last, "[:cntrl:]")) { fill_ccl(rr, cntrl_p); } else if (is_equal_to_string(first, last, "[:digit:]")) { fill_ccl(rr, digit_p); } else if (is_equal_to_string(first, last, "[:graph:]")) { fill_ccl(rr, graph_p); } else if (is_equal_to_string(first, last, "[:lower:]")) { fill_ccl(rr, lower_p); } else if (is_equal_to_string(first, last, "[:print:]")) { fill_ccl(rr, print_p); } else if (is_equal_to_string(first, last, "[:punct:]")) { fill_ccl(rr, punct_p); } else if (is_equal_to_string(first, last, "[:space:]")) { fill_ccl(rr, space_p); } else if (is_equal_to_string(first, last, "[:upper:]")) { fill_ccl(rr, upper_p); } else if (is_equal_to_string(first, last, "[:xdigit:]")) { fill_ccl(rr, xdigit_p); } // this can't happen, because it's parsed before we get here. //else // throw bad_regex(); // Advance past the character class expression while (first != last &&*first != ']') ++first; BOOST_ASSERT(*first == ']'); ++first; } else { const escape_char_parser<lex_escapes, char_t> lex_escape_ch = escape_char_parser<lex_escapes, char_t>(); char_t c1; ScannerT scan(first, last); lex_escape_ch[assign(c1)].parse(scan); if (*scan.first == '-') // insert a range { ++scan.first; char_t c2; lex_escape_ch[assign(c2)].parse(scan); BOOST_ASSERT(c1 < c2); // Throw exception? rr.set(utility::impl::range<char_t>(c1, c2)); } else // insert 1 char { rr.set(utility::impl::range<char_t>(c1, c1)); } } } if (negated_ccl) { rr = ccl_utils::negate_range_run(rr); } ccl_utils::create_nodes(rr, m_stack); } std::stack<node*>& m_stack;};template <typename ScannerT>class make_any_char{ typedef typename ScannerT::iterator_t iterator_type;public: typedef typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type char_t; std::stack<node*>& m_stack; make_any_char(std::stack<node*>& the_stack) : m_stack(the_stack) {} void operator()(const char_t c) const { BOOST_ASSERT(c == '.'); do_any_char(); } void do_any_char() const { static utility::impl::range_run<char_t> rr; rr.set(full_range<char_t>()); char_t newline = '\n'; rr.clear(utility::impl::range<char_t>(newline, newline)); ccl_utils::create_nodes(rr, m_stack); }};template <typename ScannerT>class make_string{ typedef typename ScannerT::iterator_t iterator_type;public: typedef typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type char_t; std::stack<node*>& m_stack; make_string(std::stack<node*>& the_stack) : m_stack(the_stack) {} void operator()(iterator_type const& first, iterator_type const& last) const { BOOST_ASSERT(*first == '"'); iterator_type first_ = first; ScannerT scan(first_, last); ++scan.first; // skip over '"' // empty string not allowed if (*scan.first == '"') { boost::throw_exception(bad_regex()); } const escape_char_parser<lex_escapes, char_t> lex_escape_ch = escape_char_parser<lex_escapes, char_t>(); char_t c; lex_escape_ch[assign(c)].parse(scan); node* top_node = ccl_utils::create_mb_node_seq(c); while (*scan.first != '"' && scan.first != scan.last) { lex_escape_ch[assign(c)].parse(scan); node* cur_node = ccl_utils::create_mb_node_seq(c); top_node = new cat_node(top_node, cur_node); } m_stack.push(top_node); }};inlinenode* repeat_node(node* n, int num){ node* list_of_nodes = n; for (int i = 1; i < num; ++i) { list_of_nodes = new cat_node(list_of_nodes, n->clone()); } return list_of_nodes;}inlinenode* optional_node(node* n){ return new or_node(n, new epsilon_node());}template <typename ScannerT>class make_rep1{ typedef typename ScannerT::iterator_t iterator_type;public: typedef typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type char_t; std::stack<node*>& m_stack; make_rep1(std::stack<node*>& the_stack) : m_stack(the_stack) {} void operator()(iterator_type const& first, iterator_type const& last) const { BOOST_ASSERT(*first == '{'); iterator_type first_ = first; ScannerT scan(first_, last); ++scan.first; // skip over '{' unsigned int count; uint_p[assign(count)].parse(scan); if (count == 0) boost::throw_exception(bad_regex()); node* top_node = m_stack.top(); m_stack.pop(); top_node = repeat_node(top_node, count); m_stack.push(top_node); }};template <typename ScannerT>class make_rep2{ typedef typename ScannerT::iterator_t iterator_type;public: typedef typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type char_t; std::stack<node*>& m_stack; make_rep2(std::stack<node*>& the_stack) : m_stack(the_stack) {} void operator()(iterator_type const& first, iterator_type const& last) const { BOOST_ASSERT(*first == '{'); iterator_type first_ = first; ScannerT scan (first_, last); ++scan.first; // skip over '{' unsigned int count; uint_p[assign(count)].parse(scan); if (count == 0) boost::throw_exception(bad_regex()); node* top_node = m_stack.top(); m_stack.pop(); top_node = new cat_node(repeat_node(top_node, count), new star_node(top_node->clone())); m_stack.push(top_node); }};template <typename ScannerT>class make_rep3{ typedef typename ScannerT::iterator_t iterator_type;public: typedef typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type char_t; std::stack<node*>& m_stack; make_rep3(std::stack<node*>& the_stack) : m_stack(the_stack) {} void operator()(iterator_type const& first, iterator_type const& last) const { BOOST_ASSERT(*first == '{'); iterator_type first_ = first; ScannerT scan(first_, last); ++scan.first; // skip over '{' unsigned int count1, count2; uint_p[assign(count1)].parse(scan); if (count1 == 0) boost::throw_exception(bad_regex()); ++scan.first; // skip over ',' uint_p[assign(count2)].parse(scan); if (count2 <= count1) boost::throw_exception(bad_regex()); node* top_node = m_stack.top(); m_stack.pop(); node* repeats = repeat_node(top_node, count1); top_node = new cat_node(repeats, repeat_node(optional_node(top_node->clone()), count2 - count1)); m_stack.push(top_node); }};/////////////////////////////////////////////////////////////////////////////////// Lexer grammar//// Defines the grammar, which mandates the syntax of the understood// lexeme definitions passed to lexer::register_regex./////////////////////////////////////////////////////////////////////////////////class lexer_grammar : public boost::spirit::classic::grammar<lexer_grammar>{public: lexer_grammar(std::stack<node*> &node_stack_) : node_stack(node_stack_) {} template <typename ScannerT> struct definition { typedef rule<ScannerT> rule_t; typedef typename ScannerT::iterator_t iterator_type; typedef typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type char_t; rule_t regex, re, series, singleton, singleton2, fullccl, ccl, string, escseq, ccl_char; symbols<> ccl_expr; definition(lexer_grammar const &self) { regex = re >> !('/' >> re) >> !ch_p('$') ; re = series >>*( ('|' >> series)[make_or<ScannerT>(self.node_stack)] ) ; series = singleton >>*( singleton[make_concat<ScannerT>(self.node_stack)] ) ; singleton = ch_p('.')[make_any_char<ScannerT>(self.node_stack)] >> singleton2 | fullccl >> singleton2 | ('"' >> string >> '"') [ make_string<ScannerT>(self.node_stack) ] >> singleton2 | '(' >> re >> ')' >> singleton2 | ((anychar_p - chset<>("/|*+?.(){}\\")) | escseq) [ make_char<ScannerT>(self.node_stack) ] >> singleton2 ; singleton2 = ch_p('*')[make_star<ScannerT>(self.node_stack)] >> singleton2 | ch_p('+')[make_plus<ScannerT>(self.node_stack)] >> singleton2 | ch_p('?')[make_optional<ScannerT>(self.node_stack)] >> singleton2 | ('{' >> uint_p >> '}') [ make_rep1<ScannerT>(self.node_stack) ] >> singleton2 | ('{' >> uint_p >> ',' >> '}') [ make_rep2<ScannerT>(self.node_stack) ] >> singleton2 | ('{' >> uint_p >> ',' >> uint_p >> '}') [ make_rep3<ScannerT>(self.node_stack) ] >> singleton2 | epsilon_p ; fullccl = ('[' >> !ch_p('^') >> ccl >> ']') [ make_ccl<ScannerT>(self.node_stack) ] ; ccl = *(ccl_expr | (ccl_char >> !('-' >> ccl_char))) ; ccl_char = ( (anychar_p - chset<>("\\\n]")) | escseq ) ; ccl_expr = "[:alnum:]", "[:alpha:]", "[:blank:]", "[:cntrl:]", "[:digit:]", "[:graph:]", "[:lower:]", "[:print:]", "[:punct:]", "[:space:]", "[:upper:]", "[:xdigit:]" ; string =
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -