📄 lexer.hpp
字号:
node* top_node = new cat_node(newnode, cnode);
newnode = top_node;
}
// or together the various parts
if (savednode)
{
node* top_node = new or_node(savednode, newnode);
savednode = top_node;
}
else
{
savednode = newnode;
}
}
if (first_time)
{
stack.push(savednode);
}
else
{
node* top = stack.top();
stack.pop();
node* newtop = new or_node(top, savednode);
stack.push(newtop);
}
}
} // namespace ccl_utils
template <typename ScannerT>
class make_char
{
typedef typename ScannerT::iterator_t iterator_type;
public:
typedef
typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
char_t;
make_char(std::stack<node*>& the_stack)
: m_stack(the_stack)
{}
void operator()(iterator_type const& first, iterator_type const& last) const
{
const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
escape_char_parser<lex_escapes, char_t>();
char_t the_char;
iterator_type first_ = first;
ScannerT scan(first_, last);
lex_escape_ch[assign(the_char)].parse(scan);
node* newnode = ccl_utils::create_mb_node_seq(the_char);
m_stack.push(newnode);
}
std::stack<node*>& m_stack;
};
template <typename ScannerT>
class make_ccl
{
typedef typename ScannerT::iterator_t iterator_type;
public:
typedef
typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
char_t;
make_ccl(std::stack<node*>& the_stack)
: m_stack(the_stack)
{}
static bool is_equal_to_string(iterator_type first,
iterator_type const & last, const char* str)
{
while (first != last &&*str &&*first ==*str)
{
++first;
++str;
}
return*str == 0;
}
template <typename ParserT>
static void fill_ccl(utility::impl::range_run<char_t>& rr, const ParserT& parser)
{
for (int i = 0; i < 256; ++i)
{
if (parser.test(static_cast<char_t>(uchar(i))))
rr.set(utility::impl::range<char_t>(char_t(i), char_t(i)));
}
}
void operator()(iterator_type const& first_, iterator_type const& last) const
{
BOOST_ASSERT(*first_ == '[');
iterator_type first = first_;
++first; // skip over '['
bool negated_ccl = false;
if (*first == '^')
{
negated_ccl = true;
++first;
}
utility::impl::range_run<char_t> rr;
while (first != last &&*first != ']')
{
if (*first == '[') // it's a ccl_expr like [:space:]
{
// check for [:space:], etc.
if (is_equal_to_string(first, last, "[:alnum:]"))
{
fill_ccl(rr, alnum_p);
}
else if (is_equal_to_string(first, last, "[:alpha:]"))
{
fill_ccl(rr, alpha_p);
}
else if (is_equal_to_string(first, last, "[:blank:]"))
{
fill_ccl(rr, blank_p);
}
else if (is_equal_to_string(first, last, "[:cntrl:]"))
{
fill_ccl(rr, cntrl_p);
}
else if (is_equal_to_string(first, last, "[:digit:]"))
{
fill_ccl(rr, digit_p);
}
else if (is_equal_to_string(first, last, "[:graph:]"))
{
fill_ccl(rr, graph_p);
}
else if (is_equal_to_string(first, last, "[:lower:]"))
{
fill_ccl(rr, lower_p);
}
else if (is_equal_to_string(first, last, "[:print:]"))
{
fill_ccl(rr, print_p);
}
else if (is_equal_to_string(first, last, "[:punct:]"))
{
fill_ccl(rr, punct_p);
}
else if (is_equal_to_string(first, last, "[:space:]"))
{
fill_ccl(rr, space_p);
}
else if (is_equal_to_string(first, last, "[:upper:]"))
{
fill_ccl(rr, upper_p);
}
else if (is_equal_to_string(first, last, "[:xdigit:]"))
{
fill_ccl(rr, xdigit_p);
}
// this can't happen, because it's parsed before we get here.
//else
// throw bad_regex();
// Advance past the character class expression
while (first != last &&*first != ']')
++first;
BOOST_ASSERT(*first == ']');
++first;
}
else {
const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
escape_char_parser<lex_escapes, char_t>();
char_t c1;
ScannerT scan(first, last);
lex_escape_ch[assign(c1)].parse(scan);
if (*scan.first == '-') // insert a range
{
++scan.first;
char_t c2;
lex_escape_ch[assign(c2)].parse(scan);
BOOST_ASSERT(c1 < c2); // Throw exception?
rr.set(utility::impl::range<char_t>(c1, c2));
}
else // insert 1 char
{
rr.set(utility::impl::range<char_t>(c1, c1));
}
}
}
if (negated_ccl)
{
rr = ccl_utils::negate_range_run(rr);
}
ccl_utils::create_nodes(rr, m_stack);
}
std::stack<node*>& m_stack;
};
template <typename ScannerT>
class make_any_char
{
typedef typename ScannerT::iterator_t iterator_type;
public:
typedef
typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
char_t;
std::stack<node*>& m_stack;
make_any_char(std::stack<node*>& the_stack)
: m_stack(the_stack)
{}
void operator()(const char_t c) const
{
BOOST_ASSERT(c == '.');
do_any_char();
}
void do_any_char() const
{
static utility::impl::range_run<char_t> rr;
rr.set(full_range<char_t>());
char_t newline = '\n';
rr.clear(utility::impl::range<char_t>(newline, newline));
ccl_utils::create_nodes(rr, m_stack);
}
};
// Semantic action for a double-quoted string literal.
//
// Parses each (possibly escaped) character between the quotes, turns it into
// a node via ccl_utils::create_mb_node_seq(), chains the nodes together with
// cat_node and pushes the resulting tree onto the node stack.
template <typename ScannerT>
class make_string
{
    typedef typename ScannerT::iterator_t iterator_type;

public:
    typedef
        typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
        char_t;

    std::stack<node*>& m_stack;

    make_string(std::stack<node*>& the_stack)
        : m_stack(the_stack)
    {}

    // first/last delimit the matched string; *first must be the opening '"'.
    // Throws bad_regex for an empty string, or when the input ends directly
    // after the opening quote.
    void operator()(iterator_type const& first, iterator_type const& last) const
    {
        BOOST_ASSERT(*first == '"');

        iterator_type first_ = first;
        ScannerT scan(first_, last);
        ++scan.first; // skip over '"'

        // empty string not allowed
        // (test for the end first so the end iterator is never dereferenced)
        if (scan.first == scan.last || *scan.first == '"')
        {
            throw bad_regex();
        }

        const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
            escape_char_parser<lex_escapes, char_t>();

        char_t c;
        lex_escape_ch[assign(c)].parse(scan);
        node* top_node = ccl_utils::create_mb_node_seq(c);

        // The end check must come before the dereference: evaluating
        // *scan.first when scan.first == scan.last is undefined behaviour.
        while (scan.first != scan.last && *scan.first != '"')
        {
            lex_escape_ch[assign(c)].parse(scan);
            node* cur_node = ccl_utils::create_mb_node_seq(c);
            top_node = new cat_node(top_node, cur_node);
        }

        m_stack.push(top_node);
    }
};
// Builds a left-nested chain of cat_nodes that starts with `n` itself and
// appends one clone of `n` for every repetition beyond the first.
// For num <= 1 the chain is just `n`, returned unchanged.
inline
node* repeat_node(node* n, int num)
{
    node* chain = n;
    for (int remaining = num; remaining > 1; --remaining)
    {
        node* const copy = n->clone();
        chain = new cat_node(chain, copy);
    }
    return chain;
}
// Wraps `n` in an or_node whose second alternative is a new epsilon_node.
inline
node* optional_node(node* n)
{
    node* const empty_alternative = new epsilon_node();
    node* const either = new or_node(n, empty_alternative);
    return either;
}
// Semantic action for a repetition with a single count ("{n}").
//
// Replaces the node on top of the stack with repeat_node(top, n), i.e. the
// node concatenated with clones of itself.
template <typename ScannerT>
class make_rep1
{
    typedef typename ScannerT::iterator_t iterator_type;

public:
    typedef
        typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
        char_t;

    std::stack<node*>& m_stack;

    make_rep1(std::stack<node*>& the_stack)
        : m_stack(the_stack)
    {}

    // first/last delimit the matched repetition; *first must be '{'.
    // Throws bad_regex when the count is 0 or was not assigned by the parser.
    void operator()(iterator_type const& first, iterator_type const& last) const
    {
        BOOST_ASSERT(*first == '{');

        iterator_type first_ = first;
        ScannerT scan(first_, last);
        ++scan.first; // skip over '{'

        // Start from 0 so that a count the parser fails to assign is
        // rejected below instead of being read uninitialized.
        unsigned int count = 0;
        uint_p[assign(count)].parse(scan);
        if (count == 0)
            throw bad_regex();

        node* top_node = m_stack.top();
        m_stack.pop();
        top_node = repeat_node(top_node, count);
        m_stack.push(top_node);
    }
};
// Semantic action for a repetition with a lower bound only (presumably the
// "{n,}" form; only the leading count is read here).
//
// Replaces the node on top of the stack with repeat_node(top, n) followed by
// a star_node over a clone of the original node.
template <typename ScannerT>
class make_rep2
{
    typedef typename ScannerT::iterator_t iterator_type;

public:
    typedef
        typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
        char_t;

    std::stack<node*>& m_stack;

    make_rep2(std::stack<node*>& the_stack)
        : m_stack(the_stack)
    {}

    // first/last delimit the matched repetition; *first must be '{'.
    // Throws bad_regex when the count is 0 or was not assigned by the parser.
    void operator()(iterator_type const& first, iterator_type const& last) const
    {
        BOOST_ASSERT(*first == '{');

        iterator_type first_ = first;
        ScannerT scan (first_, last);
        ++scan.first; // skip over '{'

        // Start from 0 so that a count the parser fails to assign is
        // rejected below instead of being read uninitialized.
        unsigned int count = 0;
        uint_p[assign(count)].parse(scan);
        if (count == 0)
            throw bad_regex();

        node* top_node = m_stack.top();
        m_stack.pop();
        top_node = new cat_node(repeat_node(top_node, count),
            new star_node(top_node->clone()));
        m_stack.push(top_node);
    }
};
// Semantic action for a bounded repetition ("{n,m}").
//
// Replaces the node on top of the stack with n mandatory copies
// (repeat_node) followed by a chain of optional copies (optional_node)
// built with a count of m - n.
template <typename ScannerT>
class make_rep3
{
    typedef typename ScannerT::iterator_t iterator_type;

public:
    typedef
        typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
        char_t;

    std::stack<node*>& m_stack;

    make_rep3(std::stack<node*>& the_stack)
        : m_stack(the_stack)
    {}

    // first/last delimit the matched repetition; *first must be '{'.
    // Throws bad_regex when the lower bound is 0, when the upper bound is
    // not strictly greater than the lower bound, or when either bound was
    // not assigned by the parser.
    void operator()(iterator_type const& first, iterator_type const& last) const
    {
        BOOST_ASSERT(*first == '{');

        iterator_type first_ = first;
        ScannerT scan(first_, last);
        ++scan.first; // skip over '{'

        // Both bounds start from 0 so that a value the parser fails to
        // assign is rejected by the checks below instead of being read
        // uninitialized.
        unsigned int count1 = 0;
        unsigned int count2 = 0;
        uint_p[assign(count1)].parse(scan);
        if (count1 == 0)
            throw bad_regex();

        ++scan.first; // skip over ','

        uint_p[assign(count2)].parse(scan);
        if (count2 <= count1)
            throw bad_regex();

        node* top_node = m_stack.top();
        m_stack.pop();
        node* repeats = repeat_node(top_node, count1);
        // Note: repeat_node() returns its argument unchanged for counts
        // below 2, so the optional tail always holds at least one optional
        // copy.
        top_node = new cat_node(repeats,
            repeat_node(optional_node(top_node->clone()),
                count2 - count1));
        m_stack.push(top_node);
    }
};
///////////////////////////////////////////////////////////////////////////////
//
// Lexer grammar
//
// Defines the grammar, which mandates the syntax of the understood
// lexeme definitions passed to lexer::register_regex.
//
///////////////////////////////////////////////////////////////////////////////
class lexer_grammar : public boost::spirit::grammar<lexer_grammar>
{
public:
lexer_grammar(std::stack<node*> &node_stack_)
: node_stack(node_stack_) {}
template <typename ScannerT>
struct definition
{
typedef rule<ScannerT> rule_t;
typedef typename ScannerT::iterator_t iterator_type;
typedef
typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
char_t;
rule_t regex, re, series, singleton, singleton2, fullccl, ccl, string,
escseq, ccl_char;
symbols<> ccl_expr;
definition(lexer_grammar const &self)
{
regex =
re >> !('/' >> re) >> !ch_p('$')
;
re =
series
>>*( ('|' >> series)[make_or<ScannerT>(self.node_stack)] )
;
series =
singleton
>>*( singleton[make_concat<ScannerT>(self.node_stack)] )
;
singleton =
ch_p('.')[make_any_char<ScannerT>(self.node_stack)]
>> singleton2
| fullccl
>> singleton2
| ('"' >> string >> '"')
[
make_string<ScannerT>(self.node_stack)
]
>> singleton2
| '(' >> re >> ')'
>> singleton2
| ((anychar_p - chset<>("/|*+?.(){}\\")) | escseq)
[
make_char<ScannerT>(self.node_stack)
]
>> singleton2
;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -