📄 regex.c
字号:
goto handle_alt;
else
goto normal_char;
case '{':
if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
goto handle_interval;
else
goto normal_char;
case '\\':
if (p == pend) return REG_EESCAPE;
/* Do not translate the character after the \, so that we can
distinguish, e.g., \B from \b, even if we normally would
translate, e.g., B to b. */
PATFETCH_RAW (c);
switch (c)
{
case '(':
if (syntax & RE_NO_BK_PARENS)
goto normal_backslash;
handle_open:
bufp->re_nsub++;
regnum++;
if (COMPILE_STACK_FULL)
{
RETALLOC (compile_stack.stack, compile_stack.size << 1,
compile_stack_elt_t);
if (compile_stack.stack == NULL) return REG_ESPACE;
compile_stack.size <<= 1;
}
/* These are the values to restore when we hit end of this
group. They are all relative offsets, so that if the
whole pattern moves because of realloc, they will still
be valid. */
COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
COMPILE_STACK_TOP.fixup_alt_jump
= fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
COMPILE_STACK_TOP.regnum = regnum;
/* We will eventually replace the 0 with the number of
groups inner to this one. But do not push a
start_memory for groups beyond the last one we can
represent in the compiled pattern. */
if (regnum <= MAX_REGNUM)
{
COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
BUF_PUSH_3 (start_memory, regnum, 0);
}
compile_stack.avail++;
fixup_alt_jump = 0;
laststart = 0;
begalt = b;
/* If we've reached MAX_REGNUM groups, then this open
won't actually generate any code, so we'll have to
clear pending_exact explicitly. */
pending_exact = 0;
break;
case ')':
if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
if (COMPILE_STACK_EMPTY)
if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
goto normal_backslash;
else
return REG_ERPAREN;
handle_close:
if (fixup_alt_jump)
{ /* Push a dummy failure point at the end of the
alternative for a possible future
`pop_failure_jump' to pop. See comments at
`push_dummy_failure' in `re_match_2'. */
BUF_PUSH (push_dummy_failure);
/* We allocated space for this jump when we assigned
to `fixup_alt_jump', in the `handle_alt' case below. */
STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
}
/* See similar code for backslashed right paren above. */
if (COMPILE_STACK_EMPTY)
if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
goto normal_char;
else
return REG_ERPAREN;
/* Since we just checked for an empty stack above, this
``can't happen''. */
assert (compile_stack.avail != 0);
{
/* We don't just want to restore into `regnum', because
later groups should continue to be numbered higher,
as in `(ab)c(de)' -- the second group is #2. */
regnum_t this_group_regnum;
compile_stack.avail--;
begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
fixup_alt_jump
= COMPILE_STACK_TOP.fixup_alt_jump
? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
: 0;
laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
this_group_regnum = COMPILE_STACK_TOP.regnum;
/* If we've reached MAX_REGNUM groups, then this close
won't actually generate any code, so we'll have to
clear pending_exact explicitly. */
pending_exact = 0;
/* We're at the end of the group, so now we know how many
groups were inside this one. */
if (this_group_regnum <= MAX_REGNUM)
{
unsigned char *inner_group_loc
= bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
*inner_group_loc = regnum - this_group_regnum;
BUF_PUSH_3 (stop_memory, this_group_regnum,
regnum - this_group_regnum);
}
}
break;
case '|': /* `\|'. */
if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
goto normal_backslash;
handle_alt:
if (syntax & RE_LIMITED_OPS)
goto normal_char;
/* Insert before the previous alternative a jump which
jumps to this alternative if the former fails. */
GET_BUFFER_SPACE (3);
INSERT_JUMP (on_failure_jump, begalt, b + 6);
pending_exact = 0;
b += 3;
/* The alternative before this one has a jump after it
which gets executed if it gets matched. Adjust that
jump so it will jump to this alternative's analogous
jump (put in below, which in turn will jump to the next
(if any) alternative's such jump, etc.). The last such
jump jumps to the correct final destination. A picture:
_____ _____
| | | |
| v | v
a | b | c
If we are at `b', then fixup_alt_jump right now points to a
three-byte space after `a'. We'll put in the jump, set
fixup_alt_jump to right after `b', and leave behind three
bytes which we'll fill in when we get to after `c'. */
if (fixup_alt_jump)
STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
/* Mark and leave space for a jump after this alternative,
to be filled in later either by next alternative or
when we know we're at the end of a series of alternatives. */
fixup_alt_jump = b;
GET_BUFFER_SPACE (3);
b += 3;
laststart = 0;
begalt = b;
break;
case '{':
/* If \{ is a literal. */
if (!(syntax & RE_INTERVALS)
/* If we're at `\{' and it's not the open-interval
operator. */
|| ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
|| (p - 2 == pattern && p == pend))
goto normal_backslash;
handle_interval:
{
/* If we got here, then the syntax allows intervals. */
/* At least (most) this many matches must be made. */
int lower_bound = -1, upper_bound = -1;
beg_interval = p - 1;
if (p == pend)
{
if (syntax & RE_NO_BK_BRACES)
goto unfetch_interval;
else
return REG_EBRACE;
}
GET_UNSIGNED_NUMBER (lower_bound);
if (c == ',')
{
GET_UNSIGNED_NUMBER (upper_bound);
if (upper_bound < 0) upper_bound = RE_DUP_MAX;
}
else
/* Interval such as `{1}' => match exactly once. */
upper_bound = lower_bound;
if (lower_bound < 0 || upper_bound > RE_DUP_MAX
|| lower_bound > upper_bound)
{
if (syntax & RE_NO_BK_BRACES)
goto unfetch_interval;
else
return REG_BADBR;
}
if (!(syntax & RE_NO_BK_BRACES))
{
if (c != '\\') return REG_EBRACE;
PATFETCH (c);
}
if (c != '}')
{
if (syntax & RE_NO_BK_BRACES)
goto unfetch_interval;
else
return REG_BADBR;
}
/* We just parsed a valid interval. */
/* If it's invalid to have no preceding re. */
if (!laststart)
{
if (syntax & RE_CONTEXT_INVALID_OPS)
return REG_BADRPT;
else if (syntax & RE_CONTEXT_INDEP_OPS)
laststart = b;
else
goto unfetch_interval;
}
/* If the upper bound is zero, don't want to succeed at
all; jump from `laststart' to `b + 3', which will be
the end of the buffer after we insert the jump. */
if (upper_bound == 0)
{
GET_BUFFER_SPACE (3);
INSERT_JUMP (jump, laststart, b + 3);
b += 3;
}
/* Otherwise, we have a nontrivial interval. When
we're all done, the pattern will look like:
set_number_at <jump count> <upper bound>
set_number_at <succeed_n count> <lower bound>
succeed_n <after jump addr> <succeed_n count>
<body of loop>
jump_n <succeed_n addr> <jump count>
(The upper bound and `jump_n' are omitted if
`upper_bound' is 1, though.) */
else
{ /* If the upper bound is > 1, we need to insert
more at the end of the loop. */
unsigned nbytes = 10 + (upper_bound > 1) * 10;
GET_BUFFER_SPACE (nbytes);
/* Initialize lower bound of the `succeed_n', even
though it will be set during matching by its
attendant `set_number_at' (inserted next),
because `re_compile_fastmap' needs to know.
Jump to the `jump_n' we might insert below. */
INSERT_JUMP2 (succeed_n, laststart,
b + 5 + (upper_bound > 1) * 5,
lower_bound);
b += 5;
/* Code to initialize the lower bound. Insert
before the `succeed_n'. The `5' is the last two
bytes of this `set_number_at', plus 3 bytes of
the following `succeed_n'. */
insert_op2 (set_number_at, laststart, 5, lower_bound, b);
b += 5;
if (upper_bound > 1)
{ /* More than one repetition is allowed, so
append a backward jump to the `succeed_n'
that starts this interval.
When we've reached this during matching,
we'll have matched the interval once, so
jump back only `upper_bound - 1' times. */
STORE_JUMP2 (jump_n, b, laststart + 5,
upper_bound - 1);
b += 5;
/* The location we want to set is the second
parameter of the `jump_n'; that is `b-2' as
an absolute address. `laststart' will be
the `set_number_at' we're about to insert;
`laststart+3' the number to set, the source
for the relative address. But we are
inserting into the middle of the pattern --
so everything is getting moved up by 5.
Conclusion: (b - 2) - (laststart + 3) + 5,
i.e., b - laststart.
We insert this at the beginning of the loop
so that if we fail during matching, we'll
reinitialize the bounds. */
insert_op2 (set_number_at, laststart, b - laststart,
upper_bound - 1, b);
b += 5;
}
}
pending_exact = 0;
beg_interval = NULL;
}
break;
unfetch_interval:
/* If an invalid interval, match the characters as literals. */
assert (beg_interval);
p = beg_interval;
beg_interval = NULL;
/* normal_char and normal_backslash need `c'. */
PATFETCH (c);
if (!(syntax & RE_NO_BK_BRACES))
{
if (p > pattern && p[-1] == '\\')
goto normal_backslash;
}
goto normal_char;
#ifdef emacs
/* There is no way to specify the before_dot and after_dot
operators. rms says this is ok. --karl */
case '=':
BUF_PUSH (at_dot);
break;
case 's':
laststart = b;
PATFETCH (c);
BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
break;
case 'S':
laststart = b;
PATFETCH (c);
BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
break;
#endif /* emacs */
case 'w':
laststart = b;
BUF_PUSH (wordchar);
break;
case 'W':
laststart = b;
BUF_PUSH (notwordchar);
break;
case '<':
BUF_PUSH (wordbeg);
break;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -