📄 regex.c
字号:
}
else
{ /* Caller did not allocate a buffer. Do it for them. */
bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
}
if (!bufp->buffer) return REG_ESPACE;
bufp->allocated = INIT_BUF_SIZE;
}
begalt = b = bufp->buffer;
/* Loop through the uncompiled pattern until we're at the end. */
while (p != pend)
{
PATFETCH (c);
switch (c)
{
case '^':
{
if ( /* If at start of pattern, it's an operator. */
p == pattern + 1
/* If context independent, it's an operator. */
|| syntax & RE_CONTEXT_INDEP_ANCHORS
/* Otherwise, depends on what's come before. */
|| at_begline_loc_p (pattern, p, syntax))
BUF_PUSH (begline);
else
goto normal_char;
}
break;
case '$':
{
if ( /* If at end of pattern, it's an operator. */
p == pend
/* If context independent, it's an operator. */
|| syntax & RE_CONTEXT_INDEP_ANCHORS
/* Otherwise, depends on what's next. */
|| at_endline_loc_p (p, pend, syntax))
BUF_PUSH (endline);
else
goto normal_char;
}
break;
case '+':
case '?':
if ((syntax & RE_BK_PLUS_QM)
|| (syntax & RE_LIMITED_OPS))
goto normal_char;
handle_plus:
case '*':
/* If there is no previous pattern... */
if (!laststart)
{
if (syntax & RE_CONTEXT_INVALID_OPS)
return REG_BADRPT;
else if (!(syntax & RE_CONTEXT_INDEP_OPS))
goto normal_char;
}
{
/* Are we optimizing this jump? */
boolean keep_string_p = false;
/* 1 means zero (many) matches is allowed. */
char zero_times_ok = 0, many_times_ok = 0;
/* If there is a sequence of repetition chars, collapse it
down to just one (the right one). We can't combine
interval operators with these because of, e.g., `a{2}*',
which should only match an even number of `a's. */
for (;;)
{
zero_times_ok |= c != '+';
many_times_ok |= c != '?';
if (p == pend)
break;
PATFETCH (c);
if (c == '*'
|| (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
;
else if (syntax & RE_BK_PLUS_QM && c == '\\')
{
if (p == pend) return REG_EESCAPE;
PATFETCH (c1);
if (!(c1 == '+' || c1 == '?'))
{
PATUNFETCH;
PATUNFETCH;
break;
}
c = c1;
}
else
{
PATUNFETCH;
break;
}
/* If we get here, we found another repeat character. */
}
/* Star, etc. applied to an empty pattern is equivalent
to an empty pattern. */
if (!laststart)
break;
/* Now we know whether or not zero matches is allowed
and also whether or not two or more matches is allowed. */
if (many_times_ok)
{ /* More than one repetition is allowed, so put in at the
end a backward relative jump from `b' to before the next
jump we're going to put in below (which jumps from
laststart to after this jump).
But if we are at the `*' in the exact sequence `.*\n',
insert an unconditional jump backwards to the .,
instead of the beginning of the loop. This way we only
push a failure point once, instead of every time
through the loop. */
assert (p - 1 > pattern);
/* Allocate the space for the jump. */
GET_BUFFER_SPACE (3);
/* We know we are not at the first character of the pattern,
because laststart was nonzero. And we've already
incremented `p', by the way, to be the character after
the `*'. Do we have to do something analogous here
for null bytes, because of RE_DOT_NOT_NULL? */
if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
&& zero_times_ok
&& p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
&& !(syntax & RE_DOT_NEWLINE))
{ /* We have .*\n. */
STORE_JUMP (jump, b, laststart);
keep_string_p = true;
}
else
/* Anything else. */
STORE_JUMP (maybe_pop_jump, b, laststart - 3);
/* We've added more stuff to the buffer. */
b += 3;
}
/* On failure, jump from laststart to b + 3, which will be the
end of the buffer after this jump is inserted. */
GET_BUFFER_SPACE (3);
INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
: on_failure_jump,
laststart, b + 3);
pending_exact = 0;
b += 3;
if (!zero_times_ok)
{
/* At least one repetition is required, so insert a
`dummy_failure_jump' before the initial
`on_failure_jump' instruction of the loop. This
effects a skip over that instruction the first time
we hit that loop. */
GET_BUFFER_SPACE (3);
INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
b += 3;
}
}
break;
case '.':
laststart = b;
BUF_PUSH (anychar);
break;
case '[':
{
boolean had_char_class = false;
if (p == pend) return REG_EBRACK;
/* Ensure that we have enough space to push a charset: the
opcode, the length count, and the bitset; 34 bytes in all. */
GET_BUFFER_SPACE (34);
laststart = b;
/* We test `*p == '^' twice, instead of using an if
statement, so we only need one BUF_PUSH. */
BUF_PUSH (*p == '^' ? charset_not : charset);
if (*p == '^')
p++;
/* Remember the first position in the bracket expression. */
p1 = p;
/* Push the number of bytes in the bitmap. */
BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
/* Clear the whole map. */
bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
/* charset_not matches newline according to a syntax bit. */
if ((re_opcode_t) b[-2] == charset_not
&& (syntax & RE_HAT_LISTS_NOT_NEWLINE))
SET_LIST_BIT ('\n');
/* Read in characters and ranges, setting map bits. */
for (;;)
{
if (p == pend) return REG_EBRACK;
PATFETCH (c);
/* \ might escape characters inside [...] and [^...]. */
if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
{
if (p == pend) return REG_EESCAPE;
PATFETCH (c1);
SET_LIST_BIT (c1);
continue;
}
/* Could be the end of the bracket expression. If it's
not (i.e., when the bracket expression is `[]' so
far), the ']' character bit gets set way below. */
if (c == ']' && p != p1 + 1)
break;
/* Look ahead to see if it's a range when the last thing
was a character class. */
if (had_char_class && c == '-' && *p != ']')
return REG_ERANGE;
/* Look ahead to see if it's a range when the last thing
was a character: if this is a hyphen not at the
beginning or the end of a list, then it's the range
operator. */
if (c == '-'
&& !(p - 2 >= pattern && p[-2] == '[')
&& !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
&& *p != ']')
{
reg_errcode_t ret
= compile_range (&p, pend, translate, syntax, b);
if (ret != REG_NOERROR) return ret;
}
else if (p[0] == '-' && p[1] != ']')
{ /* This handles ranges made up of characters only. */
reg_errcode_t ret;
/* Move past the `-'. */
PATFETCH (c1);
ret = compile_range (&p, pend, translate, syntax, b);
if (ret != REG_NOERROR) return ret;
}
/* See if we're at the beginning of a possible character
class. */
else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
{ /* Leave room for the null. */
char str[CHAR_CLASS_MAX_LENGTH + 1];
PATFETCH (c);
c1 = 0;
/* If pattern is `[[:'. */
if (p == pend) return REG_EBRACK;
for (;;)
{
PATFETCH (c);
if (c == ':' || c == ']' || p == pend
|| c1 == CHAR_CLASS_MAX_LENGTH)
break;
str[c1++] = c;
}
str[c1] = '\0';
/* If isn't a word bracketed by `[:' and:`]':
undo the ending character, the letters, and leave
the leading `:' and `[' (but set bits for them). */
if (c == ':' && *p == ']')
{
int ch;
boolean is_alnum = STREQ (str, "alnum");
boolean is_alpha = STREQ (str, "alpha");
boolean is_blank = STREQ (str, "blank");
boolean is_cntrl = STREQ (str, "cntrl");
boolean is_digit = STREQ (str, "digit");
boolean is_graph = STREQ (str, "graph");
boolean is_lower = STREQ (str, "lower");
boolean is_print = STREQ (str, "print");
boolean is_punct = STREQ (str, "punct");
boolean is_space = STREQ (str, "space");
boolean is_upper = STREQ (str, "upper");
boolean is_xdigit = STREQ (str, "xdigit");
if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
/* Throw away the ] at the end of the character
class. */
PATFETCH (c);
if (p == pend) return REG_EBRACK;
for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
{
if ( (is_alnum && ISALNUM (ch))
|| (is_alpha && ISALPHA (ch))
|| (is_blank && ISBLANK (ch))
|| (is_cntrl && ISCNTRL (ch))
|| (is_digit && ISDIGIT (ch))
|| (is_graph && ISGRAPH (ch))
|| (is_lower && ISLOWER (ch))
|| (is_print && ISPRINT (ch))
|| (is_punct && ISPUNCT (ch))
|| (is_space && ISSPACE (ch))
|| (is_upper && ISUPPER (ch))
|| (is_xdigit && ISXDIGIT (ch)))
SET_LIST_BIT (ch);
}
had_char_class = true;
}
else
{
c1++;
while (c1--)
PATUNFETCH;
SET_LIST_BIT ('[');
SET_LIST_BIT (':');
had_char_class = false;
}
}
else
{
had_char_class = false;
SET_LIST_BIT (c);
}
}
/* Discard any (non)matching list bytes that are all 0 at the
end of the map. Decrease the map-length byte too. */
while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
b[-1]--;
b += b[-1];
}
break;
case '(':
if (syntax & RE_NO_BK_PARENS)
goto handle_open;
else
goto normal_char;
case ')':
if (syntax & RE_NO_BK_PARENS)
goto handle_close;
else
goto normal_char;
case '\n':
if (syntax & RE_NEWLINE_ALT)
goto handle_alt;
else
goto normal_char;
case '|':
if (syntax & RE_NO_BK_VBAR)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -