📄 regex.c
字号:
/* In processing a repeat, 1 means zero matches are allowed. */
char zero_times_ok;
/* In processing a repeat, 1 means many matches are allowed. */
char many_times_ok;
/* Address of beginning of regexp, or inside of last \(. */
char *begalt = b;
/* In processing an interval, at least this many matches must be made. */
int lower_bound;
/* In processing an interval, at most this many matches can be made. */
int upper_bound;
/* Place in pattern (i.e., the {) to which to go back if the interval
is invalid. */
char *beg_interval = 0;
/* Stack of information saved by \( and restored by \).
Four stack elements are pushed by each \(:
First, the value of b.
Second, the value of fixup_jump.
Third, the value of regnum.
Fourth, the value of begalt. */
int stackb[40];
int *stackp = stackb;
int *stacke = stackb + 40;
int *stackt;
/* Counts \('s as they are encountered. Remembered for the matching \),
where it becomes the register number to put in the stop_memory
command. */
int regnum = 1;
bufp->fastmap_accurate = 0;
#ifndef emacs
#ifndef SYNTAX_TABLE
/* Initialize the syntax table. */
init_syntax_once();
#endif
#endif
if (bufp->allocated == 0)
{
bufp->allocated = INIT_BUF_SIZE;
if (bufp->buffer)
/* EXTEND_BUFFER loses when bufp->allocated is 0. */
bufp->buffer = (char *) realloc (bufp->buffer, INIT_BUF_SIZE);
else
/* Caller did not allocate a buffer. Do it for them. */
bufp->buffer = (char *) malloc (INIT_BUF_SIZE);
if (!bufp->buffer) goto memory_exhausted;
begalt = b = bufp->buffer;
}
while (p != pend)
{
PATFETCH (c);
switch (c)
{
case '$':
{
char *p1 = p;
/* When testing what follows the $,
look past the \-constructs that don't consume anything. */
if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
while (p1 != pend)
{
if (*p1 == '\\' && p1 + 1 != pend
&& (p1[1] == '<' || p1[1] == '>'
|| p1[1] == '`' || p1[1] == '\''
#ifdef emacs
|| p1[1] == '='
#endif
|| p1[1] == 'b' || p1[1] == 'B'))
p1 += 2;
else
break;
}
if (obscure_syntax & RE_TIGHT_VBAR)
{
if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p1 != pend)
goto normal_char;
/* Make operand of last vbar end before this `$'. */
if (fixup_jump)
store_jump (fixup_jump, jump, b);
fixup_jump = 0;
BUFPUSH (endline);
break;
}
/* $ means succeed if at end of line, but only in special contexts.
If validly in the middle of a pattern, it is a normal character. */
if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && p1 != pend)
goto invalid_pattern;
if (p1 == pend || *p1 == '\n'
|| (obscure_syntax & RE_CONTEXT_INDEP_OPS)
|| (obscure_syntax & RE_NO_BK_PARENS
? *p1 == ')'
: *p1 == '\\' && p1[1] == ')')
|| (obscure_syntax & RE_NO_BK_VBAR
? *p1 == '|'
: *p1 == '\\' && p1[1] == '|'))
{
BUFPUSH (endline);
break;
}
goto normal_char;
}
case '^':
/* ^ means succeed if at beg of line, but only if no preceding
pattern. */
if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && laststart)
goto invalid_pattern;
if (laststart && p - 2 >= pattern && p[-2] != '\n'
&& !(obscure_syntax & RE_CONTEXT_INDEP_OPS))
goto normal_char;
if (obscure_syntax & RE_TIGHT_VBAR)
{
if (p != pattern + 1
&& ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
goto normal_char;
BUFPUSH (begline);
begalt = b;
}
else
BUFPUSH (begline);
break;
case '+':
case '?':
if ((obscure_syntax & RE_BK_PLUS_QM)
|| (obscure_syntax & RE_LIMITED_OPS))
goto normal_char;
handle_plus:
case '*':
/* If there is no previous pattern, char not special. */
if (!laststart)
{
if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
goto invalid_pattern;
else if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
goto normal_char;
}
/* If there is a sequence of repetition chars,
collapse it down to just one. */
zero_times_ok = 0;
many_times_ok = 0;
while (1)
{
zero_times_ok |= c != '+';
many_times_ok |= c != '?';
if (p == pend)
break;
PATFETCH (c);
if (c == '*')
;
else if (!(obscure_syntax & RE_BK_PLUS_QM)
&& (c == '+' || c == '?'))
;
else if ((obscure_syntax & RE_BK_PLUS_QM)
&& c == '\\')
{
int c1;
PATFETCH (c1);
if (!(c1 == '+' || c1 == '?'))
{
PATUNFETCH;
PATUNFETCH;
break;
}
c = c1;
}
else
{
PATUNFETCH;
break;
}
}
/* Star, etc. applied to an empty pattern is equivalent
to an empty pattern. */
if (!laststart)
break;
/* Now we know whether or not zero matches are allowed
and also whether or not two or more matches are allowed. */
if (many_times_ok)
{
/* If more than one repetition is allowed, put in at the
end a backward relative jump from b to before the next
jump we're going to put in below (which jumps from
laststart to after this jump). */
GET_BUFFER_SPACE (3);
store_jump (b, maybe_finalize_jump, laststart - 3);
b += 3; /* Because store_jump put stuff here. */
}
/* On failure, jump from laststart to b + 3, which will be the
end of the buffer after this jump is inserted. */
GET_BUFFER_SPACE (3);
insert_jump (on_failure_jump, laststart, b + 3, b);
pending_exact = 0;
b += 3;
if (!zero_times_ok)
{
/* At least one repetition is required, so insert a
dummy-failure before the initial on-failure-jump
instruction of the loop. This effects a skip over that
instruction the first time we hit that loop. */
GET_BUFFER_SPACE (6);
insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
b += 3;
}
break;
case '.':
laststart = b;
BUFPUSH (anychar);
break;
case '[':
if (p == pend)
goto invalid_pattern;
while (b - bufp->buffer
> bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
EXTEND_BUFFER;
laststart = b;
if (*p == '^')
{
BUFPUSH (charset_not);
p++;
}
else
BUFPUSH (charset);
p1 = p;
BUFPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
/* Clear the whole map */
bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
if ((obscure_syntax & RE_HAT_NOT_NEWLINE) && b[-2] == charset_not)
SET_LIST_BIT ('\n');
/* Read in characters and ranges, setting map bits. */
while (1)
{
/* Don't translate while fetching, in case it's a range bound.
When we set the bit for the character, we translate it. */
PATFETCH_RAW (c);
/* If set, \ escapes characters when inside [...]. */
if ((obscure_syntax & RE_AWK_CLASS_HACK) && c == '\\')
{
PATFETCH(c1);
SET_LIST_BIT (c1);
continue;
}
if (c == ']')
{
if (p == p1 + 1)
{
/* If this is an empty bracket expression. */
if ((obscure_syntax & RE_NO_EMPTY_BRACKETS)
&& p == pend)
goto invalid_pattern;
}
else
/* Stop if this isn't merely a ] inside a bracket
expression, but rather the end of a bracket
expression. */
break;
}
/* Get a range. */
if (p[0] == '-' && p[1] != ']')
{
PATFETCH (c1);
/* Don't translate the range bounds while fetching them. */
PATFETCH_RAW (c1);
if ((obscure_syntax & RE_NO_EMPTY_RANGES) && c > c1)
goto invalid_pattern;
if ((obscure_syntax & RE_NO_HYPHEN_RANGE_END)
&& c1 == '-' && *p != ']')
goto invalid_pattern;
while (c <= c1)
{
/* Translate each char that's in the range. */
if (translate)
SET_LIST_BIT (translate[c]);
else
SET_LIST_BIT (c);
c++;
}
}
else if ((obscure_syntax & RE_CHAR_CLASSES)
&& c == '[' && p[0] == ':')
{
/* Longest valid character class word has six characters. */
char str[CHAR_CLASS_MAX_LENGTH];
PATFETCH (c);
c1 = 0;
/* If no ] at end. */
if (p == pend)
goto invalid_pattern;
while (1)
{
/* Don't translate the ``character class'' characters. */
PATFETCH_RAW (c);
if (c == ':' || c == ']' || p == pend
|| c1 == CHAR_CLASS_MAX_LENGTH)
break;
str[c1++] = c;
}
str[c1] = '\0';
if (p == pend
|| c == ']' /* End of the bracket expression. */
|| p[0] != ']'
|| p + 1 == pend
|| (strcmp (str, "alpha") != 0
&& strcmp (str, "upper") != 0
&& strcmp (str, "lower") != 0
&& strcmp (str, "digit") != 0
&& strcmp (str, "alnum") != 0
&& strcmp (str, "xdigit") != 0
&& strcmp (str, "space") != 0
&& strcmp (str, "print") != 0
&& strcmp (str, "punct") != 0
&& strcmp (str, "graph") != 0
&& strcmp (str, "cntrl") != 0))
{
/* Undo the ending character, the letters, and leave
the leading : and [ (but set bits for them). */
c1++;
while (c1--)
PATUNFETCH;
SET_LIST_BIT ('[');
SET_LIST_BIT (':');
}
else
{
/* The ] at the end of the character class. */
PATFETCH (c);
if (c != ']')
goto invalid_pattern;
for (c = 0; c < (1 << BYTEWIDTH); c++)
{
if ((strcmp (str, "alpha") == 0 && isalpha (c))
|| (strcmp (str, "upper") == 0 && isupper (c))
|| (strcmp (str, "lower") == 0 && islower (c))
|| (strcmp (str, "digit") == 0 && isdigit (c))
|| (strcmp (str, "alnum") == 0 && isalnum (c))
|| (strcmp (str, "xdigit") == 0 && isxdigit (c))
|| (strcmp (str, "space") == 0 && isspace (c))
|| (strcmp (str, "print") == 0 && isprint (c))
|| (strcmp (str, "punct") == 0 && ispunct (c))
|| (strcmp (str, "graph") == 0 && isgraph (c))
|| (strcmp (str, "cntrl") == 0 && iscntrl (c)))
SET_LIST_BIT (c);
}
}
}
else if (translate)
SET_LIST_BIT (translate[c]);
else
SET_LIST_BIT (c);
}
/* Discard any character set/class bitmap bytes that are all
0 at the end of the map. Decrement the map-length byte too. */
while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
b[-1]--;
b += b[-1];
break;
case '(':
if (! (obscure_syntax & RE_NO_BK_PARENS))
goto normal_char;
else
goto handle_open;
case ')':
if (! (obscure_syntax & RE_NO_BK_PARENS))
goto normal_char;
else
goto handle_close;
case '\n':
if (! (obscure_syntax & RE_NEWLINE_OR))
goto normal_char;
else
goto handle_bar;
case '|':
if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
&& (! laststart || p == pend))
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -