📄 regex.c
字号:
else
{
b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
}
}
/* Discard any bitmap bytes that are all 0 at the end of the map.
Decrement the map-length byte too. */
while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
b[-1]--;
b += b[-1];
break;
case '(':
if (! (obscure_syntax & RE_NO_BK_PARENS))
goto normal_char;
else
goto handle_open;
case ')':
if (! (obscure_syntax & RE_NO_BK_PARENS))
goto normal_char;
else
goto handle_close;
case '\n':
if (! (obscure_syntax & RE_NEWLINE_OR))
goto normal_char;
else
goto handle_bar;
case '|':
if (! (obscure_syntax & RE_NO_BK_VBAR))
goto normal_char;
else
goto handle_bar;
case '\\':
if (p == pend) goto invalid_pattern;
PATFETCH_RAW (c);
switch (c)
{
case '(':
if (obscure_syntax & RE_NO_BK_PARENS)
goto normal_backsl;
handle_open:
if (stackp == stacke) goto nesting_too_deep;
if (regnum < RE_NREGS)
{
PATPUSH (start_memory);
PATPUSH (regnum);
}
*stackp++ = b - bufp->buffer;
*stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
*stackp++ = regnum++;
*stackp++ = begalt - bufp->buffer;
fixup_jump = 0;
laststart = 0;
begalt = b;
break;
case ')':
if (obscure_syntax & RE_NO_BK_PARENS)
goto normal_backsl;
handle_close:
if (stackp == stackb) goto unmatched_close;
begalt = *--stackp + bufp->buffer;
if (fixup_jump)
store_jump (fixup_jump, jump, b);
if (stackp[-1] < RE_NREGS)
{
PATPUSH (stop_memory);
PATPUSH (stackp[-1]);
}
stackp -= 2;
fixup_jump = 0;
if (*stackp)
fixup_jump = *stackp + bufp->buffer - 1;
laststart = *--stackp + bufp->buffer;
break;
case '|':
if (obscure_syntax & RE_NO_BK_VBAR)
goto normal_backsl;
handle_bar:
insert_jump (on_failure_jump, begalt, b + 6, b);
pending_exact = 0;
b += 3;
if (fixup_jump)
store_jump (fixup_jump, jump, b);
fixup_jump = b;
b += 3;
laststart = 0;
begalt = b;
break;
#ifdef emacs
case '=':
PATPUSH (at_dot);
break;
case 's':
laststart = b;
PATPUSH (syntaxspec);
PATFETCH (c);
PATPUSH (syntax_spec_code[c]);
break;
case 'S':
laststart = b;
PATPUSH (notsyntaxspec);
PATFETCH (c);
PATPUSH (syntax_spec_code[c]);
break;
#endif /* emacs */
case 'w':
laststart = b;
PATPUSH (wordchar);
break;
case 'W':
laststart = b;
PATPUSH (notwordchar);
break;
case '<':
PATPUSH (wordbeg);
break;
case '>':
PATPUSH (wordend);
break;
case 'b':
PATPUSH (wordbound);
break;
case 'B':
PATPUSH (notwordbound);
break;
case '`':
PATPUSH (begbuf);
break;
case '\'':
PATPUSH (endbuf);
break;
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
c1 = c - '0';
if (c1 >= regnum)
goto normal_char;
for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
if (*stackt == c1)
goto normal_char;
laststart = b;
PATPUSH (duplicate);
PATPUSH (c1);
break;
case '+':
case '?':
if (obscure_syntax & RE_BK_PLUS_QM)
goto handle_plus;
default:
normal_backsl:
/* You might think it would be useful for \ to mean
not to translate; but if we don't translate it
it will never match anything. */
if (translate) c = translate[c];
goto normal_char;
}
break;
default:
normal_char:
if (!pending_exact || pending_exact + *pending_exact + 1 != b
|| *pending_exact == 0177 || *p == '*' || *p == '^'
|| ((obscure_syntax & RE_BK_PLUS_QM)
? *p == '\\' && (p[1] == '+' || p[1] == '?')
: (*p == '+' || *p == '?')))
{
laststart = b;
PATPUSH (exactn);
pending_exact = b;
PATPUSH (0);
}
PATPUSH (c);
(*pending_exact)++;
}
}
if (fixup_jump)
store_jump (fixup_jump, jump, b);
if (stackp != stackb) goto unmatched_open;
bufp->used = b - bufp->buffer;
return 0;
invalid_pattern:
return "Invalid regular expression";
unmatched_open:
return "Unmatched \\(";
unmatched_close:
return "Unmatched \\)";
end_of_pattern:
return "Premature end of regular expression";
nesting_too_deep:
return "Nesting too deep";
too_big:
return "Regular expression too big";
memory_exhausted:
return "Memory exhausted";
}
/* Store at FROM a three-byte jump instruction whose target is TO.
   OPCODE is the opcode to store in the first byte.  The two bytes that
   follow hold the displacement from the end of the instruction
   (FROM + 3) to TO, low-order byte first; a backward jump is stored in
   two's-complement form.  Only the low 16 bits of the displacement are
   kept.
   Always returns 0.  */
static int
store_jump (from, opcode, to)
     char *from, *to;
     char opcode;
{
  /* Split the displacement into bytes as an unsigned quantity, so a
     negative (backward) displacement is never right-shifted as a
     signed value, whose result is implementation-defined.  */
  unsigned int displacement = (unsigned int) (to - (from + 3));

  from[0] = opcode;
  from[1] = displacement & 0377;
  from[2] = (displacement >> 8) & 0377;
  return 0;
}
/* Open up three bytes of space at char FROM, and insert there a jump
   to TO.
   CURRENT_END gives the end of the storage now in use, so we know how
   much data to copy up: every byte from FROM up to (but not including)
   CURRENT_END is moved three bytes higher.  No bounds are checked
   here, so the three bytes starting at CURRENT_END must be writable.
   OP is the opcode of the jump to insert.
   TO is passed through unchanged to store_jump; it is not adjusted for
   the three-byte shift.
   If you call this function, you must zero out pending_exact.
   Always returns 0.  */
static int
insert_jump (op, from, to, current_end)
     char op;
     char *from, *to, *current_end;
{
  register char *src = current_end;
  register char *dst = current_end + 3;

  /* Source and destination overlap, so copy from the top down.  */
  while (src != from)
    *--dst = *--src;

  store_jump (from, op, to);
  return 0;
}
/* Given a pattern, compute a fastmap from it.
The fastmap records which of the (1 << BYTEWIDTH) possible characters
can start a string that matches the pattern.
This fastmap is used by re_search to skip quickly over totally implausible text.
The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
as bufp->fastmap.
The other components of bufp describe the pattern to be used. */
void
re_compile_fastmap (bufp)
struct re_pattern_buffer *bufp;
{
unsigned char *pattern = (unsigned char *) bufp->buffer;
int size = bufp->used;
register char *fastmap = bufp->fastmap;
register unsigned char *p = pattern;
register unsigned char *pend = pattern + size;
register int j, k;
unsigned char *translate = (unsigned char *) bufp->translate;
unsigned char *stackb[NFAILURES];
unsigned char **stackp = stackb;
bzero (fastmap, (1 << BYTEWIDTH));
bufp->fastmap_accurate = 1;
bufp->can_be_null = 0;
while (p)
{
if (p == pend)
{
bufp->can_be_null = 1;
break;
}
#ifdef SWITCH_ENUM_BUG
switch ((int) ((enum regexpcode) *p++))
#else
switch ((enum regexpcode) *p++)
#endif
{
case exactn:
if (translate)
fastmap[translate[p[1]]] = 1;
else
fastmap[p[1]] = 1;
break;
case begline:
case before_dot:
case at_dot:
case after_dot:
case begbuf:
case endbuf:
case wordbound:
case notwordbound:
case wordbeg:
case wordend:
continue;
case endline:
if (translate)
fastmap[translate['\n']] = 1;
else
fastmap['\n'] = 1;
if (bufp->can_be_null != 1)
bufp->can_be_null = 2;
break;
case finalize_jump:
case maybe_finalize_jump:
case jump:
case dummy_failure_jump:
bufp->can_be_null = 1;
j = *p++ & 0377;
j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
p += j + 1; /* The 1 compensates for missing ++ above */
if (j > 0)
continue;
/* Jump backward reached implies we just went through
the body of a loop and matched nothing.
Opcode jumped to should be an on_failure_jump.
Just treat it like an ordinary jump.
For a * loop, it has pushed its failure point already;
if so, discard that as redundant. */
if ((enum regexpcode) *p != on_failure_jump)
continue;
p++;
j = *p++ & 0377;
j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
p += j + 1; /* The 1 compensates for missing ++ above */
if (stackp != stackb && *stackp == p)
stackp--;
continue;
case on_failure_jump:
j = *p++ & 0377;
j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
p++;
*++stackp = p + j;
continue;
case start_memory:
case stop_memory:
p++;
continue;
case duplicate:
bufp->can_be_null = 1;
fastmap['\n'] = 1;
case anychar:
for (j = 0; j < (1 << BYTEWIDTH); j++)
if (j != '\n')
fastmap[j] = 1;
if (bufp->can_be_null)
return;
/* Don't return; check the alternative paths
so we can set can_be_null if appropriate. */
break;
case wordchar:
for (j = 0; j < (1 << BYTEWIDTH); j++)
if (SYNTAX (j) == Sword)
fastmap[j] = 1;
break;
case notwordchar:
for (j = 0; j < (1 << BYTEWIDTH); j++)
if (SYNTAX (j) != Sword)
fastmap[j] = 1;
break;
#ifdef emacs
case syntaxspec:
k = *p++;
for (j = 0; j < (1 << BYTEWIDTH); j++)
if (SYNTAX (j) == (enum syntaxcode) k)
fastmap[j] = 1;
break;
case notsyntaxspec:
k = *p++;
for (j = 0; j < (1 << BYTEWIDTH); j++)
if (SYNTAX (j) != (enum syntaxcode) k)
fastmap[j] = 1;
break;
#endif /* emacs */
case charset:
for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
{
if (translate)
fastmap[translate[j]] = 1;
else
fastmap[j] = 1;
}
break;
case charset_not:
/* Chars beyond end of map must be allowed */
for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
if (translate)
fastmap[translate[j]] = 1;
else
fastmap[j] = 1;
for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
{
if (translate)
fastmap[translate[j]] = 1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -