📄 regexpr.c
字号:
unsigned char *textstart;
unsigned char *textend;
int a;
int b;
int ch;
int reg;
int match_end;
unsigned char *regstart;
unsigned char *regend;
int regsize;
match_state state;
assert(pos >= 0 && size >= 0);
assert(pos <= size);
text = string + pos;
textstart = string;
textend = string + size;
code = bufp->buffer;
translate = bufp->translate;
NEW_STATE(state, bufp->num_registers);
continue_matching:
switch (*code++)
{
case Cend:
{
match_end = text - textstart;
if (old_regs)
{
old_regs->start[0] = pos;
old_regs->end[0] = match_end;
if (!bufp->uses_registers)
{
for (a = 1; a < RE_NREGS; a++)
{
old_regs->start[a] = -1;
old_regs->end[a] = -1;
}
}
else
{
for (a = 1; a < bufp->num_registers; a++)
{
if ((GET_REG_START(state, a) == NULL) ||
(GET_REG_END(state, a) == NULL))
{
old_regs->start[a] = -1;
old_regs->end[a] = -1;
continue;
}
old_regs->start[a] = GET_REG_START(state, a) - textstart;
old_regs->end[a] = GET_REG_END(state, a) - textstart;
}
for (; a < RE_NREGS; a++)
{
old_regs->start[a] = -1;
old_regs->end[a] = -1;
}
}
}
FREE_STATE(state);
return match_end - pos;
}
case Cbol:
{
if (text == textstart || text[-1] == '\n')
goto continue_matching;
goto fail;
}
case Ceol:
{
if (text == textend || *text == '\n')
goto continue_matching;
goto fail;
}
case Cset:
{
NEXTCHAR(ch);
if (code[ch/8] & (1<<(ch & 7)))
{
code += 256/8;
goto continue_matching;
}
goto fail;
}
case Cexact:
{
NEXTCHAR(ch);
if (ch != (unsigned char)*code++)
goto fail;
goto continue_matching;
}
case Canychar:
{
NEXTCHAR(ch);
if (ch == '\n')
goto fail;
goto continue_matching;
}
case Cstart_memory:
{
reg = *code++;
SET_REG_START(state, reg, text, goto error);
goto continue_matching;
}
case Cend_memory:
{
reg = *code++;
SET_REG_END(state, reg, text, goto error);
goto continue_matching;
}
case Cmatch_memory:
{
reg = *code++;
regstart = GET_REG_START(state, reg);
regend = GET_REG_END(state, reg);
if ((regstart == NULL) || (regend == NULL))
goto fail; /* or should we just match nothing? */
regsize = regend - regstart;
if (regsize > (textend - text))
goto fail;
if(translate)
{
for (; regstart < regend; regstart++, text++)
if (translate[*regstart] != translate[*text])
goto fail;
}
else
for (; regstart < regend; regstart++, text++)
if (*regstart != *text)
goto fail;
goto continue_matching;
}
case Cupdate_failure_jump:
{
UPDATE_FAILURE(state, text, goto error);
/* fall to next case */
}
/* treat Cstar_jump just like Cjump if it hasn't been optimized */
case Cstar_jump:
case Cjump:
{
a = (unsigned char)*code++;
a |= (unsigned char)*code++ << 8;
code += (int)SHORT(a);
if (code<bufp->buffer || bufp->buffer+bufp->used<code) {
PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cjump)");
FREE_STATE(state);
return -2;
}
goto continue_matching;
}
case Cdummy_failure_jump:
{
unsigned char *failuredest;
a = (unsigned char)*code++;
a |= (unsigned char)*code++ << 8;
a = (int)SHORT(a);
assert(*code == Cfailure_jump);
b = (unsigned char)code[1];
b |= (unsigned char)code[2] << 8;
failuredest = code + (int)SHORT(b) + 3;
if (failuredest<bufp->buffer || bufp->buffer+bufp->used < failuredest) {
PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cdummy_failure_jump failuredest)");
FREE_STATE(state);
return -2;
}
PUSH_FAILURE(state, failuredest, NULL, goto error);
code += a;
if (code<bufp->buffer || bufp->buffer+bufp->used < code) {
PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cdummy_failure_jump code)");
FREE_STATE(state);
return -2;
}
goto continue_matching;
}
case Cfailure_jump:
{
a = (unsigned char)*code++;
a |= (unsigned char)*code++ << 8;
a = (int)SHORT(a);
if (code+a<bufp->buffer || bufp->buffer+bufp->used < code+a) {
PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Cfailure_jump)");
FREE_STATE(state);
return -2;
}
PUSH_FAILURE(state, code + a, text, goto error);
goto continue_matching;
}
case Crepeat1:
{
unsigned char *pinst;
a = (unsigned char)*code++;
a |= (unsigned char)*code++ << 8;
a = (int)SHORT(a);
pinst = code + a;
if (pinst<bufp->buffer || bufp->buffer+bufp->used<pinst) {
PyErr_SetString(PyExc_SystemError, "Regex VM jump out of bounds (Crepeat1)");
FREE_STATE(state);
return -2;
}
/* pinst is sole instruction in loop, and it matches a
* single character. Since Crepeat1 was originally a
* Cupdate_failure_jump, we also know that backtracking
* is useless: so long as the single-character
* expression matches, it must be used. Also, in the
* case of +, we've already matched one character, so +
* can't fail: nothing here can cause a failure. */
switch (*pinst++)
{
case Cset:
{
if (translate)
{
while (text < textend)
{
ch = translate[(unsigned char)*text];
if (pinst[ch/8] & (1<<(ch & 7)))
text++;
else
break;
}
}
else
{
while (text < textend)
{
ch = (unsigned char)*text;
if (pinst[ch/8] & (1<<(ch & 7)))
text++;
else
break;
}
}
break;
}
case Cexact:
{
ch = (unsigned char)*pinst;
if (translate)
{
while (text < textend &&
translate[(unsigned char)*text] == ch)
text++;
}
else
{
while (text < textend && (unsigned char)*text == ch)
text++;
}
break;
}
case Canychar:
{
while (text < textend && (unsigned char)*text != '\n')
text++;
break;
}
case Csyntaxspec:
{
a = (unsigned char)*pinst;
if (translate)
{
while (text < textend &&
(SYNTAX(translate[*text]) & a) )
text++;
}
else
{
while (text < textend && (SYNTAX(*text) & a) )
text++;
}
break;
}
case Cnotsyntaxspec:
{
a = (unsigned char)*pinst;
if (translate)
{
while (text < textend &&
!(SYNTAX(translate[*text]) & a) )
text++;
}
else
{
while (text < textend && !(SYNTAX(*text) & a) )
text++;
}
break;
}
default:
{
FREE_STATE(state);
PyErr_SetString(PyExc_SystemError, "Unknown regex opcode: memory corrupted?");
return -2;
/*NOTREACHED*/
}
}
/* due to the funky way + and * are compiled, the top
* failure- stack entry at this point is actually a
* success entry -- update it & pop it */
UPDATE_FAILURE(state, text, goto error);
goto fail; /* i.e., succeed <wink/sigh> */
}
case Cbegbuf:
{
if (text == textstart)
goto continue_matching;
goto fail;
}
case Cendbuf:
{
if (text == textend)
goto continue_matching;
goto fail;
}
case Cwordbeg:
{
if (text == textend)
goto fail;
if (!(SYNTAX(*text) & Sword))
goto fail;
if (text == textstart)
goto continue_matching;
if (!(SYNTAX(text[-1]) & Sword))
goto continue_matching;
goto fail;
}
case Cwordend:
{
if (text == textstart)
goto fail;
if (!(SYNTAX(text[-1]) & Sword))
goto fail;
if (text == textend)
goto continue_matching;
if (!(SYNTAX(*text) & Sword))
goto continue_matching;
goto fail;
}
case Cwordbound:
{
/* Note: as in gnu regexp, this also matches at the
* beginning and end of buffer. */
if (text == textstart || text == textend)
goto continue_matching;
if ((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword))
goto continue_matching;
goto fail;
}
case Cnotwordbound:
{
/* Note: as in gnu regexp, this never matches at the
* beginning and end of buffer. */
if (text == textstart || text == textend)
goto fail;
if (!((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword)))
goto continue_matching;
goto fail;
}
case Csyntaxspec:
{
NEXTCHAR(ch);
if (!(SYNTAX(ch) & (unsigned char)*code++))
goto fail;
goto continue_matching;
}
case Cnotsyntaxspec:
{
NEXTCHAR(ch);
if (SYNTAX(ch) & (unsigned char)*code++)
goto fail;
goto continue_matching;
}
default:
{
FREE_STATE(state);
PyErr_SetString(PyExc_SystemError, "Unknown regex opcode: memory corrupted?");
return -2;
/*NOTREACHED*/
}
}
#if 0 /* This line is never reached --Guido */
abort();
#endif
/*
*NOTREACHED
*/
/* Using "break;" in the above switch statement is equivalent to "goto fail;" */
fail:
POP_FAILURE(state, code, text, goto done_matching, goto error);
goto continue_matching;
done_matching:
/* if(translated != NULL) */
/* free(translated); */
FREE_STATE(state);
return -1;
error:
/* if (translated != NULL) */
/* free(translated); */
FREE_STATE(state);
return -2;
}
#undef PREFETCH
#undef NEXTCHAR
/*
 * Search `string` (length `size`) for a match of the compiled pattern
 * `bufp`.
 *
 * Candidate start positions run from `pos` to `pos + range` inclusive.
 * A negative `range` searches backwards from `pos`; a non-negative one
 * searches forwards.  The asserts require 0 <= pos + range <= size.
 *
 * When the pattern has a usable fastmap, positions whose first character
 * cannot start a match are skipped without calling re_match().  The
 * fastmap is (re)compiled first if it is not marked accurate, and it is
 * ignored entirely when bufp->can_be_null == 1.
 *
 * `regs` is passed straight through to re_match().
 *
 * Returns the start position of the first successful match, -1 if no
 * candidate position matches, or -2 if re_match() returns -2 or a Python
 * exception is pending after re_compile_fastmap().
 */
int re_search(regexp_t bufp, unsigned char *string, int size, int pos,
              int range, regexp_registers_t regs)
{
    unsigned char *fastmap;
    unsigned char *translate;
    unsigned char *text;
    unsigned char *partstart;
    unsigned char *partend;
    unsigned char *strend;
    int dir;
    int ret;
    unsigned char anchor;

    assert(size >= 0 && pos >= 0);
    assert(pos + range >= 0 && pos + range <= size); /* Bugfix by ylo */

    fastmap = bufp->fastmap;
    translate = bufp->translate;
    strend = string + size;

    if (fastmap && !bufp->fastmap_accurate) {
        re_compile_fastmap(bufp);
        if (PyErr_Occurred())
            return -2;
    }

    anchor = bufp->anchor;

    if (bufp->can_be_null == 1) /* can_be_null == 2: can match null at eob */
        fastmap = NULL;

    /* From here on `range` is a non-negative count and `dir` the step. */
    if (range < 0) {
        dir = -1;
        range = -range;
    }
    else {
        dir = 1;
    }

    /* With anchor == 2 only position 0 is ever tried. */
    if (anchor == 2) {
        if (pos != 0)
            return -1;
        else
            range = 0;
    }

    for (; range >= 0; range--, pos += dir) {
        if (fastmap) {
            if (dir == 1) {
                /* Searching forwards.  The scan is limited to the last
                 * permitted start position (pos + range, which never
                 * exceeds size), so `range` cannot go negative and no
                 * match is attempted outside the requested window. */
                text = string + pos;
                partstart = text;
                partend = text + range;
                if (translate) {
                    while (text != partend &&
                           !fastmap[(unsigned char)
                                    translate[(unsigned char)*text]])
                        text++;
                }
                else {
                    while (text != partend &&
                           !fastmap[(unsigned char)*text])
                        text++;
                }
                pos += (int)(text - partstart);
                range -= (int)(text - partstart);
                /* At end of buffer only an empty match is possible. */
                if (pos == size && bufp->can_be_null == 0)
                    return -1;
            }
            else {
                /* Searching backwards. */
                text = string + pos;
                partstart = string + pos - range;
                partend = text;
                /* There is no character to look up at end of buffer, so
                 * never dereference there.  If the pattern cannot match
                 * the empty string, step over that position; otherwise
                 * leave `text` in place so a match is attempted at eob. */
                if (text == strend && text != partstart &&
                    bufp->can_be_null == 0)
                    text--;
                if (text != strend) {
                    if (translate) {
                        while (text != partstart &&
                               !fastmap[(unsigned char)
                                        translate[(unsigned char)*text]])
                            text--;
                    }
                    else {
                        while (text != partstart &&
                               !fastmap[(unsigned char)*text])
                            text--;
                    }
                }
                pos -= (int)(partend - text);
                range -= (int)(partend - text);
            }
        }

        if (anchor == 1) {
            /* anchored to begline: skip positions not at a line start */
            if (pos > 0 && (string[pos - 1] != '\n'))
                continue;
        }

        assert(pos >= 0 && pos <= size);
        ret = re_match(bufp, string, size, pos, regs);
        if (ret >= 0)
            return pos;
        if (ret == -2)
            return -2;
    }
    return -1;
}
/*
** Local Variables:
** mode: c
** c-file-style: "python"
** End:
*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -