📄 _sre.c
字号:
SRE_IS_WORD((int) ptr[0]) : 0;
return this == that;
case SRE_AT_LOC_BOUNDARY:
if (state->beginning == state->end)
return 0;
that = ((void*) ptr > state->beginning) ?
SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
this = ((void*) ptr < state->end) ?
SRE_LOC_IS_WORD((int) ptr[0]) : 0;
return this != that;
case SRE_AT_LOC_NON_BOUNDARY:
if (state->beginning == state->end)
return 0;
that = ((void*) ptr > state->beginning) ?
SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
this = ((void*) ptr < state->end) ?
SRE_LOC_IS_WORD((int) ptr[0]) : 0;
return this == that;
#if defined(HAVE_UNICODE)
case SRE_AT_UNI_BOUNDARY:
if (state->beginning == state->end)
return 0;
that = ((void*) ptr > state->beginning) ?
SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
this = ((void*) ptr < state->end) ?
SRE_UNI_IS_WORD((int) ptr[0]) : 0;
return this != that;
case SRE_AT_UNI_NON_BOUNDARY:
if (state->beginning == state->end)
return 0;
that = ((void*) ptr > state->beginning) ?
SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
this = ((void*) ptr < state->end) ?
SRE_UNI_IS_WORD((int) ptr[0]) : 0;
return this == that;
#endif
}
return 0;
}
LOCAL(int)
SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
{
    /* Decide whether character `ch` is matched by the compiled set that
       starts at `set`.

       The set is a sequence of items that is scanned in order.  The
       first item that contains `ch` ends the scan and `verdict` is
       returned; reaching SRE_OP_FAILURE returns the inverse of
       `verdict`.  `verdict` starts out as 1 and is toggled by every
       SRE_OP_NEGATE item passed on the way.  An unrecognised item
       makes the function return 0. */
    int verdict = 1;
    SRE_CODE op;

    for (;;) {
        op = *set++;

        if (op == SRE_OP_LITERAL) {
            /* <LITERAL> <code> */
            if (set[0] == ch) {
                return verdict;
            }
            set += 1;
        }
        else if (op == SRE_OP_RANGE) {
            /* <RANGE> <lower> <upper> -- both bounds are inclusive */
            if (ch >= set[0] && ch <= set[1]) {
                return verdict;
            }
            set += 2;
        }
        else if (op == SRE_OP_CHARSET) {
            /* <CHARSET> <bitmap> (16 bits per code word): only codes
               below 256 are looked up, one bit per character */
            if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15)))) {
                return verdict;
            }
            set += 16;
        }
        else if (op == SRE_OP_BIGCHARSET) {
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
            int nblocks = *(set++);
            /* the index table is read byte-wise, selected by the high
               byte of ch */
            int which = ((unsigned char*)set)[ch >> 8];

            /* step over the index table: 128 code words.
               NOTE(review): that equals 256 index bytes only when
               SRE_CODE is 16 bits wide -- confirm against sre.h */
            set += 128;

            /* each block is a 16-word bitmap addressed by the low byte */
            if (set[which*16 + ((ch & 255)>>4)] & (1 << (ch & 15))) {
                return verdict;
            }
            set += nblocks*16;
        }
        else if (op == SRE_OP_CATEGORY) {
            /* <CATEGORY> <code> */
            if (sre_category(set[0], (int) ch)) {
                return verdict;
            }
            set += 1;
        }
        else if (op == SRE_OP_NEGATE) {
            /* flip the meaning of every item that follows */
            verdict = !verdict;
        }
        else if (op == SRE_OP_FAILURE) {
            /* end of set reached without a hit */
            return !verdict;
        }
        else {
            /* internal error -- nothing sensible can be done about it
               here, so report "no match" */
            return 0;
        }
    }
}
/* forward declaration: SRE_COUNT (defined next) calls SRE_MATCH, whose
   definition only appears further down in this file */
LOCAL(int) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level);
LOCAL(int)
SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, int maxcount, int level)
{
    /* Count how many consecutive characters, starting at state->ptr,
       are matched by the single-item pattern at `pattern`.

       At most `maxcount` characters are examined, unless `maxcount` is
       65535, in which case the scan is bounded only by state->end.

       Returns the number of characters matched.  For opcodes that are
       not handled inline the work is delegated to SRE_MATCH (called
       with the same `level`); a negative result from SRE_MATCH is
       returned unchanged. */
    SRE_CHAR* cur = state->ptr;
    SRE_CHAR* limit = state->end;
    SRE_CODE lit;
    int status;

    /* pull the scan limit in when maxcount is a real bound */
    if (maxcount != 65535 && limit - cur > maxcount) {
        limit = cur + maxcount;
    }

    switch (pattern[0]) {

    case SRE_OP_ANY:
        /* repeated dot wildcard: stop at the first line break */
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
        for (; cur < limit; cur++) {
            if (SRE_IS_LINEBREAK(*cur)) {
                break;
            }
        }
        break;

    case SRE_OP_ANY_ALL:
        /* repeated dot wildcard that matches everything: jump straight
           to the limit and let the caller backtrack from there */
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
        cur = limit;
        break;

    case SRE_OP_LITERAL:
        /* repeated literal */
        lit = pattern[1];
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
        for (; cur < limit; cur++) {
            if ((SRE_CODE) *cur != lit) {
                break;
            }
        }
        break;

    case SRE_OP_LITERAL_IGNORE:
        /* repeated literal, compared after state->lower() */
        lit = pattern[1];
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
        for (; cur < limit; cur++) {
            if ((SRE_CODE) state->lower(*cur) != lit) {
                break;
            }
        }
        break;

    case SRE_OP_NOT_LITERAL:
        /* repeated "anything but this literal" */
        lit = pattern[1];
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
        for (; cur < limit; cur++) {
            if ((SRE_CODE) *cur == lit) {
                break;
            }
        }
        break;

    case SRE_OP_NOT_LITERAL_IGNORE:
        /* repeated "anything but this literal", after state->lower() */
        lit = pattern[1];
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
        for (; cur < limit; cur++) {
            if ((SRE_CODE) state->lower(*cur) == lit) {
                break;
            }
        }
        break;

    case SRE_OP_IN:
        /* repeated set; the set items start two code words in */
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
        for (; cur < limit; cur++) {
            if (!SRE_CHARSET(pattern + 2, *cur)) {
                break;
            }
        }
        break;

    default:
        /* any other single character pattern: run the general matcher
           repeatedly; state->ptr is re-read on every round and `cur`
           keeps the starting position */
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
        for (;;) {
            if ((SRE_CHAR*) state->ptr >= limit) {
                break;
            }
            status = SRE_MATCH(state, pattern, level);
            if (status < 0) {
                return status;
            }
            if (status == 0) {
                break;
            }
        }
        TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
               (SRE_CHAR*) state->ptr - ptr));
        return (SRE_CHAR*) state->ptr - cur;
    }

    /* inline cases leave state->ptr alone, so the distance from it to
       `cur` is the match count */
    TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
    return cur - (SRE_CHAR*) state->ptr;
}
#if 0 /* not used in this release */
LOCAL(int)
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
{
    /* Test an SRE_OP_INFO block against the current position
       (state->ptr).  On success the result is the number of SRE_CODE
       objects to skip; 0 means the block does not match here. */
    SRE_CHAR* cursor = state->ptr;
    SRE_CHAR* limit = state->end;
    int k;

    /* reject right away if fewer characters remain than the recorded
       minimum length (a minimum of 0 disables this test) */
    if (pattern[3] != 0 && (limit - cursor) < pattern[3]) {
        return 0;
    }

    /* without a usable known prefix there is nothing more to check */
    if (!((pattern[2] & SRE_INFO_PREFIX) && pattern[5] > 1)) {
        return pattern[0];
    }

    /* <length> <skip> <prefix data> <overlap data> */
    for (k = 0; k < pattern[5]; k++) {
        if (pattern[7 + k] != (SRE_CODE) cursor[k]) {
            return 0;
        }
    }
    return pattern[0] + 2 * pattern[6];
}
#endif
LOCAL(int)
SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
{
/* check if string matches the given pattern. returns <0 for
error, 0 for failure, and 1 for success */
SRE_CHAR* end = state->end;
SRE_CHAR* ptr = state->ptr;
int i, count;
SRE_REPEAT* rp;
int lastmark;
SRE_CODE chr;
SRE_REPEAT rep; /* FIXME: <fl> allocate in STATE instead */
TRACE(("|%p|%p|ENTER %d\n", pattern, ptr, level));
#if defined(USE_STACKCHECK)
if (level % 10 == 0 && PyOS_CheckStack())
return SRE_ERROR_RECURSION_LIMIT;
#endif
#if defined(USE_RECURSION_LIMIT)
if (level > USE_RECURSION_LIMIT)
return SRE_ERROR_RECURSION_LIMIT;
#endif
if (pattern[0] == SRE_OP_INFO) {
/* optimization info block */
/* <INFO> <1=skip> <2=flags> <3=min> ... */
if (pattern[3] && (end - ptr) < pattern[3]) {
TRACE(("reject (got %d chars, need %d)\n",
(end - ptr), pattern[3]));
return 0;
}
pattern += pattern[1] + 1;
}
for (;;) {
switch (*pattern++) {
case SRE_OP_FAILURE:
/* immediate failure */
TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
return 0;
case SRE_OP_SUCCESS:
/* end of pattern */
TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
state->ptr = ptr;
return 1;
case SRE_OP_AT:
/* match at given position */
/* <AT> <code> */
TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
if (!SRE_AT(state, ptr, *pattern))
return 0;
pattern++;
break;
case SRE_OP_CATEGORY:
/* match at given category */
/* <CATEGORY> <code> */
TRACE(("|%p|%p|CATEGORY %d\n", pattern, ptr, *pattern));
if (ptr >= end || !sre_category(pattern[0], ptr[0]))
return 0;
pattern++;
ptr++;
break;
case SRE_OP_LITERAL:
/* match literal string */
/* <LITERAL> <code> */
TRACE(("|%p|%p|LITERAL %d\n", pattern, ptr, *pattern));
if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
return 0;
pattern++;
ptr++;
break;
case SRE_OP_NOT_LITERAL:
/* match anything that is not literal character */
/* <NOT_LITERAL> <code> */
TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, ptr, *pattern));
if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
return 0;
pattern++;
ptr++;
break;
case SRE_OP_ANY:
/* match anything (except a newline) */
/* <ANY> */
TRACE(("|%p|%p|ANY\n", pattern, ptr));
if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
return 0;
ptr++;
break;
case SRE_OP_ANY_ALL:
/* match anything */
/* <ANY_ALL> */
TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
if (ptr >= end)
return 0;
ptr++;
break;
case SRE_OP_IN:
/* match set member (or non_member) */
/* <IN> <skip> <set> */
TRACE(("|%p|%p|IN\n", pattern, ptr));
if (ptr >= end || !SRE_CHARSET(pattern + 1, *ptr))
return 0;
pattern += pattern[0];
ptr++;
break;
case SRE_OP_GROUPREF:
/* match backreference */
TRACE(("|%p|%p|GROUPREF %d\n", pattern, ptr, pattern[0]));
i = pattern[0];
{
SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
if (!p || !e || e < p)
return 0;
while (p < e) {
if (ptr >= end || *ptr != *p)
return 0;
p++; ptr++;
}
}
pattern++;
break;
case SRE_OP_GROUPREF_IGNORE:
/* match backreference */
TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, ptr, pattern[0]));
i = pattern[0];
{
SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i];
SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1];
if (!p || !e || e < p)
return 0;
while (p < e) {
if (ptr >= end ||
state->lower(*ptr) != state->lower(*p))
return 0;
p++; ptr++;
}
}
pattern++;
break;
case SRE_OP_LITERAL_IGNORE:
TRACE(("|%p|%p|LITERAL_IGNORE %d\n", pattern, ptr, pattern[0]));
if (ptr >= end ||
state->lower(*ptr) != state->lower(*pattern))
return 0;
pattern++;
ptr++;
break;
case SRE_OP_NOT_LITERAL_IGNORE:
TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", pattern, ptr, *pattern));
if (ptr >= end ||
state->lower(*ptr) == state->lower(*pattern))
return 0;
pattern++;
ptr++;
break;
case SRE_OP_IN_IGNORE:
TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
if (ptr >= end
|| !SRE_CHARSET(pattern + 1, (SRE_CODE) state->lower(*ptr)))
return 0;
pattern += pattern[0];
ptr++;
break;
case SRE_OP_MARK:
/* set mark */
/* <MARK> <gid> */
TRACE(("|%p|%p|MARK %d\n", pattern, ptr, pattern[0]));
i = pattern[0];
if (i & 1)
state->lastindex = i/2 + 1;
if (i > state->lastmark)
state->lastmark = i;
state->mark[i] = ptr;
pattern++;
break;
case SRE_OP_JUMP:
case SRE_OP_INFO:
/* jump forward */
/* <JUMP> <offset> */
TRACE(("|%p|%p|JUMP %d\n", pattern, ptr, pattern[0]));
pattern += pattern[0];
break;
case SRE_OP_ASSERT:
/* assert subpattern */
/* <ASSERT> <skip> <back> <pattern> */
TRACE(("|%p|%p|ASSERT %d\n", pattern, ptr, pattern[1]));
state->ptr = ptr - pattern[1];
if (state->ptr < state->beginning)
return 0;
i = SRE_MATCH(state, pattern + 2, level + 1);
if (i <= 0)
return i;
pattern += pattern[0];
break;
case SRE_OP_ASSERT_NOT:
/* assert not subpattern */
/* <ASSERT_NOT> <skip> <back> <pattern> */
TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, ptr, pattern[1]));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -