📄 pcre_exec.c.svn-base
字号:
following. Then obey similar code to character type repeats - written out
again for speed. */
case OP_NCLASS:
case OP_CLASS:
{
data = ecode + 1; /* Save for matching */
ecode += 33; /* Advance past the item */
switch (*ecode)
{
case OP_CRSTAR:
case OP_CRMINSTAR:
case OP_CRPLUS:
case OP_CRMINPLUS:
case OP_CRQUERY:
case OP_CRMINQUERY:
c = *ecode++ - OP_CRSTAR;
minimize = (c & 1) != 0;
min = rep_min[c]; /* Pick up values from tables; */
max = rep_max[c]; /* zero for max => infinity */
if (max == 0) max = INT_MAX;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
minimize = (*ecode == OP_CRMINRANGE);
min = GET2(ecode, 1);
max = GET2(ecode, 3);
if (max == 0) max = INT_MAX;
ecode += 5;
break;
default: /* No repeat follows */
min = max = 1;
break;
}
/* First, ensure the minimum number of matches are present. */
#ifdef SUPPORT_UTF8
/* UTF-8 mode */
if (utf8)
{
for (i = 1; i <= min; i++)
{
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
if (c > 255)
{
if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
}
else
{
if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
}
}
}
else
#endif
/* Not UTF-8 mode */
{
for (i = 1; i <= min; i++)
{
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
c = *eptr++;
if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
}
}
/* If max == min we can continue with the main loop without the
need to recurse. */
if (min == max) continue;
/* If minimizing, keep testing the rest of the expression and advancing
the pointer while it matches the class. */
if (minimize)
{
#ifdef SUPPORT_UTF8
/* UTF-8 mode */
if (utf8)
{
for (fi = min;; fi++)
{
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
if (c > 255)
{
if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
}
else
{
if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
}
}
}
else
#endif
/* Not UTF-8 mode */
{
for (fi = min;; fi++)
{
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
c = *eptr++;
if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
}
}
/* Control never gets here */
}
/* If maximizing, find the longest possible run, then work backwards. */
else
{
pp = eptr;
#ifdef SUPPORT_UTF8
/* UTF-8 mode */
if (utf8)
{
for (i = min; i < max; i++)
{
int len = 1;
if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
if (c > 255)
{
if (op == OP_CLASS) break;
}
else
{
if ((data[c/8] & (1 << (c&7))) == 0) break;
}
eptr += len;
}
for (;;)
{
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (eptr-- == pp) break; /* Stop if tried at original pos */
BACKCHAR(eptr);
}
}
else
#endif
/* Not UTF-8 mode */
{
for (i = min; i < max; i++)
{
if (eptr >= md->end_subject) break;
c = *eptr;
if ((data[c/8] & (1 << (c&7))) == 0) break;
eptr++;
}
while (eptr >= pp)
{
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
}
}
RRETURN(MATCH_NOMATCH);
}
}
/* Control never gets here */
/* Match an extended character class. This opcode is encountered only
in UTF-8 mode, because that's the only time it is compiled. */
#ifdef SUPPORT_UTF8
case OP_XCLASS:
{
data = ecode + 1 + LINK_SIZE; /* Save for matching */
ecode += GET(ecode, 1); /* Advance past the item */
switch (*ecode)
{
case OP_CRSTAR:
case OP_CRMINSTAR:
case OP_CRPLUS:
case OP_CRMINPLUS:
case OP_CRQUERY:
case OP_CRMINQUERY:
c = *ecode++ - OP_CRSTAR;
minimize = (c & 1) != 0;
min = rep_min[c]; /* Pick up values from tables; */
max = rep_max[c]; /* zero for max => infinity */
if (max == 0) max = INT_MAX;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
minimize = (*ecode == OP_CRMINRANGE);
min = GET2(ecode, 1);
max = GET2(ecode, 3);
if (max == 0) max = INT_MAX;
ecode += 5;
break;
default: /* No repeat follows */
min = max = 1;
break;
}
/* First, ensure the minimum number of matches are present. */
for (i = 1; i <= min; i++)
{
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
}
/* If max == min we can continue with the main loop without the
need to recurse. */
if (min == max) continue;
/* If minimizing, keep testing the rest of the expression and advancing
the pointer while it matches the class. */
if (minimize)
{
for (fi = min;; fi++)
{
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
}
/* If maximizing, find the longest possible run, then work backwards. */
else
{
pp = eptr;
for (i = min; i < max; i++)
{
int len = 1;
if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
if (!_pcre_xclass(c, data)) break;
eptr += len;
}
for(;;)
{
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (eptr-- == pp) break; /* Stop if tried at original pos */
BACKCHAR(eptr)
}
RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
}
#endif /* End of XCLASS */
/* Match a single character, casefully */
case OP_CHAR:
#ifdef SUPPORT_UTF8
if (utf8)
{
length = 1;
ecode++;
GETCHARLEN(fc, ecode, length);
if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
}
else
#endif
/* Non-UTF-8 mode */
{
if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
ecode += 2;
}
break;
/* Match a single character, caselessly */
case OP_CHARNC:
#ifdef SUPPORT_UTF8
if (utf8)
{
length = 1;
ecode++;
GETCHARLEN(fc, ecode, length);
if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
/* If the pattern character's value is < 128, we have only one byte, and
can use the fast lookup table. */
if (fc < 128)
{
if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
}
/* Otherwise we must pick up the subject character */
else
{
unsigned int dc;
GETCHARINC(dc, eptr);
ecode += length;
/* If we have Unicode property support, we can use it to test the other
case of the character, if there is one. */
if (fc != dc)
{
#ifdef SUPPORT_UCP
if (dc != _pcre_ucp_othercase(fc))
#endif
RRETURN(MATCH_NOMATCH);
}
}
}
else
#endif /* SUPPORT_UTF8 */
/* Non-UTF-8 mode */
{
if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
ecode += 2;
}
break;
/* Match a single character repeatedly. */
case OP_EXACT:
min = max = GET2(ecode, 1);
ecode += 3;
goto REPEATCHAR;
case OP_POSUPTO:
possessive = TRUE;
/* Fall through */
case OP_UPTO:
case OP_MINUPTO:
min = 0;
max = GET2(ecode, 1);
minimize = *ecode == OP_MINUPTO;
ecode += 3;
goto REPEATCHAR;
case OP_POSSTAR:
possessive = TRUE;
min = 0;
max = INT_MAX;
ecode++;
goto REPEATCHAR;
case OP_POSPLUS:
possessive = TRUE;
min = 1;
max = INT_MAX;
ecode++;
goto REPEATCHAR;
case OP_POSQUERY:
possessive = TRUE;
min = 0;
max = 1;
ecode++;
goto REPEATCHAR;
case OP_STAR:
case OP_MINSTAR:
case OP_PLUS:
case OP_MINPLUS:
case OP_QUERY:
case OP_MINQUERY:
c = *ecode++ - OP_STAR;
minimize = (c & 1) != 0;
min = rep_min[c]; /* Pick up values from tables; */
max = rep_max[c]; /* zero for max => infinity */
if (max == 0) max = INT_MAX;
/* Common code for all repeated single-character matches. We can give
up quickly if there are fewer than the minimum number of characters left in
the subject. */
REPEATCHAR:
#ifdef SUPPORT_UTF8
if (utf8)
{
length = 1;
charptr = ecode;
GETCHARLEN(fc, ecode, length);
if (min * length > md->end_subject - eptr) RRETURN(MAT
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -