📄 pcre_exec.c
字号:
/* Not UTF-8 mode */ { for (fi = min;; fi++) { RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); c = *eptr++; if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); } } /* Control never gets here */ } /* If maximizing, find the longest possible run, then work backwards. */ else { pp = eptr;#ifdef SUPPORT_UTF8 /* UTF-8 mode */ if (utf8) { for (i = min; i < max; i++) { int len = 1; if (eptr >= md->end_subject) break; GETCHARLEN(c, eptr, len); if (c > 255) { if (op == OP_CLASS) break; } else { if ((data[c/8] & (1 << (c&7))) == 0) break; } eptr += len; } for (;;) { RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (eptr-- == pp) break; /* Stop if tried at original pos */ BACKCHAR(eptr); } } else#endif /* Not UTF-8 mode */ { for (i = min; i < max; i++) { if (eptr >= md->end_subject) break; c = *eptr; if ((data[c/8] & (1 << (c&7))) == 0) break; eptr++; } while (eptr >= pp) { RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr--; } } RRETURN(MATCH_NOMATCH); } } /* Control never gets here */ /* Match an extended character class. This opcode is encountered only in UTF-8 mode, because that's the only time it is compiled. */#ifdef SUPPORT_UTF8 case OP_XCLASS: { data = ecode + 1 + LINK_SIZE; /* Save for matching */ ecode += GET(ecode, 1); /* Advance past the item */ switch (*ecode) { case OP_CRSTAR: case OP_CRMINSTAR: case OP_CRPLUS: case OP_CRMINPLUS: case OP_CRQUERY: case OP_CRMINQUERY: c = *ecode++ - OP_CRSTAR; minimize = (c & 1) != 0; min = rep_min[c]; /* Pick up values from tables; */ max = rep_max[c]; /* zero for max => infinity */ if (max == 0) max = INT_MAX; break; case OP_CRRANGE: case OP_CRMINRANGE: minimize = (*ecode == OP_CRMINRANGE); min = GET2(ecode, 1); max = GET2(ecode, 3); if (max == 0) max = INT_MAX; ecode += 5; break; default: /* No repeat follows */ min = max = 1; break; } /* First, ensure the minimum number of matches are present. */ for (i = 1; i <= min; i++) { if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINC(c, eptr); if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH); } /* If max == min we can continue with the main loop without the need to recurse. */ if (min == max) continue; /* If minimizing, keep testing the rest of the expression and advancing the pointer while it matches the class. */ if (minimize) { for (fi = min;; fi++) { RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINC(c, eptr); if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH); } /* Control never gets here */ } /* If maximizing, find the longest possible run, then work backwards. */ else { pp = eptr; for (i = min; i < max; i++) { int len = 1; if (eptr >= md->end_subject) break; GETCHARLEN(c, eptr, len); if (!_pcre_xclass(c, data)) break; eptr += len; } for(;;) { RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (eptr-- == pp) break; /* Stop if tried at original pos */ BACKCHAR(eptr) } RRETURN(MATCH_NOMATCH); } /* Control never gets here */ }#endif /* End of XCLASS */ /* Match a single character, casefully */ case OP_CHAR:#ifdef SUPPORT_UTF8 if (utf8) { length = 1; ecode++; GETCHARLEN(fc, ecode, length); if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH); } else#endif /* Non-UTF-8 mode */ { if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH); if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH); ecode += 2; } break; /* Match a single character, caselessly */ case OP_CHARNC:#ifdef SUPPORT_UTF8 if (utf8) { length = 1; ecode++; GETCHARLEN(fc, ecode, length); if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); /* If the pattern character's value is < 128, we have only one byte, and can use the fast lookup table. */ if (fc < 128) { if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); } /* Otherwise we must pick up the subject character */ else { int dc; GETCHARINC(dc, eptr); ecode += length; /* If we have Unicode property support, we can use it to test the other case of the character, if there is one. */ if (fc != dc) {#ifdef SUPPORT_UCP if (dc != _pcre_ucp_othercase(fc))#endif RRETURN(MATCH_NOMATCH); } } } else#endif /* SUPPORT_UTF8 */ /* Non-UTF-8 mode */ { if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH); if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); ecode += 2; } break; /* Match a single character repeatedly; different opcodes share code. */ case OP_EXACT: min = max = GET2(ecode, 1); ecode += 3; goto REPEATCHAR; case OP_UPTO: case OP_MINUPTO: min = 0; max = GET2(ecode, 1); minimize = *ecode == OP_MINUPTO; ecode += 3; goto REPEATCHAR; case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS: case OP_QUERY: case OP_MINQUERY: c = *ecode++ - OP_STAR; minimize = (c & 1) != 0; min = rep_min[c]; /* Pick up values from tables; */ max = rep_max[c]; /* zero for max => infinity */ if (max == 0) max = INT_MAX; /* Common code for all repeated single-character matches. We can give up quickly if there are fewer than the minimum number of characters left in the subject. */ REPEATCHAR:#ifdef SUPPORT_UTF8 if (utf8) { length = 1; charptr = ecode; GETCHARLEN(fc, ecode, length); if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); ecode += length; /* Handle multibyte character matching specially here. There is support for caseless matching if UCP support is present. */ if (length > 1) { int oclength = 0; uschar occhars[8];#ifdef SUPPORT_UCP int othercase; if ((ims & PCRE_CASELESS) != 0 && (othercase = _pcre_ucp_othercase(fc)) >= 0 && othercase >= 0) oclength = _pcre_ord2utf8(othercase, occhars);#endif /* SUPPORT_UCP */ for (i = 1; i <= min; i++) { if (memcmp(eptr, charptr, length) == 0) eptr += length; /* Need braces because of following else */ else if (oclength == 0) { RRETURN(MATCH_NOMATCH); } else { if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH); eptr += oclength; } } if (min == max) continue; if (minimize) { for (fi = min;; fi++) { RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); if (memcmp(eptr, charptr, length) == 0) eptr += length; /* Need braces because of following else */ else if (oclength == 0) { RRETURN(MATCH_NOMATCH); } else { if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH); eptr += oclength; } } /* Control never gets here */ } else { pp = eptr; for (i = min; i < max; i++) { if (eptr > md->end_subject - length) break; if (memcmp(eptr, charptr, length) == 0) eptr += length; else if (oclength == 0) break; else { if (memcmp(eptr, occhars, oclength) != 0) break; eptr += oclength; } } while (eptr >= pp) { RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr -= length; } RRETURN(MATCH_NOMATCH); } /* Control never gets here */ } /* If the length of a UTF-8 character is 1, we fall through here, and obey the code as for non-UTF-8 characters below, though in this case the value of fc will always be < 128. */ } else#endif /* SUPPORT_UTF8 */ /* When not in UTF-8 mode, load a single-byte character. */ { if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); fc = *ecode++; } /* The value of fc at this point is always less than 256, though we may or may not be in UTF-8 mode. The code is duplicated for the caseless and caseful cases, for speed, since matching characters is likely to be quite common. First, ensure the minimum number of matches are present. If min = max, continue at the same level without recursing. Otherwise, if minimizing, keep trying the rest of the expression and advancing one matching character if failing, up to the maximum. Alternatively, if maximizing, find the maximum number of characters and work backwards. */ DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max, max, eptr)); if ((ims & PCRE_CASELESS) != 0) { fc = md->lcc[fc]; for (i = 1; i <= min; i++) if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); if (min == max) continue; if (minimize) { for (fi = min;; fi++) { RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max || eptr >= md->end_subject || fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); } /* Control never gets here */ } else { pp = eptr; for (i = min; i < max; i++) { if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break; eptr++; } while (eptr >= pp) { RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); eptr--; if (rrc != MATCH_NOMATCH) RRETURN(rrc); } RRETURN(MATCH_NOMATCH); } /* Control never gets here */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -