📄 pcre_exec.c
字号:
#endif /* Not UTF-8 mode */ { for (i = 1; i <= min; i++) { if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); c = *eptr++; if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); } } /* If max == min we can continue with the main loop without the need to recurse. */ if (min == max) continue; /* If minimizing, keep testing the rest of the expression and advancing the pointer while it matches the class. */ if (minimize) {#ifdef SUPPORT_UTF8 /* UTF-8 mode */ if (utf8) { for (fi = min;; fi++) { RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINC(c, eptr); if (c > 255) { if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); } else { if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); } } } else#endif /* Not UTF-8 mode */ { for (fi = min;; fi++) { RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); c = *eptr++; if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); } } /* Control never gets here */ } /* If maximizing, find the longest possible run, then work backwards. */ else { pp = eptr;#ifdef SUPPORT_UTF8 /* UTF-8 mode */ if (utf8) { for (i = min; i < max; i++) { int len = 1; if (eptr >= md->end_subject) break; GETCHARLEN(c, eptr, len); if (c > 255) { if (op == OP_CLASS) break; } else { if ((data[c/8] & (1 << (c&7))) == 0) break; } eptr += len; } for (;;) { RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (eptr-- == pp) break; /* Stop if tried at original pos */ BACKCHAR(eptr); } } else#endif /* Not UTF-8 mode */ { for (i = min; i < max; i++) { if (eptr >= md->end_subject) break; c = *eptr; if ((data[c/8] & (1 << (c&7))) == 0) break; eptr++; } while (eptr >= pp) { RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr--; } } RRETURN(MATCH_NOMATCH); } } /* Control never gets here */ /* Match an extended character class. This opcode is encountered only in UTF-8 mode, because that's the only time it is compiled. */#ifdef SUPPORT_UTF8 case OP_XCLASS: { data = ecode + 1 + LINK_SIZE; /* Save for matching */ ecode += GET(ecode, 1); /* Advance past the item */ switch (*ecode) { case OP_CRSTAR: case OP_CRMINSTAR: case OP_CRPLUS: case OP_CRMINPLUS: case OP_CRQUERY: case OP_CRMINQUERY: c = *ecode++ - OP_CRSTAR; minimize = (c & 1) != 0; min = rep_min[c]; /* Pick up values from tables; */ max = rep_max[c]; /* zero for max => infinity */ if (max == 0) max = INT_MAX; break; case OP_CRRANGE: case OP_CRMINRANGE: minimize = (*ecode == OP_CRMINRANGE); min = GET2(ecode, 1); max = GET2(ecode, 3); if (max == 0) max = INT_MAX; ecode += 5; break; default: /* No repeat follows */ min = max = 1; break; } /* First, ensure the minimum number of matches are present. */ for (i = 1; i <= min; i++) { if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINC(c, eptr); if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH); } /* If max == min we can continue with the main loop without the need to recurse. */ if (min == max) continue; /* If minimizing, keep testing the rest of the expression and advancing the pointer while it matches the class. */ if (minimize) { for (fi = min;; fi++) { RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); GETCHARINC(c, eptr); if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH); } /* Control never gets here */ } /* If maximizing, find the longest possible run, then work backwards. */ else { pp = eptr; for (i = min; i < max; i++) { int len = 1; if (eptr >= md->end_subject) break; GETCHARLEN(c, eptr, len); if (!_pcre_xclass(c, data)) break; eptr += len; } for(;;) { RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (eptr-- == pp) break; /* Stop if tried at original pos */ BACKCHAR(eptr) } RRETURN(MATCH_NOMATCH); } /* Control never gets here */ }#endif /* End of XCLASS */ /* Match a single character, casefully */ case OP_CHAR:#ifdef SUPPORT_UTF8 if (utf8) { length = 1; ecode++; GETCHARLEN(fc, ecode, length); if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH); } else#endif /* Non-UTF-8 mode */ { if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH); if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH); ecode += 2; } break; /* Match a single character, caselessly */ case OP_CHARNC:#ifdef SUPPORT_UTF8 if (utf8) { length = 1; ecode++; GETCHARLEN(fc, ecode, length); if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); /* If the pattern character's value is < 128, we have only one byte, and can use the fast lookup table. */ if (fc < 128) { if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); } /* Otherwise we must pick up the subject character */ else { unsigned int dc; GETCHARINC(dc, eptr); ecode += length; /* If we have Unicode property support, we can use it to test the other case of the character, if there is one. */ if (fc != dc) {#ifdef SUPPORT_UCP if (dc != _pcre_ucp_othercase(fc))#endif RRETURN(MATCH_NOMATCH); } } } else#endif /* SUPPORT_UTF8 */ /* Non-UTF-8 mode */ { if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH); if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); ecode += 2; } break; /* Match a single character repeatedly. */ case OP_EXACT: min = max = GET2(ecode, 1); ecode += 3; goto REPEATCHAR; case OP_POSUPTO: possessive = TRUE; /* Fall through */ case OP_UPTO: case OP_MINUPTO: min = 0; max = GET2(ecode, 1); minimize = *ecode == OP_MINUPTO; ecode += 3; goto REPEATCHAR; case OP_POSSTAR: possessive = TRUE; min = 0; max = INT_MAX; ecode++; goto REPEATCHAR; case OP_POSPLUS: possessive = TRUE; min = 1; max = INT_MAX; ecode++; goto REPEATCHAR; case OP_POSQUERY: possessive = TRUE; min = 0; max = 1; ecode++; goto REPEATCHAR; case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS: case OP_QUERY: case OP_MINQUERY: c = *ecode++ - OP_STAR; minimize = (c & 1) != 0; min = rep_min[c]; /* Pick up values from tables; */ max = rep_max[c]; /* zero for max => infinity */ if (max == 0) max = INT_MAX; /* Common code for all repeated single-character matches. We can give up quickly if there are fewer than the minimum number of characters left in the subject. */ REPEATCHAR:#ifdef SUPPORT_UTF8 if (utf8) { length = 1; charptr = ecode; GETCHARLEN(fc, ecode, length); if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); ecode += length; /* Handle multibyte character matching specially here. There is support for caseless matching if UCP support is present. */ if (length > 1) { int oclength = 0; uschar occhars[8];#ifdef SUPPORT_UCP unsigned int othercase; if ((ims & PCRE_CASELESS) != 0 && (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR) oclength = _pcre_ord2utf8(othercase, occhars);#endif /* SUPPORT_UCP */ for (i = 1; i <= min; i++) { if (memcmp(eptr, charptr, length) == 0) eptr += length; /* Need braces because of following else */ else if (oclength == 0) { RRETURN(MATCH_NOMATCH); } else { if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH); eptr += oclength; } } if (min == max) continue; if (minimize) { for (fi = min;; fi++) { RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); if (memcmp(eptr, charptr, length) == 0) eptr += length; /* Need braces because of following else */ else if (oclength == 0) { RRETURN(MATCH_NOMATCH); } else { if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH); eptr += oclength; } } /* Control never gets here */ } else /* Maximize */ { pp = eptr; for (i = min; i < max; i++) { if (eptr > md->end_subject - length) break; if (memcmp(eptr, charptr, length) == 0) eptr += length; else if (oclength == 0) break; else { if (memcmp(eptr, occhars, oclength) != 0) break; eptr += oclength; } } if (possessive) continue; while (eptr >= pp) { RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr -= length;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -