📄 pcre_dfa_exec.c
字号:
case 0x2002: /* EN SPACE */ case 0x2003: /* EM SPACE */ case 0x2004: /* THREE-PER-EM SPACE */ case 0x2005: /* FOUR-PER-EM SPACE */ case 0x2006: /* SIX-PER-EM SPACE */ case 0x2007: /* FIGURE SPACE */ case 0x2008: /* PUNCTUATION SPACE */ case 0x2009: /* THIN SPACE */ case 0x200A: /* HAIR SPACE */ case 0x202f: /* NARROW NO-BREAK SPACE */ case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ case 0x3000: /* IDEOGRAPHIC SPACE */ OK = TRUE; break; default: OK = FALSE; break; } if (OK == (d == OP_HSPACE)) { if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO) { active_count--; /* Remove non-match possibility */ next_active_state--; } if (++count >= GET2(code, 1)) { ADD_NEW_DATA(-(state_offset + 4), 0, 0); } else { ADD_NEW_DATA(-state_offset, count, 0); } } } break;/* ========================================================================== */ /* These opcodes are followed by a character that is usually compared to the current subject character; it is loaded into d. We still get here even if there is no subject character, because in some cases zero repetitions are permitted. */ /*-----------------------------------------------------------------*/ case OP_CHAR: if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); } break; /*-----------------------------------------------------------------*/ case OP_CHARNC: if (clen == 0) break;#ifdef SUPPORT_UTF8 if (utf8) { if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else { unsigned int othercase; if (c < 128) othercase = fcc[c]; else /* If we have Unicode property support, we can use it to test the other case of the character. */#ifdef SUPPORT_UCP othercase = _pcre_ucp_othercase(c);#else othercase = NOTACHAR;#endif if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); } } } else#endif /* SUPPORT_UTF8 */ /* Non-UTF-8 mode */ { if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); } } break;#ifdef SUPPORT_UCP /*-----------------------------------------------------------------*/ /* This is a tricky one because it can match more than one character. Find out how many characters to skip, and then set up a negative state to wait for them to pass before continuing. */ case OP_EXTUNI: if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M) { const uschar *nptr = ptr + clen; int ncount = 0; while (nptr < end_subject) { int nclen = 1; GETCHARLEN(c, nptr, nclen); if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break; ncount++; nptr += nclen; } ADD_NEW_DATA(-(state_offset + 1), 0, ncount); } break;#endif /*-----------------------------------------------------------------*/ /* This is a tricky like EXTUNI because it too can match more than one character (when CR is followed by LF). In this case, set up a negative state to wait for one character to pass before continuing. */ case OP_ANYNL: if (clen > 0) switch(c) { case 0x000b: case 0x000c: case 0x0085: case 0x2028: case 0x2029: if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break; case 0x000a: ADD_NEW(state_offset + 1, 0); break; case 0x000d: if (ptr + 1 < end_subject && ptr[1] == 0x0a) { ADD_NEW_DATA(-(state_offset + 1), 0, 1); } else { ADD_NEW(state_offset + 1, 0); } break; } break; /*-----------------------------------------------------------------*/ case OP_NOT_VSPACE: if (clen > 0) switch(c) { case 0x000a: case 0x000b: case 0x000c: case 0x000d: case 0x0085: case 0x2028: case 0x2029: break; default: ADD_NEW(state_offset + 1, 0); break; } break; /*-----------------------------------------------------------------*/ case OP_VSPACE: if (clen > 0) switch(c) { case 0x000a: case 0x000b: case 0x000c: case 0x000d: case 0x0085: case 0x2028: case 0x2029: ADD_NEW(state_offset + 1, 0); break; default: break; } break; /*-----------------------------------------------------------------*/ case OP_NOT_HSPACE: if (clen > 0) switch(c) { case 0x09: /* HT */ case 0x20: /* SPACE */ case 0xa0: /* NBSP */ case 0x1680: /* OGHAM SPACE MARK */ case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ case 0x2000: /* EN QUAD */ case 0x2001: /* EM QUAD */ case 0x2002: /* EN SPACE */ case 0x2003: /* EM SPACE */ case 0x2004: /* THREE-PER-EM SPACE */ case 0x2005: /* FOUR-PER-EM SPACE */ case 0x2006: /* SIX-PER-EM SPACE */ case 0x2007: /* FIGURE SPACE */ case 0x2008: /* PUNCTUATION SPACE */ case 0x2009: /* THIN SPACE */ case 0x200A: /* HAIR SPACE */ case 0x202f: /* NARROW NO-BREAK SPACE */ case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ case 0x3000: /* IDEOGRAPHIC SPACE */ break; default: ADD_NEW(state_offset + 1, 0); break; } break; /*-----------------------------------------------------------------*/ case OP_HSPACE: if (clen > 0) switch(c) { case 0x09: /* HT */ case 0x20: /* SPACE */ case 0xa0: /* NBSP */ case 0x1680: /* OGHAM SPACE MARK */ case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ case 0x2000: /* EN QUAD */ case 0x2001: /* EM QUAD */ case 0x2002: /* EN SPACE */ case 0x2003: /* EM SPACE */ case 0x2004: /* THREE-PER-EM SPACE */ case 0x2005: /* FOUR-PER-EM SPACE */ case 0x2006: /* SIX-PER-EM SPACE */ case 0x2007: /* FIGURE SPACE */ case 0x2008: /* PUNCTUATION SPACE */ case 0x2009: /* THIN SPACE */ case 0x200A: /* HAIR SPACE */ case 0x202f: /* NARROW NO-BREAK SPACE */ case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ case 0x3000: /* IDEOGRAPHIC SPACE */ ADD_NEW(state_offset + 1, 0); break; } break; /*-----------------------------------------------------------------*/ /* Match a negated single character. This is only used for one-byte characters, that is, we know that d < 256. The character we are checking (c) can be multibyte. */ case OP_NOT: if (clen > 0) { unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d; if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); } } break; /*-----------------------------------------------------------------*/ case OP_PLUS: case OP_MINPLUS: case OP_POSPLUS: case OP_NOTPLUS: case OP_NOTMINPLUS: case OP_NOTPOSPLUS: count = current_state->count; /* Already matched */ if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); } if (clen > 0) { unsigned int otherd = NOTACHAR; if ((ims & PCRE_CASELESS) != 0) {#ifdef SUPPORT_UTF8 if (utf8 && d >= 128) {#ifdef SUPPORT_UCP otherd = _pcre_ucp_othercase(d);#endif /* SUPPORT_UCP */ } else#endif /* SUPPORT_UTF8 */ otherd = fcc[d]; } if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) { if (count > 0 && (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS)) { active_count--; /* Remove non-match possibility */ next_active_state--; } count++; ADD_NEW(state_offset, count); } } break; /*-----------------------------------------------------------------*/ case OP_QUERY: case OP_MINQUERY: case OP_POSQUERY: case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTPOSQUERY: ADD_ACTIVE(state_offset + dlen + 1, 0); if (clen > 0) { unsigned int otherd = NOTACHAR; if ((ims & PCRE_CASELESS) != 0) {#ifdef SUPPORT_UTF8 if (utf8 && d >= 128) {#ifdef SUPPORT_UCP otherd = _pcre_ucp_othercase(d);#endif /* SUPPORT_UCP */ } else#endif /* SUPPORT_UTF8 */ otherd = fcc[d]; } if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) { if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY) { active_count--; /* Remove non-match possibility */ next_active_state--; } ADD_NEW(state_offset + dlen + 1, 0); } } break; /*-----------------------------------------------------------------*/ case OP_STAR: case OP_MINSTAR: case OP_POSSTAR: case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPOSSTAR: ADD_ACTIVE(state_offset + dlen + 1, 0); if (clen > 0) { unsigned int otherd = NOTACHAR; if ((ims & PCRE_CASELESS) != 0) {#ifdef SUPPORT_UTF8 if (utf8 && d >= 128) {#ifdef SUPPORT_UCP otherd = _pcre_ucp_othercase(d);#endif /* SUPPORT_UCP */ } else#endif /* SUPPORT_UTF8 */ otherd = fcc[d]; } if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) { if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR) { active_count--; /* Remove non-match possibility */ next_active_state--; } ADD_NEW(state_offset, 0); } } break; /*-----------------------------------------------------------------*/ case OP_EXACT: case OP_NOTEXACT: count = current_state->count; /* Number already matched */ if (clen > 0) { unsigned int otherd = NOTACHAR; if ((ims & PCRE_CASELESS) != 0) {#ifdef SUPPORT_UTF8 if (utf8 && d >= 128) {#ifdef SUPPORT_UCP otherd = _pcre_ucp_othercase(d);#endif /* SUPPORT_UCP */ } else#endif /* SUPPORT_UTF8 */ otherd = fcc[d]; } if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) { if (++count >= GET2(code, 1)) { ADD_NEW(state_offset + dlen + 3, 0); } else { ADD_NEW(state_offset, count); } } } break; /*-----------------------------------------------------------------*/ case OP_UPTO: case OP_MINUPTO: case OP_POSUPTO: case OP_NOTUPTO: case OP_NOTMINUPTO: case OP_NOTPOSUPTO: ADD_ACTIVE(state_offset + dlen + 3, 0); count = current_state->count; /* Number already matched */ if (clen > 0) { unsigned int otherd = NOTACHAR; if ((ims & PCRE_CASELESS) != 0) {#ifdef SUPPORT_UTF8 if (utf8 && d >= 128) {#ifdef SUPPORT_UCP otherd = _pcre_ucp_othercase(d);#endif /* SUPPORT_UCP */ } else#endif /* SUPPORT_UTF8 */ otherd = fcc[d];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -