📄 pcre_compile.c
字号:
break; /* Handle single-char matchers */ case OP_PROP: case OP_NOTPROP: cc += 2; /* Fall through */ case OP_NOT_DIGIT: case OP_DIGIT: case OP_NOT_WHITESPACE: case OP_WHITESPACE: case OP_NOT_WORDCHAR: case OP_WORDCHAR: case OP_ANY: case OP_ALLANY: branchlength++; cc++; break; /* The single-byte matcher isn't allowed */ case OP_ANYBYTE: return -2; /* Check a class for variable quantification */#ifdef SUPPORT_UTF8 case OP_XCLASS: cc += GET(cc, 1) - 33; /* Fall through */#endif case OP_CLASS: case OP_NCLASS: cc += 33; switch (*cc) { case OP_CRSTAR: case OP_CRMINSTAR: case OP_CRQUERY: case OP_CRMINQUERY: return -1; case OP_CRRANGE: case OP_CRMINRANGE: if (GET2(cc,1) != GET2(cc,3)) return -1; branchlength += GET2(cc,1); cc += 5; break; default: branchlength++; } break; /* Anything else is variable length */ default: return -1; } }/* Control never gets here */}/************************************************** Scan compiled regex for numbered bracket **************************************************//* This little function scans through a compiled pattern until it finds acapturing bracket with the given number.Arguments: code points to start of expression utf8 TRUE in UTF-8 mode number the required bracket numberReturns: pointer to the opcode for the bracket, or NULL if not found*/static const uschar *find_bracket(const uschar *code, BOOL utf8, int number){for (;;) { register int c = *code; if (c == OP_END) return NULL; /* XCLASS is used for classes that cannot be represented just by a bit map. This includes negated single high-valued characters. The length in the table is zero; the actual length is stored in the compiled code. */ if (c == OP_XCLASS) code += GET(code, 1); /* Handle capturing bracket */ else if (c == OP_CBRA) { int n = GET2(code, 1+LINK_SIZE); if (n == number) return (uschar *)code; code += _pcre_OP_lengths[c]; } /* Otherwise, we can get the item's length from the table, except that for repeated character types, we have to test for \p and \P, which have an extra two bytes of parameters. */ else { switch(c) { case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS: case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; break; case OP_TYPEUPTO: case OP_TYPEMINUPTO: case OP_TYPEEXACT: case OP_TYPEPOSUPTO: if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2; break; } /* Add in the fixed length from the table */ code += _pcre_OP_lengths[c]; /* In UTF-8 mode, opcodes that are followed by a character may be followed by a multi-byte character. The length in the table is a minimum, so we have to arrange to skip the extra bytes. */#ifdef SUPPORT_UTF8 if (utf8) switch(c) { case OP_CHAR: case OP_CHARNC: case OP_EXACT: case OP_UPTO: case OP_MINUPTO: case OP_POSUPTO: case OP_STAR: case OP_MINSTAR: case OP_POSSTAR: case OP_PLUS: case OP_MINPLUS: case OP_POSPLUS: case OP_QUERY: case OP_MINQUERY: case OP_POSQUERY: if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f]; break; }#endif } }}/************************************************** Scan compiled regex for recursion reference **************************************************//* This little function scans through a compiled pattern until it finds aninstance of OP_RECURSE.Arguments: code points to start of expression utf8 TRUE in UTF-8 modeReturns: pointer to the opcode for OP_RECURSE, or NULL if not found*/static const uschar *find_recurse(const uschar *code, BOOL utf8){for (;;) { register int c = *code; if (c == OP_END) return NULL; if (c == OP_RECURSE) return code; /* XCLASS is used for classes that cannot be represented just by a bit map. This includes negated single high-valued characters. The length in the table is zero; the actual length is stored in the compiled code. */ if (c == OP_XCLASS) code += GET(code, 1); /* Otherwise, we can get the item's length from the table, except that for repeated character types, we have to test for \p and \P, which have an extra two bytes of parameters. */ else { switch(c) { case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS: case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; break; case OP_TYPEPOSUPTO: case OP_TYPEUPTO: case OP_TYPEMINUPTO: case OP_TYPEEXACT: if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2; break; } /* Add in the fixed length from the table */ code += _pcre_OP_lengths[c]; /* In UTF-8 mode, opcodes that are followed by a character may be followed by a multi-byte character. The length in the table is a minimum, so we have to arrange to skip the extra bytes. */#ifdef SUPPORT_UTF8 if (utf8) switch(c) { case OP_CHAR: case OP_CHARNC: case OP_EXACT: case OP_UPTO: case OP_MINUPTO: case OP_POSUPTO: case OP_STAR: case OP_MINSTAR: case OP_POSSTAR: case OP_PLUS: case OP_MINPLUS: case OP_POSPLUS: case OP_QUERY: case OP_MINQUERY: case OP_POSQUERY: if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f]; break; }#endif } }}/************************************************** Scan compiled branch for non-emptiness **************************************************//* This function scans through a branch of a compiled pattern to see whether itcan match the empty string or not. It is called from could_be_empty()below and from compile_branch() when checking for an unlimited repeat of agroup that can match nothing. Note that first_significant_code() skips overbackward and negative forward assertions when its final argument is TRUE. If wehit an unclosed bracket, we return "empty" - this means we've struck an innerbracket whose current branch will already have been scanned.Arguments: code points to start of search endcode points to where to stop utf8 TRUE if in UTF8 modeReturns: TRUE if what is matched could be empty*/static BOOLcould_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8){register int c;for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE); code < endcode; code = first_significant_code(code + _pcre_OP_lengths[c], NULL, 0, TRUE)) { const uschar *ccode; c = *code; /* Skip over forward assertions; the other assertions are skipped by first_significant_code() with a TRUE final argument. */ if (c == OP_ASSERT) { do code += GET(code, 1); while (*code == OP_ALT); c = *code; continue; } /* Groups with zero repeats can of course be empty; skip them. */ if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO) { code += _pcre_OP_lengths[c]; do code += GET(code, 1); while (*code == OP_ALT); c = *code; continue; } /* For other groups, scan the branches. */ if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE || c == OP_COND) { BOOL empty_branch; if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */ /* Scan a closed bracket */ empty_branch = FALSE; do { if (!empty_branch && could_be_empty_branch(code, endcode, utf8)) empty_branch = TRUE; code += GET(code, 1); } while (*code == OP_ALT); if (!empty_branch) return FALSE; /* All branches are non-empty */ c = *code; continue; } /* Handle the other opcodes */ switch (c) { /* Check for quantifiers after a class. XCLASS is used for classes that cannot be represented just by a bit map. This includes negated single high-valued characters. The length in _pcre_OP_lengths[] is zero; the actual length is stored in the compiled code, so we must update "code" here. */#ifdef SUPPORT_UTF8 case OP_XCLASS: ccode = code += GET(code, 1); goto CHECK_CLASS_REPEAT;#endif case OP_CLASS: case OP_NCLASS: ccode = code + 33;#ifdef SUPPORT_UTF8 CHECK_CLASS_REPEAT:#endif switch (*ccode) { case OP_CRSTAR: /* These could be empty; continue */ case OP_CRMINSTAR: case OP_CRQUERY: case OP_CRMINQUERY: break; default: /* Non-repeat => class must match */ case OP_CRPLUS: /* These repeats aren't empty */ case OP_CRMINPLUS: return FALSE; case OP_CRRANGE: case OP_CRMINRANGE: if (GET2(ccode, 1) > 0) return FALSE; /* Minimum > 0 */ break; } break; /* Opcodes that must match a character */ case OP_PROP: case OP_NOTPROP: case OP_EXTUNI: case OP_NOT_DIGIT: case OP_DIGIT: case OP_NOT_WHITESPACE: case OP_WHITESPACE: case OP_NOT_WORDCHAR: case OP_WORDCHAR: case OP_ANY: case OP_ALLANY: case OP_ANYBYTE: case OP_CHAR: case OP_CHARNC: case OP_NOT: case OP_PLUS: case OP_MINPLUS: case OP_POSPLUS: case OP_EXACT: case OP_NOTPLUS: case OP_NOTMINPLUS: case OP_NOTPOSPLUS: case OP_NOTEXACT: case OP_TYPEPLUS: case OP_TYPEMINPLUS: case OP_TYPEPOSPLUS: case OP_TYPEEXACT: return FALSE; /* These are going to continue, as they may be empty, but we have to fudge the length for the \p and \P cases. */ case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPOSSTAR: case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEPOSQUERY: if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; break; /* Same for these */ case OP_TYPEUPTO:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -