📄 pcre_compile.c
字号:
PUT(code, LINK_SIZE, 0); /* Default length */return code + 2*LINK_SIZE;}/************************************************** Complete a callout item **************************************************//* A callout item contains the length of the next item in the pattern, whichwe can't fill in till after we have reached the relevant point. This is usedfor both automatic and manual callouts.Arguments: previous_callout points to previous callout item ptr current pattern pointer cd pointers to tables etcReturns: nothing*/static voidcomplete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd){int length = ptr - cd->start_pattern - GET(previous_callout, 2);PUT(previous_callout, 2 + LINK_SIZE, length);}#ifdef SUPPORT_UCP/************************************************** Get othercase range **************************************************//* This function is passed the start and end of a class range, in UTF-8 modewith UCP support. It searches up the characters, looking for internal ranges ofcharacters in the "other" case. Each call returns the next one, updating thestart address.Arguments: cptr points to starting character value; updated d end value ocptr where to put start of othercase range odptr where to put end of othercase rangeYield: TRUE when range returned; FALSE when no more*/static BOOLget_othercase_range(unsigned int *cptr, unsigned int d, unsigned int *ocptr, unsigned int *odptr){unsigned int c, othercase, next;for (c = *cptr; c <= d; c++) { if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR) break; }if (c > d) return FALSE;*ocptr = othercase;next = othercase + 1;for (++c; c <= d; c++) { if (_pcre_ucp_othercase(c) != next) break; next++; }*odptr = next - 1;*cptr = c;return TRUE;}#endif /* SUPPORT_UCP *//************************************************** Check if auto-possessifying is possible **************************************************//* This function is called for unlimited repeats of certain items, to seewhether the next thing could possibly match the repeated item. If not, it makessense to automatically possessify the repeated item.Arguments: op_code the repeated op code this data for this item, depends on the opcode utf8 TRUE in UTF-8 mode utf8_char used for utf8 character bytes, NULL if not relevant ptr next character in pattern options options bits cd contains pointers to tables etc.Returns: TRUE if possessifying is wanted*/static BOOLcheck_auto_possessive(int op_code, int item, BOOL utf8, uschar *utf8_char, const uschar *ptr, int options, compile_data *cd){int next;/* Skip whitespace and comments in extended mode */if ((options & PCRE_EXTENDED) != 0) { for (;;) { while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++; if (*ptr == '#') { while (*(++ptr) != 0) if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; } } else break; } }/* If the next item is one that we can handle, get its value. A non-negativevalue is a character, a negative value is an escape value. */if (*ptr == '\\') { int temperrorcode = 0; next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE); if (temperrorcode != 0) return FALSE; ptr++; /* Point after the escape sequence */ }else if ((cd->ctypes[*ptr] & ctype_meta) == 0) {#ifdef SUPPORT_UTF8 if (utf8) { GETCHARINC(next, ptr); } else#endif next = *ptr++; }else return FALSE;/* Skip whitespace and comments in extended mode */if ((options & PCRE_EXTENDED) != 0) { for (;;) { while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++; if (*ptr == '#') { while (*(++ptr) != 0) if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; } } else break; } }/* If the next thing is itself optional, we have to give up. */if (*ptr == '*' || *ptr == '?' || strncmp((char *)ptr, "{0,", 3) == 0) return FALSE;/* Now compare the next item with the previous opcode. If the previous is apositive single character match, "item" either contains the character or, if"item" is greater than 127 in utf8 mode, the character's bytes are inutf8_char. *//* Handle cases when the next item is a character. */if (next >= 0) switch(op_code) { case OP_CHAR:#ifdef SUPPORT_UTF8 if (utf8 && item > 127) { GETCHAR(item, utf8_char); }#endif return item != next; /* For CHARNC (caseless character) we must check the other case. If we have Unicode property support, we can use it to test the other case of high-valued characters. */ case OP_CHARNC:#ifdef SUPPORT_UTF8 if (utf8 && item > 127) { GETCHAR(item, utf8_char); }#endif if (item == next) return FALSE;#ifdef SUPPORT_UTF8 if (utf8) { unsigned int othercase; if (next < 128) othercase = cd->fcc[next]; else#ifdef SUPPORT_UCP othercase = _pcre_ucp_othercase((unsigned int)next);#else othercase = NOTACHAR;#endif return (unsigned int)item != othercase; } else#endif /* SUPPORT_UTF8 */ return (item != cd->fcc[next]); /* Non-UTF-8 mode */ /* For OP_NOT, "item" must be a single-byte character. */ case OP_NOT: if (item == next) return TRUE; if ((options & PCRE_CASELESS) == 0) return FALSE;#ifdef SUPPORT_UTF8 if (utf8) { unsigned int othercase; if (next < 128) othercase = cd->fcc[next]; else#ifdef SUPPORT_UCP othercase = _pcre_ucp_othercase(next);#else othercase = NOTACHAR;#endif return (unsigned int)item == othercase; } else#endif /* SUPPORT_UTF8 */ return (item == cd->fcc[next]); /* Non-UTF-8 mode */ case OP_DIGIT: return next > 127 || (cd->ctypes[next] & ctype_digit) == 0; case OP_NOT_DIGIT: return next <= 127 && (cd->ctypes[next] & ctype_digit) != 0; case OP_WHITESPACE: return next > 127 || (cd->ctypes[next] & ctype_space) == 0; case OP_NOT_WHITESPACE: return next <= 127 && (cd->ctypes[next] & ctype_space) != 0; case OP_WORDCHAR: return next > 127 || (cd->ctypes[next] & ctype_word) == 0; case OP_NOT_WORDCHAR: return next <= 127 && (cd->ctypes[next] & ctype_word) != 0; case OP_HSPACE: case OP_NOT_HSPACE: switch(next) { case 0x09: case 0x20: case 0xa0: case 0x1680: case 0x180e: case 0x2000: case 0x2001: case 0x2002: case 0x2003: case 0x2004: case 0x2005: case 0x2006: case 0x2007: case 0x2008: case 0x2009: case 0x200A: case 0x202f: case 0x205f: case 0x3000: return op_code != OP_HSPACE; default: return op_code == OP_HSPACE; } case OP_VSPACE: case OP_NOT_VSPACE: switch(next) { case 0x0a: case 0x0b: case 0x0c: case 0x0d: case 0x85: case 0x2028: case 0x2029: return op_code != OP_VSPACE; default: return op_code == OP_VSPACE; } default: return FALSE; }/* Handle the case when the next item is \d, \s, etc. */switch(op_code) { case OP_CHAR: case OP_CHARNC:#ifdef SUPPORT_UTF8 if (utf8 && item > 127) { GETCHAR(item, utf8_char); }#endif switch(-next) { case ESC_d: return item > 127 || (cd->ctypes[item] & ctype_digit) == 0; case ESC_D: return item <= 127 && (cd->ctypes[item] & ctype_digit) != 0; case ESC_s: return item > 127 || (cd->ctypes[item] & ctype_space) == 0; case ESC_S: return item <= 127 && (cd->ctypes[item] & ctype_space) != 0; case ESC_w: return item > 127 || (cd->ctypes[item] & ctype_word) == 0; case ESC_W: return item <= 127 && (cd->ctypes[item] & ctype_word) != 0; case ESC_h: case ESC_H: switch(item) { case 0x09: case 0x20: case 0xa0: case 0x1680: case 0x180e: case 0x2000: case 0x2001: case 0x2002: case 0x2003: case 0x2004: case 0x2005: case 0x2006: case 0x2007: case 0x2008: case 0x2009: case 0x200A: case 0x202f: case 0x205f: case 0x3000: return -next != ESC_h; default: return -next == ESC_h; } case ESC_v: case ESC_V: switch(item) { case 0x0a: case 0x0b: case 0x0c: case 0x0d: case 0x85: case 0x2028: case 0x2029: return -next != ESC_v; default: return -next == ESC_v; } default: return FALSE; } case OP_DIGIT: return next == -ESC_D || next == -ESC_s || next == -ESC_W || next == -ESC_h || next == -ESC_v; case OP_NOT_DIGIT: return next == -ESC_d; case OP_WHITESPACE: return next == -ESC_S || next == -ESC_d || next == -ESC_w; case OP_NOT_WHITESPACE: return next == -ESC_s || next == -ESC_h || next == -ESC_v; case OP_HSPACE: return next == -ESC_S || next == -ESC_H || next == -ESC_d || next == -ESC_w; case OP_NOT_HSPACE: return next == -ESC_h; /* Can't have \S in here because VT matches \S (Perl anomaly) */ case OP_VSPACE: return next == -ESC_V || next == -ESC_d || next == -ESC_w; case OP_NOT_VSPACE: return next == -ESC_v; case OP_WORDCHAR: return next == -ESC_W || next == -ESC_s || next == -ESC_h || next == -ESC_v; case OP_NOT_WORDCHAR: return next == -ESC_w || next == -ESC_d; default: return FALSE; }/* Control does not reach here */}/************************************************** Compile one branch **************************************************//* Scan the pattern, compiling it into the a vector. If the options arechanged during the branch, the pointer is used to change the external optionsbits. This function is used during the pre-compile phase when we are tryingto find out the amount of memory needed, as well as during the real compilephase. The value of lengthptr distinguishes the two phases.Arguments: optionsptr pointer to the option bits codeptr points to the pointer to the current code point ptrptr points to the current pattern pointer errorcodeptr points to error code variable firstbyteptr set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE) reqbyteptr set to the last literal character required, else < 0 bcptr points to current branch chain cd contains pointers to tables etc. lengthptr NULL during the real compile phase points to length accumulator during pre-compile phaseReturns: TRUE on success FALSE, with *errorcodeptr set non-zero on error*/static BOOLcompile_branch(int *optionsptr, uschar **codeptr, const uschar **ptrptr, int *errorcodeptr, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, compile_data *cd, int *lengthptr){int repeat_type, op_type;int repeat_min = 0, repeat_max = 0; /* To please picky compilers */int bravalue = 0;int greedy_default, greedy_non_default;int firstbyte, reqbyte;int zeroreqbyte, zerofirstbyte;int req_caseopt, reqvary, tempreqvary;int options = *optionsptr;int after_manual_callout = 0;int length_prevgroup = 0;register int c;register uschar *code = *codeptr;uschar *last_code = code;uschar *orig_code = code;uschar *tempcode;BOOL inescq = FALSE;BOOL groupsetfirstbyte = FALSE
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -