📄 pcre.c
字号:
} else { code[-1] = OP_CHARS; *code++ = 1; } *code++ = class_lastchar; } /* Otherwise, negate the 32-byte map if necessary, and copy it into the code vector. */ else { if (negate_class) for (c = 0; c < 32; c++) code[c] = ~class[c]; else memcpy(code, class, 32); code += 32; } break; /* Various kinds of repeat */ case '{': if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR; ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd); if (*errorptr != NULL) goto FAILED; goto REPEAT; case '*': repeat_min = 0; repeat_max = -1; goto REPEAT; case '+': repeat_min = 1; repeat_max = -1; goto REPEAT; case '?': repeat_min = 0; repeat_max = 1; REPEAT: if (previous == NULL) { *errorptr = ERR9; goto FAILED; } /* If the next character is '?' this is a minimizing repeat, by default, but if PCRE_UNGREEDY is set, it works the other way round. Advance to the next character. */ if (ptr[1] == '?') { repeat_type = greedy_non_default; ptr++; } else repeat_type = greedy_default; /* If previous was a string of characters, chop off the last one and use it as the subject of the repeat. If there was only one character, we can abolish the previous item altogether. A repeat with a zero minimum wipes out any reqchar setting, backing up to the previous value. We must also adjust the countlits value. */ if (*previous == OP_CHARS) { int len = previous[1]; if (repeat_min == 0) *reqchar = prevreqchar; *countlits += repeat_min - 1; if (len == 1) { c = previous[2]; code = previous; } else { c = previous[len+1]; previous[1]--; code--; } op_type = 0; /* Use single-char op codes */ goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */ } /* If previous was a single negated character ([^a] or similar), we use one of the special opcodes, replacing it. The code is shared with single- character repeats by adding a suitable offset into repeat_type. */ else if ((int)*previous == OP_NOT) { op_type = OP_NOTSTAR - OP_STAR; /* Use "not" opcodes */ c = previous[1]; code = previous; goto OUTPUT_SINGLE_REPEAT; } /* If previous was a character type match (\d or similar), abolish it and create a suitable repeat item. The code is shared with single-character repeats by adding a suitable offset into repeat_type. */ else if ((int)*previous < OP_EODN || *previous == OP_ANY) { op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */ c = *previous; code = previous; OUTPUT_SINGLE_REPEAT: /* If the maximum is zero then the minimum must also be zero; Perl allows this case, so we do too - by simply omitting the item altogether. */ if (repeat_max == 0) goto END_REPEAT; /* Combine the op_type with the repeat_type */ repeat_type += op_type; /* A minimum of zero is handled either as the special case * or ?, or as an UPTO, with the maximum given. */ if (repeat_min == 0) { if (repeat_max == -1) *code++ = OP_STAR + repeat_type; else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type; else { *code++ = OP_UPTO + repeat_type; *code++ = repeat_max >> 8; *code++ = (repeat_max & 255); } } /* The case {1,} is handled as the special case + */ else if (repeat_min == 1 && repeat_max == -1) *code++ = OP_PLUS + repeat_type; /* The case {n,n} is just an EXACT, while the general case {n,m} is handled as an EXACT followed by an UPTO. An EXACT of 1 is optimized. */ else { if (repeat_min != 1) { *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */ *code++ = repeat_min >> 8; *code++ = (repeat_min & 255); } /* If the mininum is 1 and the previous item was a character string, we either have to put back the item that got cancelled if the string length was 1, or add the character back onto the end of a longer string. For a character type nothing need be done; it will just get put back naturally. Note that the final character is always going to get added below. */ else if (*previous == OP_CHARS) { if (code == previous) code += 2; else previous[1]++; } /* For a single negated character we also have to put back the item that got cancelled. */ else if (*previous == OP_NOT) code++; /* If the maximum is unlimited, insert an OP_STAR. */ if (repeat_max < 0) { *code++ = c; *code++ = OP_STAR + repeat_type; } /* Else insert an UPTO if the max is greater than the min. */ else if (repeat_max != repeat_min) { *code++ = c; repeat_max -= repeat_min; *code++ = OP_UPTO + repeat_type; *code++ = repeat_max >> 8; *code++ = (repeat_max & 255); } } /* The character or character type itself comes last in all cases. */ *code++ = c; } /* If previous was a character class or a back reference, we put the repeat stuff after it, but just skip the item if the repeat was {0,0}. */ else if (*previous == OP_CLASS || *previous == OP_REF) { if (repeat_max == 0) { code = previous; goto END_REPEAT; } if (repeat_min == 0 && repeat_max == -1) *code++ = OP_CRSTAR + repeat_type; else if (repeat_min == 1 && repeat_max == -1) *code++ = OP_CRPLUS + repeat_type; else if (repeat_min == 0 && repeat_max == 1) *code++ = OP_CRQUERY + repeat_type; else { *code++ = OP_CRRANGE + repeat_type; *code++ = repeat_min >> 8; *code++ = repeat_min & 255; if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */ *code++ = repeat_max >> 8; *code++ = repeat_max & 255; } } /* If previous was a bracket group, we may have to replicate it in certain cases. */ else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE || (int)*previous == OP_COND) { register int i; int ketoffset = 0; int len = code - previous; uschar *bralink = NULL; /* If the maximum repeat count is unlimited, find the end of the bracket by scanning through from the start, and compute the offset back to it from the current code pointer. There may be an OP_OPT setting following the final KET, so we can't find the end just by going back from the code pointer. */ if (repeat_max == -1) { register uschar *ket = previous; do ket += (ket[1] << 8) + ket[2]; while (*ket != OP_KET); ketoffset = code - ket; } /* The case of a zero minimum is special because of the need to stick OP_BRAZERO in front of it, and because the group appears once in the data, whereas in other cases it appears the minimum number of times. For this reason, it is simplest to treat this case separately, as otherwise the code gets far too messy. There are several special subcases when the minimum is zero. */ if (repeat_min == 0) { /* If we set up a required char from the bracket, we must back off to the previous value and reset the countlits value too. */ if (subcountlits > 0) { *reqchar = prevreqchar; *countlits -= subcountlits; } /* If the maximum is also zero, we just omit the group from the output altogether. */ if (repeat_max == 0) { code = previous; goto END_REPEAT; } /* If the maximum is 1 or unlimited, we just have to stick in the BRAZERO and do no more at this point. */ if (repeat_max <= 1) { memmove(previous+1, previous, len); code++; *previous++ = OP_BRAZERO + repeat_type; } /* If the maximum is greater than 1 and limited, we have to replicate in a nested fashion, sticking OP_BRAZERO before each set of brackets. The first one has to be handled carefully because it's the original copy, which has to be moved up. The remainder can be handled by code that is common with the non-zero minimum case below. We just have to adjust the value or repeat_max, since one less copy is required. */ else { int offset; memmove(previous+4, previous, len); code += 4; *previous++ = OP_BRAZERO + repeat_type; *previous++ = OP_BRA; /* We chain together the bracket offset fields that have to be filled in later when the ends of the brackets are reached. */ offset = (bralink == NULL)? 0 : previous - bralink; bralink = previous; *previous++ = offset >> 8; *previous++ = offset & 255; } repeat_max--; } /* If the minimum is greater than zero, replicate the group as many times as necessary, and adjust the maximum to the number of subsequent copies that we need. */ else { for (i = 1; i < repeat_min; i++) { memcpy(code, previous, len); code += len; } if (repeat_max > 0) repeat_max -= repeat_min; } /* This code is common to both the zero and non-zero minimum cases. If the maximum is limited, it replicates the group in a nested fashion, remembering the bracket starts on a stack. In the case of a zero minimum, the first one was set up above. In all cases the repeat_max now specifies the number of additional copies needed. */ if (repeat_max >= 0) { for (i = repeat_max - 1; i >= 0; i--) { *code++ = OP_BRAZERO + repeat_type; /* All but the final copy start a new nesting, maintaining the chain of brackets outstanding. */ if (i != 0) { int offset; *code++ = OP_BRA; offset = (bralink == NULL)? 0 : code - bralink; bralink = code; *code++ = offset >> 8; *code++ = offset & 255; } memcpy(code, previous, len); code += len; } /* Now chain through the pending brackets, and fill in their length fields (which are holding the chain links pro tem). */ while (bralink != NULL) { int oldlinkoffset; int offset = code - bralink + 1; uschar *bra = code - offset; oldlinkoffset = (bra[1] << 8) + bra[2]; bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset; *code++ = OP_KET; *code++ = bra[1] = offset >> 8; *code++ = bra[2] = (offset & 255); } } /* If the maximum is unlimited, set a repeater in the final copy. We can't just offset backwards from the current code point, because we don't know if there's been an options resetting after the ket. The correct offset was computed above. */ else code[-ketoffset] = OP_KETRMAX + repeat_type; } /* Else there's some kind of shambles */ else { *errorptr = ERR11; goto FAILED; } /* In all case we no longer have a previous item. */ END_REPEAT: previous = NULL; break; /* Start of nested bracket sub-expression, or comment or lookahead or lookbehind or option setting or condition. First deal with special things that can come after a bracket; all are introduced by ?, and the appearance of any of them means that this is not a referencing group. They were checked for validity in the first pass over the string, so we don't have to check for syntax errors here. */ case '(': newoptions = options; skipbytes = 0; if (*(++ptr) == '?') { int set, unset; int *optset; switch (*(++ptr)) { case '#': /* Comment; skip to ket */ ptr++; while (*ptr != ')') ptr++; continue; case ':': /* Non-extracting bracket */ bravalue = OP_BRA; ptr++; break; case '(': bravalue = OP_COND; /* Conditional group */ if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0) { int condref = *ptr - '0'; while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; if (condref == 0) { *errorptr = ERR35; goto FAILED; } ptr++; code[3] = OP_CREF; code[4] = condref >> 8; code[5] = condref & 255; skipbytes = 3; } else ptr--; break; case '=': /* Positive lookahead */ bravalue = OP_ASSERT; ptr++; break; case '!': /* Negative lookahead */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -