📄 mbregex.c
字号:
case '+': case '?': case '*': /* If there is no previous pattern, char not special. */ if (!laststart) { snprintf(error_msg, ERROR_MSG_MAX_SIZE, "invalid regular expression; there's no previous pattern, to which '%c' would define cardinality at %d", c, p-pattern); if (bufp->buffer) { xfree(bufp->buffer); } FREE_AND_RETURN(stackb, error_msg); } /* If there is a sequence of repetition chars, collapse it down to just one. */ zero_times_ok = c != '+'; many_times_ok = c != '?'; greedy = 1; if (p != pend) { PATFETCH(c); switch (c) { case '?': greedy = 0; break; case '*': case '+': goto nested_meta; default: PATUNFETCH; break; } } repeat: /* Star, etc. applied to an empty pattern is equivalent to an empty pattern. */ if (!laststart) break; if (greedy && many_times_ok && *laststart == anychar && b - laststart <= 2) { if (b[-1] == stop_paren) b--; if (zero_times_ok) *laststart = anychar_repeat; else { BUFPUSH(anychar_repeat); } break; } /* Now we know whether or not zero matches is allowed and also whether or not two or more matches is allowed. */ if (many_times_ok) { /* If more than one repetition is allowed, put in at the end a backward relative jump from b to before the next jump we're going to put in below (which jumps from laststart to after this jump). */ GET_BUFFER_SPACE(3); store_jump(b,greedy?maybe_finalize_jump:finalize_push,laststart-3); b += 3; /* Because store_jump put stuff here. */ } /* On failure, jump from laststart to next pattern, which will be the end of the buffer after this jump is inserted. */ GET_BUFFER_SPACE(3); insert_jump(on_failure_jump, laststart, b + 3, b); b += 3; if (zero_times_ok) { if (greedy == 0) { GET_BUFFER_SPACE(3); insert_jump(try_next, laststart, b + 3, b); b += 3; } } else { /* At least one repetition is required, so insert a `dummy_failure_jump' before the initial `on_failure_jump' instruction of the loop. This effects a skip over that instruction the first time we hit that loop. */ GET_BUFFER_SPACE(3); insert_jump(dummy_failure_jump, laststart, laststart + 6, b); b += 3; } break; case '.': laststart = b; BUFPUSH(anychar); break; case '[': if (p == pend) FREE_AND_RETURN(stackb, "invalid regular expression; '[' can't be the last character ie. can't start range at the end of pattern"); while ((b - bufp->buffer + 9 + (1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH) > bufp->allocated) EXTEND_BUFFER; laststart = b; if (*p == '^') { BUFPUSH(charset_not); p++; } else BUFPUSH(charset); p0 = p; BUFPUSH((1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH); /* Clear the whole map */ memset(b, 0, (1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH + 2); had_mbchar = 0; had_num_literal = 0; had_char_class = 0; /* Read in characters and ranges, setting map bits. */ for (;;) { int size; unsigned last = (unsigned)-1; if ((size = EXTRACT_UNSIGNED(&b[(1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH])) || current_mbctype) { /* Ensure the space is enough to hold another interval of multi-byte chars in charset(_not)?. */ size = (1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH + 2 + size*8 + 8; while (b + size + 1 > bufp->buffer + bufp->allocated) EXTEND_BUFFER; } range_retry: if (range && had_char_class) { FREE_AND_RETURN(stackb, "invalid regular expression; can't use character class as an end value of range"); } PATFETCH(c); if (c == ']') { if (p == p0 + 1) { if (p == pend) FREE_AND_RETURN(stackb, "invalid regular expression; empty character class"); } else /* Stop if this isn't merely a ] inside a bracket expression, but rather the end of a bracket expression. */ break; } /* Look ahead to see if it's a range when the last thing was a character class. */ if (had_char_class && c == '-' && *p != ']') FREE_AND_RETURN(stackb, "invalid regular expression; can't use character class as a start value of range"); if (ismbchar(c)) { PATFETCH_MBC(c); had_mbchar++; } had_char_class = 0; /* \ escapes characters when inside [...]. */ if (c == '\\') { PATFETCH_RAW(c); switch (c) { case 'w': for (c = 0; c < (1 << MBRE_BYTEWIDTH); c++) { if (SYNTAX(c) == Sword || (!current_mbctype && SYNTAX(c) == Sword2)) SET_LIST_BIT(c); } if (current_mbctype) { set_list_bits(0x80, 0xffffffff, b); } had_char_class = 1; last = -1; continue; case 'W': for (c = 0; c < (1 << MBRE_BYTEWIDTH); c++) { if (SYNTAX(c) != Sword && ((current_mbctype && !re_mbctab[c]) || (!current_mbctype && SYNTAX(c) != Sword2))) SET_LIST_BIT(c); } had_char_class = 1; last = -1; continue; case 's': for (c = 0; c < 256; c++) if (ISSPACE(c)) SET_LIST_BIT(c); had_char_class = 1; last = -1; continue; case 'S': for (c = 0; c < 256; c++) if (!ISSPACE(c)) SET_LIST_BIT(c); if (current_mbctype) set_list_bits(0x80, 0xffffffff, b); had_char_class = 1; last = -1; continue; case 'd': for (c = '0'; c <= '9'; c++) SET_LIST_BIT(c); had_char_class = 1; last = -1; continue; case 'D': for (c = 0; c < 256; c++) if (!ISDIGIT(c)) SET_LIST_BIT(c); if (current_mbctype) set_list_bits(0x80, 0xffffffff, b); had_char_class = 1; last = -1; continue; case 'x': c = scan_hex(p, 2, &numlen); p += numlen; had_num_literal = 1; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': PATUNFETCH; c = scan_oct(p, 3, &numlen); p += numlen; had_num_literal = 1; break; case 'M': case 'C': case 'c': p0 = --p; c = read_special(p, pend, &p0); if (c > 255) goto invalid_escape; p = p0; had_num_literal = 1; break; default: c = read_backslash(c); if (ismbchar(c)) { PATFETCH_MBC(c); had_mbchar++; } break; } } /* Get a range. */ if (range) { if (last > c) goto invalid_pattern; range = 0; if (had_mbchar == 0) { for (;last<=c;last++) SET_LIST_BIT(last); } else if (had_mbchar == 2) { set_list_bits(last, c, b); } else { /* restriction: range between sbc and mbc */ goto invalid_pattern; } } else if (p[0] == '-' && p[1] != ']') { last = c; PATFETCH(c1); range = 1; goto range_retry; } else if (c == '[' && *p == ':') { /* Leave room for the null. */ char str[CHAR_CLASS_MAX_LENGTH + 1]; PATFETCH_RAW(c); c1 = 0; /* If pattern is `[[:'. */ if (p == pend) FREE_AND_RETURN(stackb, "invalid regular expression; re can't end '[[:'"); for (;;) { PATFETCH (c); if (c == ':' || c == ']' || p == pend || c1 == CHAR_CLASS_MAX_LENGTH) break; str[c1++] = c; } str[c1] = '\0'; /* If isn't a word bracketed by `[:' and:`]': undo the ending character, the letters, and leave the leading `:' and `[' (but set bits for them). */ if (c == ':' && *p == ']') { int ch; char is_alnum = STREQ(str, "alnum"); char is_alpha = STREQ(str, "alpha"); char is_blank = STREQ(str, "blank"); char is_cntrl = STREQ(str, "cntrl"); char is_digit = STREQ(str, "digit"); char is_graph = STREQ(str, "graph"); char is_lower = STREQ(str, "lower"); char is_print = STREQ(str, "print"); char is_punct = STREQ(str, "punct"); char is_space = STREQ(str, "space"); char is_upper = STREQ(str, "upper"); char is_xdigit = STREQ(str, "xdigit"); if (!IS_CHAR_CLASS(str)){ snprintf(error_msg, ERROR_MSG_MAX_SIZE, "invalid regular expression; [:%s:] is not a character class", str); FREE_AND_RETURN(stackb, error_msg); } /* Throw away the ] at the end of the character class. */ PATFETCH(c); if (p == pend) FREE_AND_RETURN(stackb, "invalid regular expression; range doesn't have ending ']' after a character class"); for (ch = 0; ch < 1 << MBRE_BYTEWIDTH; ch++) { if ( (is_alnum && ISALNUM(ch)) || (is_alpha && ISALPHA(ch)) || (is_blank && ISBLANK(ch)) || (is_cntrl && ISCNTRL(ch)) || (is_digit && ISDIGIT(ch)) || (is_graph && ISGRAPH(ch)) || (is_lower && ISLOWER(ch)) || (is_print && ISPRINT(ch)) || (is_punct && ISPUNCT(ch)) || (is_space && ISSPACE(ch)) || (is_upper && ISUPPER(ch)) || (is_xdigit && ISXDIGIT(ch))) SET_LIST_BIT(ch); } had_char_class = 1; } else { c1++; while (c1--) PATUNFETCH; SET_LIST_BIT(TRANSLATE_P()?translate['[']:'['); SET_LIST_BIT(TRANSLATE_P()?translate[':']:':'); had_char_class = 0; last = ':'; } } else if (had_mbchar == 0 && (!current_mbctype || !had_num_literal)) { SET_LIST_BIT(c); had_num_literal = 0; } else set_list_bits(c, c, b); had_mbchar = 0; } /* Discard any character set/class bitmap bytes that are all 0 at the end of the map. Decrement the map-length byte too. */ while ((int)b[-1] > 0 && b[(int)b[-1] - 1] == 0) b[-1]--; if (b[-1] != (1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH) memmove(&b[(int)b[-1]], &b[(1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH], 2 + EXTRACT_UNSIGNED(&b[(1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH])*8); b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[(int)b[-1]])*8; break; case '(': { int old_options = options; int push_option = 0; int casefold = 0; PATFETCH(c); if (c == '?') { int negative = 0; PATFETCH_RAW(c); switch (c) { case 'x': case 'p': case 'm': case 'i': case '-': for (;;) { switch (c) { case '-': negative = 1; break; case ':': case ')': break; case 'x': if (negative) options &= ~MBRE_OPTION_EXTENDED; else options |= MBRE_OPTION_EXTENDED; break; case 'p': if (negative) { if ((options&MBRE_OPTION_POSIXLINE) == MBRE_OPTION_POSIXLINE) { options &= ~MBRE_OPTION_POSIXLINE; } } else if ((options&MBRE_OPTION_POSIXLINE) != MBRE_OPTION_POSIXLINE) { options |= MBRE_OPTION_POSIXLINE; } push_option = 1; break; case 'm': if (negative) { if (options&MBRE_OPTION_MULTILINE) { options &= ~MBRE_OPTION_MULTILINE; } } else if (!(options&MBRE_OPTION_MULTILINE)) { options |= MBRE_OPTION_MULTILINE; } push_option = 1; break; case 'i': if (negative) { if (options&MBRE_OPTION_IGNORECASE) { options &= ~MBRE_OPTION_IGNORECASE; } } else if (!(options&MBRE_OPTION_IGNORECASE)) { options |= MBRE_OPTION_IGNORECASE; } casefold = 1; break; default: FREE_AND_RETURN(stackb, "undefined (?...) inline option"); } if (c == ')') { c = '#'; /* read whole in-line options */ break; } if (c == ':') break; PATFETCH_RAW(c); } break; case '#': for (;;) { PATFETCH(c); if (c == ')') break; } c = '#'; break; case ':': case '=': case '!': case '>': break; default: FREE_AND_RETURN(stackb, "undefined (?...) sequence"); } } else { PATUNFETCH; c = '('; } if (c == '#') { if (push_option) { BUFPUSH(option_set); BUFPUSH(options); } if (casefold) { if (options & MBRE_OPTION_IGNORECASE) BUFPUSH(casefold_on);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -