📄 gnuregex.c
字号:
return ret; } else if (p[0] == '-' && p[1] != ']') { /* This handles ranges made up of characters only. */ reg_errcode_t ret; /* Move past the `-'. */ PATFETCH(c1); ret = compile_range(&p, pend, translate, syntax, b); if (ret != REG_NOERROR) return ret; } /* See if we're at the beginning of a possible character * class. */ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') { /* Leave room for the null. */ char str[CHAR_CLASS_MAX_LENGTH + 1]; PATFETCH(c); c1 = 0; /* If pattern is `[[:'. */ if (p == pend) return REG_EBRACK; for (;;) { PATFETCH(c); if (c == ':' || c == ']' || p == pend || c1 == CHAR_CLASS_MAX_LENGTH) break; str[c1++] = c; } str[c1] = '\0'; /* If isn't a word bracketed by `[:' and:`]': * undo the ending character, the letters, and leave * the leading `:' and `[' (but set bits for them). */ if (c == ':' && *p == ']') { int ch; boolean is_alnum = STREQ(str, "alnum"); boolean is_alpha = STREQ(str, "alpha"); boolean is_blank = STREQ(str, "blank"); boolean is_cntrl = STREQ(str, "cntrl"); boolean is_digit = STREQ(str, "digit"); boolean is_graph = STREQ(str, "graph"); boolean is_lower = STREQ(str, "lower"); boolean is_print = STREQ(str, "print"); boolean is_punct = STREQ(str, "punct"); boolean is_space = STREQ(str, "space"); boolean is_upper = STREQ(str, "upper"); boolean is_xdigit = STREQ(str, "xdigit"); if (!IS_CHAR_CLASS(str)) return REG_ECTYPE; /* Throw away the ] at the end of the character * class. */ PATFETCH(c); if (p == pend) return REG_EBRACK; for (ch = 0; ch < 1 << BYTEWIDTH; ch++) { if ((is_alnum && ISALNUM(ch)) || (is_alpha && ISALPHA(ch)) || (is_blank && ISBLANK(ch)) || (is_cntrl && ISCNTRL(ch)) || (is_digit && ISDIGIT(ch)) || (is_graph && ISGRAPH(ch)) || (is_lower && ISLOWER(ch)) || (is_print && ISPRINT(ch)) || (is_punct && ISPUNCT(ch)) || (is_space && ISSPACE(ch)) || (is_upper && ISUPPER(ch)) || (is_xdigit && ISXDIGIT(ch))) SET_LIST_BIT(ch); } had_char_class = true; } else { c1++; while (c1--) PATUNFETCH; SET_LIST_BIT('['); SET_LIST_BIT(':'); had_char_class = false; } } else { had_char_class = false; SET_LIST_BIT(c); } } /* Discard any (non)matching list bytes that are all 0 at the * end of the map. Decrease the map-length byte too. */ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) b[-1]--; b += b[-1]; } break; case '(': if (syntax & RE_NO_BK_PARENS) goto handle_open; else goto normal_char; case ')': if (syntax & RE_NO_BK_PARENS) goto handle_close; else goto normal_char; case '\n': if (syntax & RE_NEWLINE_ALT) goto handle_alt; else goto normal_char; case '|': if (syntax & RE_NO_BK_VBAR) goto handle_alt; else goto normal_char; case '{': if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) goto handle_interval; else goto normal_char; case '\\': if (p == pend) return REG_EESCAPE; /* Do not translate the character after the \, so that we can * distinguish, e.g., \B from \b, even if we normally would * translate, e.g., B to b. */ PATFETCH_RAW(c); switch (c) { case '(': if (syntax & RE_NO_BK_PARENS) goto normal_backslash; handle_open: bufp->re_nsub++; regnum++; if (COMPILE_STACK_FULL) { RETALLOC(compile_stack.stack, compile_stack.size << 1, compile_stack_elt_t); if (compile_stack.stack == NULL) return REG_ESPACE; compile_stack.size <<= 1; } /* These are the values to restore when we hit end of this * group. They are all relative offsets, so that if the * whole pattern moves because of realloc, they will still * be valid. */ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; COMPILE_STACK_TOP.fixup_alt_jump = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; COMPILE_STACK_TOP.regnum = regnum; /* We will eventually replace the 0 with the number of * groups inner to this one. But do not push a * start_memory for groups beyond the last one we can * represent in the compiled pattern. */ if (regnum <= MAX_REGNUM) { COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; BUF_PUSH_3(start_memory, regnum, 0); } compile_stack.avail++; fixup_alt_jump = 0; laststart = 0; begalt = b; /* If we've reached MAX_REGNUM groups, then this open * won't actually generate any code, so we'll have to * clear pending_exact explicitly. */ pending_exact = 0; break; case ')': if (syntax & RE_NO_BK_PARENS) goto normal_backslash; if (COMPILE_STACK_EMPTY) { if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) goto normal_backslash; else return REG_ERPAREN; } handle_close: if (fixup_alt_jump) { /* Push a dummy failure point at the end of the * alternative for a possible future * `pop_failure_jump' to pop. See comments at * `push_dummy_failure' in `re_match_2'. */ BUF_PUSH(push_dummy_failure); /* We allocated space for this jump when we assigned * to `fixup_alt_jump', in the `handle_alt' case below. */ STORE_JUMP(jump_past_alt, fixup_alt_jump, b - 1); } /* See similar code for backslashed left paren above. */ if (COMPILE_STACK_EMPTY) { if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) goto normal_char; else return REG_ERPAREN; } /* Since we just checked for an empty stack above, this * ``can't happen''. */ assert(compile_stack.avail != 0); { /* We don't just want to restore into `regnum', because * later groups should continue to be numbered higher, * as in `(ab)c(de)' -- the second group is #2. */ regnum_t this_group_regnum; compile_stack.avail--; begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; fixup_alt_jump = COMPILE_STACK_TOP.fixup_alt_jump ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 : 0; laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; this_group_regnum = COMPILE_STACK_TOP.regnum; /* If we've reached MAX_REGNUM groups, then this open * won't actually generate any code, so we'll have to * clear pending_exact explicitly. */ pending_exact = 0; /* We're at the end of the group, so now we know how many * groups were inside this one. */ if (this_group_regnum <= MAX_REGNUM) { unsigned char *inner_group_loc = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; *inner_group_loc = regnum - this_group_regnum; BUF_PUSH_3(stop_memory, this_group_regnum, regnum - this_group_regnum); } } break; case '|': /* `\|'. */ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) goto normal_backslash; handle_alt: if (syntax & RE_LIMITED_OPS) goto normal_char; /* Insert before the previous alternative a jump which * jumps to this alternative if the former fails. */ GET_BUFFER_SPACE(3); INSERT_JUMP(on_failure_jump, begalt, b + 6); pending_exact = 0; b += 3; /* The alternative before this one has a jump after it * which gets executed if it gets matched. Adjust that * jump so it will jump to this alternative's analogous * jump (put in below, which in turn will jump to the next * (if any) alternative's such jump, etc.). The last such * jump jumps to the correct final destination. A picture: * _____ _____ * | | | | * | v | v * a | b | c * * If we are at `b', then fixup_alt_jump right now points to a * three-byte space after `a'. We'll put in the jump, set * fixup_alt_jump to right after `b', and leave behind three * bytes which we'll fill in when we get to after `c'. */ if (fixup_alt_jump) STORE_JUMP(jump_past_alt, fixup_alt_jump, b); /* Mark and leave space for a jump after this alternative, * to be filled in later either by next alternative or * when know we're at the end of a series of alternatives. */ fixup_alt_jump = b; GET_BUFFER_SPACE(3); b += 3; laststart = 0; begalt = b; break; case '{': /* If \{ is a literal. */ if (!(syntax & RE_INTERVALS) /* If we're at `\{' and it's not the open-interval * operator. */ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) || (p - 2 == pattern && p == pend)) goto normal_backslash; handle_interval: { /* If got here, then the syntax allows intervals. */ /* At least (most) this many matches must be made. */ int lower_bound = -1, upper_bound = -1; beg_interval = p - 1; if (p == pend) { if (syntax & RE_NO_BK_BRACES) goto unfetch_interval; else return REG_EBRACE; } GET_UNSIGNED_NUMBER(lower_bound); if (c == ',') { GET_UNSIGNED_NUMBER(upper_bound); if (upper_bound < 0) upper_bound = RE_DUP_MAX; } else /* Interval such as `{1}' => match exactly once. */ upper_bound = lower_bound; if (lower_bound < 0 || upper_bound > RE_DUP_MAX || lower_bound > upper_bound) { if (syntax & RE_NO_BK_BRACES) goto unfetch_interval; else return REG_BADBR; } if (!(syntax & RE_NO_BK_BRACES)) { if (c != '\\') return REG_EBRACE; PATFETCH(c); } if (c != '}') { if (syntax & RE_NO_BK_BRACES) goto unfetch_interval; else return REG_BADBR; } /* We just parsed a valid interval. */ /* If it's invalid to have no preceding re. */ if (!laststart) { if (syntax & RE_CONTEXT_INVALID_OPS) return REG_BADRPT; else if (syntax & RE_CONTEXT_INDEP_OPS) laststart = b; else goto unfetch_interval; } /* If the upper bound is zero, don't want to succeed at * all; jump from `laststart' to `b + 3', which will be * the end of the buffer after we insert the jump. */ if (upper_bound == 0) { GET_BUFFER_SPACE(3); INSERT_JUMP(jump, laststart, b + 3); b += 3; } /* Otherwise, we have a nontrivial interval. When * we're all done, the pattern will look like: * set_number_at <jump count> <upper bound> * set_number_at <succeed_n count> <lower bound> * succeed_n <after jump addr> <succed_n count> * <body of loop> * jump_n <succeed_n addr> <jump count> * (The upper bound and `jump_n' are omitted if * `upper_bound' is 1, though.) */ else { /* If the upper bound is > 1, we need to insert * more at the end of the loop. */ unsigned nbytes = 10 + (upper_bound > 1) * 10; GET_BUFFER_SPACE(nbytes); /* Initialize lower bound of the `succeed_n', even * though it will be set during matching by its * attendant `set_number_at' (inserted next), * because `re_compile_fastmap' needs to know. * Jump to the `jump_n' we might insert below. */ INSERT_JUMP2(succeed_n, laststart, b + 5 + (upper_bound > 1) * 5, lower_bound); b += 5; /* Code to initialize the lower bound. Insert * before the `succeed_n'. The `5' is the last two * bytes of this `set_number_at', plus 3 bytes of * the following `succeed_n'. */ insert_op2(set_number_at, laststart, 5, lower_bound, b); b += 5; if (upper_bound > 1) { /* More than one repetition is allowed, so * append a backward jump to the `succeed_n' * that starts this interval. * * When we've reached this during matching, * we'll have matched the interval once, so * jump back only `upper_bound - 1' times. */ STORE_JUMP2(jump_n, b, laststart + 5, upper_bound - 1); b += 5; /* The location we want to set is the second * parameter of the `jump_n'; that is `b-2' as * an absolute address. `laststart' will be * the `set_number_at' we're about to insert; * `laststart+3' the number to set, the source * for the relative address. But we are * inserting into the middle of the pattern -- * so everything is getting moved up by 5. * Conclusion: (b - 2) - (laststart + 3) + 5, * i.e., b - laststart. * * We insert this at the beginning of the loop * so that if we fail during matching, we'll * reinitialize the bounds. */ insert_op2(set_number_at, laststart, b - laststart, upper_bound - 1, b); b += 5; } } pending_exact = 0; beg_interval = NULL; } break; unfetch_interval: /* If an invalid interval, match the characters as literals. */ assert(beg_interval); p = beg_interval; beg_interval = NULL; /* normal_char and normal_backslash need `c'. */ PATFETCH(c); if (!(syntax & RE_NO_BK_BRACES)) { if (p > pattern && p[-1] == '\\') goto normal_backslash; } goto normal_char;#ifdef emacs /* There is no way to specify the before_dot and after_dot * operators. rms says this is ok. --karl */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -