📄 mbregex.c
字号:
else BUFPUSH(casefold_off); } break; } if (stackp+8 >= stacke) { DOUBLE_STACK(int); } /* Laststart should point to the start_memory that we are about to push (unless the pattern has MBRE_NREGS or more ('s). */ /* obsolete: now MBRE_NREGS is just a default register size. */ *stackp++ = b - bufp->buffer; *stackp++ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; *stackp++ = begalt - bufp->buffer; switch (c) { case '(': BUFPUSH(start_memory); BUFPUSH(regnum); *stackp++ = regnum++; *stackp++ = b - bufp->buffer; BUFPUSH(0); /* too many ()'s to fit in a byte. (max 254) */ if (regnum >= MBRE_REG_MAX) goto too_big; break; case '=': case '!': case '>': BUFPUSH(start_nowidth); *stackp++ = b - bufp->buffer; BUFPUSH(0); /* temporary value */ BUFPUSH(0); if (c != '!') break; BUFPUSH(on_failure_jump); *stackp++ = b - bufp->buffer; BUFPUSH(0); /* temporary value */ BUFPUSH(0); break; case ':': BUFPUSH(start_paren); pending_exact = 0; default: break; } if (push_option) { BUFPUSH(option_set); BUFPUSH(options); } if (casefold) { if (options & MBRE_OPTION_IGNORECASE) BUFPUSH(casefold_on); else BUFPUSH(casefold_off); } *stackp++ = c; *stackp++ = old_options; fixup_alt_jump = 0; laststart = 0; begalt = b; } break; case ')': if (stackp == stackb) FREE_AND_RETURN(stackb, "unmatched )"); pending_exact = 0; if (fixup_alt_jump) { /* Push a dummy failure point at the end of the alternative for a possible future `finalize_jump' to pop. See comments at `push_dummy_failure' in `re_match'. */ BUFPUSH(push_dummy_failure); /* We allocated space for this jump when we assigned to `fixup_alt_jump', in the `handle_alt' case below. */ store_jump(fixup_alt_jump, jump, b); } if (options != stackp[-1]) { if ((options ^ stackp[-1]) & MBRE_OPTION_IGNORECASE) { BUFPUSH((options&MBRE_OPTION_IGNORECASE)?casefold_off:casefold_on); } if ((options ^ stackp[-1]) != MBRE_OPTION_IGNORECASE) { BUFPUSH(option_set); BUFPUSH(stackp[-1]); } } p0 = b; options = *--stackp; switch (c = *--stackp) { case '(': { char *loc = bufp->buffer + *--stackp; *loc = regnum - stackp[-1]; BUFPUSH(stop_memory); BUFPUSH(stackp[-1]); BUFPUSH(regnum - stackp[-1]); stackp--; } break; case '!': BUFPUSH(pop_and_fail); /* back patch */ STORE_NUMBER(bufp->buffer+stackp[-1], b - bufp->buffer - stackp[-1] - 2); stackp--; /* fall through */ case '=': BUFPUSH(stop_nowidth); /* tell stack-pos place to start_nowidth */ STORE_NUMBER(bufp->buffer+stackp[-1], b - bufp->buffer - stackp[-1] - 2); BUFPUSH(0); /* space to hold stack pos */ BUFPUSH(0); stackp--; break; case '>': BUFPUSH(stop_backtrack); /* tell stack-pos place to start_nowidth */ STORE_NUMBER(bufp->buffer+stackp[-1], b - bufp->buffer - stackp[-1] - 2); BUFPUSH(0); /* space to hold stack pos */ BUFPUSH(0); stackp--; break; case ':': BUFPUSH(stop_paren); break; default: break; } begalt = *--stackp + bufp->buffer; stackp--; fixup_alt_jump = *stackp ? *stackp + bufp->buffer - 1 : 0; laststart = *--stackp + bufp->buffer; if (c == '!' || c == '=') laststart = b; break; case '|': /* Insert before the previous alternative a jump which jumps to this alternative if the former fails. */ GET_BUFFER_SPACE(3); insert_jump(on_failure_jump, begalt, b + 6, b); pending_exact = 0; b += 3; /* The alternative before this one has a jump after it which gets executed if it gets matched. Adjust that jump so it will jump to this alternative's analogous jump (put in below, which in turn will jump to the next (if any) alternative's such jump, etc.). The last such jump jumps to the correct final destination. A picture: _____ _____ | | | | | v | v a | b | c If we are at `b', then fixup_alt_jump right now points to a three-byte space after `a'. We'll put in the jump, set fixup_alt_jump to right after `b', and leave behind three bytes which we'll fill in when we get to after `c'. */ if (fixup_alt_jump) store_jump(fixup_alt_jump, jump_past_alt, b); /* Mark and leave space for a jump after this alternative, to be filled in later either by next alternative or when know we're at the end of a series of alternatives. */ fixup_alt_jump = b; GET_BUFFER_SPACE(3); b += 3; laststart = 0; begalt = b; break; case '{': /* If there is no previous pattern, this is an invalid pattern. */ if (!laststart) { snprintf(error_msg, ERROR_MSG_MAX_SIZE, "invalid regular expression; there's no previous pattern, to which '{' would define cardinality at %d", p-pattern); FREE_AND_RETURN(stackb, error_msg); } if( p == pend) FREE_AND_RETURN(stackb, "invalid regular expression; '{' can't be last character" ); beg_interval = p - 1; lower_bound = -1; /* So can see if are set. */ upper_bound = -1; GET_UNSIGNED_NUMBER(lower_bound); if (c == ',') { GET_UNSIGNED_NUMBER(upper_bound); } else /* Interval such as `{1}' => match exactly once. */ upper_bound = lower_bound; if (lower_bound < 0 || c != '}') goto unfetch_interval; if (lower_bound >= MBRE_DUP_MAX || upper_bound >= MBRE_DUP_MAX) FREE_AND_RETURN(stackb, "too big quantifier in {,}"); if (upper_bound < 0) upper_bound = MBRE_DUP_MAX; if (lower_bound > upper_bound) FREE_AND_RETURN(stackb, "can't do {n,m} with n > m"); beg_interval = 0; pending_exact = 0; greedy = 1; if (p != pend) { PATFETCH(c); if (c == '?') greedy = 0; else PATUNFETCH; } if (lower_bound == 0) { zero_times_ok = 1; if (upper_bound == MBRE_DUP_MAX) { many_times_ok = 1; goto repeat; } if (upper_bound == 1) { many_times_ok = 0; goto repeat; } } if (lower_bound == 1) { if (upper_bound == 1) { /* No need to repeat */ break; } if (upper_bound == MBRE_DUP_MAX) { many_times_ok = 1; zero_times_ok = 0; goto repeat; } } /* If upper_bound is zero, don't want to succeed at all; jump from laststart to b + 3, which will be the end of the buffer after this jump is inserted. */ if (upper_bound == 0) { GET_BUFFER_SPACE(3); insert_jump(jump, laststart, b + 3, b); b += 3; break; } /* If lower_bound == upper_bound, repeat count can be removed */ if (lower_bound == upper_bound) { int mcnt; int skip_stop_paren = 0; if (b[-1] == stop_paren) { skip_stop_paren = 1; b--; } if (*laststart == exactn && laststart[1]+2 == b - laststart && laststart[1]*lower_bound < 256) { mcnt = laststart[1]; GET_BUFFER_SPACE((lower_bound-1)*mcnt); laststart[1] = lower_bound*mcnt; while (--lower_bound) { memcpy(b, laststart+2, mcnt); b += mcnt; } if (skip_stop_paren) BUFPUSH(stop_paren); break; } if (lower_bound < 5 && b - laststart < 10) { /* 5 and 10 are the magic numbers */ mcnt = b - laststart; GET_BUFFER_SPACE((lower_bound-1)*mcnt); while (--lower_bound) { memcpy(b, laststart, mcnt); b += mcnt; } if (skip_stop_paren) BUFPUSH(stop_paren); break; } if (skip_stop_paren) b++; /* push back stop_paren */ } /* Otherwise, we have a nontrivial interval. When we're all done, the pattern will look like: set_number_at <jump count> <upper bound> set_number_at <succeed_n count> <lower bound> succeed_n <after jump addr> <succed_n count> <body of loop> jump_n <succeed_n addr> <jump count> (The upper bound and `jump_n' are omitted if `upper_bound' is 1, though.) */ { /* If the upper bound is > 1, we need to insert more at the end of the loop. */ unsigned int nbytes = (unsigned int)upper_bound == 1 ? 10 : 20; GET_BUFFER_SPACE(nbytes); /* Initialize lower bound of the `succeed_n', even though it will be set during matching by its attendant `set_number_at' (inserted next), because `re_compile_fastmap' needs to know. Jump to the `jump_n' we might insert below. */ insert_jump_n(succeed_n, laststart, b + (nbytes/2), b, lower_bound); b += 5; /* Just increment for the succeed_n here. */ /* Code to initialize the lower bound. Insert before the `succeed_n'. The `5' is the last two bytes of this `set_number_at', plus 3 bytes of the following `succeed_n'. */ insert_op_2(set_number_at, laststart, b, 5, lower_bound); b += 5; if (upper_bound > 1) { /* More than one repetition is allowed, so append a backward jump to the `succeed_n' that starts this interval. When we've reached this during matching, we'll have matched the interval once, so jump back only `upper_bound - 1' times. */ GET_BUFFER_SPACE(5); store_jump_n(b, greedy?jump_n:finalize_push_n, laststart + 5, upper_bound - 1); b += 5; /* The location we want to set is the second parameter of the `jump_n'; that is `b-2' as an absolute address. `laststart' will be the `set_number_at' we're about to insert; `laststart+3' the number to set, the source for the relative address. But we are inserting into the middle of the pattern -- so everything is getting moved up by 5. Conclusion: (b - 2) - (laststart + 3) + 5, i.e., b - laststart. We insert this at the beginning of the loop so that if we fail during matching, we'll reinitialize the bounds. */ insert_op_2(set_number_at, laststart, b, b - laststart, upper_bound - 1); b += 5; } } break; unfetch_interval: /* If an invalid interval, match the characters as literals. */ p = beg_interval; beg_interval = 0; /* normal_char and normal_backslash need `c'. */ PATFETCH(c); goto normal_char; case '\\': if (p == pend) FREE_AND_RETURN(stackb, "invalid regular expression; '\\' can't be last character"); /* Do not translate the character after the \, so that we can distinguish, e.g., \B from \b, even if we normally would translate, e.g., B to b. */ PATFETCH_RAW(c); switch (c) { case 's': case 'S': case 'd': case 'D': while (b - bufp->buffer + 9 + (1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH > bufp->allocated) EXTEND_BUFFER; laststart = b; if (c == 's' || c == 'd') { BUFPUSH(charset); } else { BUFPUSH(charset_not); } BUFPUSH((1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH); memset(b, 0, (1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH + 2); if (c == 's' || c == 'S') { SET_LIST_BIT(' '); SET_LIST_BIT('\t'); SET_LIST_BIT('\n'); SET_LIST_BIT('\r'); SET_LIST_BIT('\f'); } else { char cc; for (cc = '0'; cc <= '9'; cc++) { SET_LIST_BIT(cc); } } while ((int)b[-1] > 0 && b[b[-1] - 1] == 0) b[-1]--; if (b[-1] != (1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH) memmove(&b[(int)b[-1]], &b[(1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH], 2 + EXTRACT_UNSIGNED(&b[(1 << MBRE_BYTEWIDTH) / MBRE_BYTEWIDTH])*8); b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[(int)b[-1]])*8; break; case 'w': laststart = b; BUFPUSH(wordchar); break; case 'W': laststart = b; BUFPUSH(notwordchar); break;#ifndef RUBY case '<': BUFPUSH(wordbeg); break; case '>': BUFPUSH(wordend); break;#endif case 'b': BUFPUSH(wordbound); break; case 'B': BUFPUSH(notwordbound); break; case 'A': BUFPUSH(begbuf); break; case 'Z': if ((bufp->options & MBRE_OPTION_SINGLELINE) == 0) { BUFPUSH(endbuf2); break; } /* fall through */ case 'z': BUFPUSH(endbuf); break; case 'G': BUFPUSH(begpos); break; /* hex */ case 'x': had_mbchar = 0; c = scan_hex(p, 2, &numlen); p += numlen; had_num_literal = 1; goto numeric_char; /* octal */ case '0': had_mbchar = 0; c = scan_oct(p, 3, &numlen); p += numlen; had_num_literal = 1; goto numeric_char; /* back-ref or octal */ case '1': case '2': case '3': case '4': case '5': cas
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -