📄 regexec.c
字号:
if (BE (start < 0 || start > length, 0)) return -1; if (BE (length < last_start || (0 <= range && last_start < start), 0)) last_start = length; else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0)) last_start = 0; __libc_lock_lock (dfa->lock); eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; eflags |= (bufp->not_eol) ? REG_NOTEOL : 0; /* Compile fastmap if we haven't yet. */ if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate) re_compile_fastmap (bufp); if (BE (bufp->no_sub, 0)) regs = NULL; /* We need at least 1 register. */ if (regs == NULL) nregs = 1; else if (BE (bufp->regs_allocated == REGS_FIXED && regs->num_regs <= bufp->re_nsub, 0)) { nregs = regs->num_regs; if (BE (nregs < 1, 0)) { /* Nothing can be copied to regs. */ regs = NULL; nregs = 1; } } else nregs = bufp->re_nsub + 1; pmatch = re_malloc (regmatch_t, nregs); if (BE (pmatch == NULL, 0)) { rval = -2; goto out; } result = re_search_internal (bufp, string, length, start, last_start, stop, nregs, pmatch, eflags); rval = 0; /* I hope we needn't fill ther regs with -1's when no match was found. */ if (result != REG_NOERROR) rval = -1; else if (regs != NULL) { /* If caller wants register contents data back, copy them. */ bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs, bufp->regs_allocated); if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0)) rval = -2; } if (BE (rval == 0, 1)) { if (ret_len) { assert (pmatch[0].rm_so == start); rval = pmatch[0].rm_eo - start; } else rval = pmatch[0].rm_so; } re_free (pmatch); out: __libc_lock_unlock (dfa->lock); return rval;}static unsigned intinternal_functionre_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, int regs_allocated){ int rval = REGS_REALLOCATE; Idx i; Idx need_regs = nregs + 1; /* We need one extra element beyond `num_regs' for the `-1' marker GNU code uses. */ /* Have the register data arrays been allocated? */ if (regs_allocated == REGS_UNALLOCATED) { /* No. So allocate them with malloc. */ regs->start = re_malloc (regoff_t, need_regs); if (BE (regs->start == NULL, 0)) return REGS_UNALLOCATED; regs->end = re_malloc (regoff_t, need_regs); if (BE (regs->end == NULL, 0)) { re_free (regs->start); return REGS_UNALLOCATED; } regs->num_regs = need_regs; } else if (regs_allocated == REGS_REALLOCATE) { /* Yes. If we need more elements than were already allocated, reallocate them. If we need fewer, just leave it alone. */ if (BE (need_regs > regs->num_regs, 0)) { regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs); regoff_t *new_end; if (BE (new_start == NULL, 0)) return REGS_UNALLOCATED; new_end = re_realloc (regs->end, regoff_t, need_regs); if (BE (new_end == NULL, 0)) { re_free (new_start); return REGS_UNALLOCATED; } regs->start = new_start; regs->end = new_end; regs->num_regs = need_regs; } } else { assert (regs_allocated == REGS_FIXED); /* This function may not be called with REGS_FIXED and nregs too big. */ assert (regs->num_regs >= nregs); rval = REGS_FIXED; } /* Copy the regs. */ for (i = 0; i < nregs; ++i) { regs->start[i] = pmatch[i].rm_so; regs->end[i] = pmatch[i].rm_eo; } for ( ; i < regs->num_regs; ++i) regs->start[i] = regs->end[i] = -1; return rval;}/* Set REGS to hold NUM_REGS registers, storing them in STARTS and ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use this memory for recording register information. STARTS and ENDS must be allocated using the malloc library routine, and must each be at least NUM_REGS * sizeof (regoff_t) bytes long. If NUM_REGS == 0, then subsequent matches should allocate their own register data. Unless this function is called, the first search or match using PATTERN_BUFFER will allocate its own register data, without freeing the old data. */voidre_set_registers (bufp, regs, num_regs, starts, ends) struct re_pattern_buffer *bufp; struct re_registers *regs; __re_size_t num_regs; regoff_t *starts, *ends;{ if (num_regs) { bufp->regs_allocated = REGS_REALLOCATE; regs->num_regs = num_regs; regs->start = starts; regs->end = ends; } else { bufp->regs_allocated = REGS_UNALLOCATED; regs->num_regs = 0; regs->start = regs->end = NULL; }}#ifdef _LIBCweak_alias (__re_set_registers, re_set_registers)#endif/* Entry points compatible with 4.2 BSD regex library. We don't define them unless specifically requested. */#if defined _REGEX_RE_COMP || defined _LIBCint# ifdef _LIBCweak_function# endifre_exec (s) const char *s;{ return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);}#endif /* _REGEX_RE_COMP *//* Internal entry point. *//* Searches for a compiled pattern PREG in the string STRING, whose length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same meaning as with regexec. LAST_START is START + RANGE, where START and RANGE have the same meaning as with re_search. Return REG_NOERROR if we find a match, and REG_NOMATCH if not, otherwise return the error code. Note: We assume front end functions already check ranges. (0 <= LAST_START && LAST_START <= LENGTH) */static reg_errcode_tinternal_functionre_search_internal (const regex_t *preg, const char *string, Idx length, Idx start, Idx last_start, Idx stop, size_t nmatch, regmatch_t pmatch[], int eflags){ reg_errcode_t err; const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; Idx left_lim, right_lim; int incr; bool fl_longest_match; int match_kind; Idx match_first; Idx match_last = REG_MISSING; Idx extra_nmatch; bool sb; int ch;#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) re_match_context_t mctx = { .dfa = dfa };#else re_match_context_t mctx;#endif char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate && start != last_start && !preg->can_be_null) ? preg->fastmap : NULL); RE_TRANSLATE_TYPE t = preg->translate;#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)) memset (&mctx, '\0', sizeof (re_match_context_t)); mctx.dfa = dfa;#endif extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0; nmatch -= extra_nmatch; /* Check if the DFA haven't been compiled. */ if (BE (preg->used == 0 || dfa->init_state == NULL || dfa->init_state_word == NULL || dfa->init_state_nl == NULL || dfa->init_state_begbuf == NULL, 0)) return REG_NOMATCH;#ifdef DEBUG /* We assume front-end functions already check them. */ assert (0 <= last_start && last_start <= length);#endif /* If initial states with non-begbuf contexts have no elements, the regex must be anchored. If preg->newline_anchor is set, we'll never use init_state_nl, so do not check it. */ if (dfa->init_state->nodes.nelem == 0 && dfa->init_state_word->nodes.nelem == 0 && (dfa->init_state_nl->nodes.nelem == 0 || !preg->newline_anchor)) { if (start != 0 && last_start != 0) return REG_NOMATCH; start = last_start = 0; } /* We must check the longest matching, if nmatch > 0. */ fl_longest_match = (nmatch != 0 || dfa->nbackref); err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, preg->translate, preg->syntax & RE_ICASE, dfa); if (BE (err != REG_NOERROR, 0)) goto free_return; mctx.input.stop = stop; mctx.input.raw_stop = stop; mctx.input.newline_anchor = preg->newline_anchor; err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2); if (BE (err != REG_NOERROR, 0)) goto free_return; /* We will log all the DFA states through which the dfa pass, if nmatch > 1, or this dfa has "multibyte node", which is a back-reference or a node which can accept multibyte character or multi character collating element. */ if (nmatch > 1 || dfa->has_mb_node) { /* Avoid overflow. */ if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0)) { err = REG_ESPACE; goto free_return; } mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1); if (BE (mctx.state_log == NULL, 0)) { err = REG_ESPACE; goto free_return; } } else mctx.state_log = NULL; match_first = start; mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF : CONTEXT_NEWLINE | CONTEXT_BEGBUF; /* Check incrementally whether of not the input string match. */ incr = (last_start < start) ? -1 : 1; left_lim = (last_start < start) ? last_start : start; right_lim = (last_start < start) ? start : last_start; sb = dfa->mb_cur_max == 1; match_kind = (fastmap ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0) | (start <= last_start ? 2 : 0) | (t != NULL ? 1 : 0)) : 8); for (;; match_first += incr) { err = REG_NOMATCH; if (match_first < left_lim || right_lim < match_first) goto free_return; /* Advance as rapidly as possible through the string, until we find a plausible place to start matching. This may be done with varying efficiency, so there are various possibilities: only the most common of them are specialized, in order to save on code size. We use a switch statement for speed. */ switch (match_kind) { case 8: /* No fastmap. */ break; case 7: /* Fastmap with single-byte translation, match forward. */ while (BE (match_first < right_lim, 1) && !fastmap[t[(unsigned char) string[match_first]]]) ++match_first; goto forward_match_found_start_or_reached_end; case 6: /* Fastmap without translation, match forward. */ while (BE (match_first < right_lim, 1) && !fastmap[(unsigned char) string[match_first]]) ++match_first; forward_match_found_start_or_reached_end: if (BE (match_first == right_lim, 0)) { ch = match_first >= length ? 0 : (unsigned char) string[match_first]; if (!fastmap[t ? t[ch] : ch]) goto free_return; } break; case 4: case 5: /* Fastmap without multi-byte translation, match backwards. */ while (match_first >= left_lim) { ch = match_first >= length ? 0 : (unsigned char) string[match_first]; if (fastmap[t ? t[ch] : ch]) break; --match_first; } if (match_first < left_lim) goto free_return; break; default: /* In this case, we can't determine easily the current byte, since it might be a component byte of a multibyte character. Then we use the constructed buffer instead. */ for (;;) { /* If MATCH_FIRST is out of the valid range, reconstruct the buffers. */ __re_size_t offset = match_first - mctx.input.raw_mbs_idx; if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0)) { err = re_string_reconstruct (&mctx.input, match_first, eflags); if (BE (err != REG_NOERROR, 0)) goto free_return; offset = match_first - mctx.input.raw_mbs_idx; } /* If MATCH_FIRST is out of the buffer, leave it as '\0'. Note that MATCH_FIRST must not be smaller than 0. */ ch = (match_first >= length ? 0 : re_string_byte_at (&mctx.input, offset)); if (fastmap[ch]) break; match_first += incr; if (match_first < left_lim || match_first > right_lim) { err = REG_NOMATCH; goto free_return; } } break; } /* Reconstruct the buffers so that the matcher can assume that the matching starts from the beginning of the buffer. */ err = re_string_reconstruct (&mctx.input, match_first, eflags); if (BE (err != REG_NOERROR, 0)) goto free_return;#ifdef RE_ENABLE_I18N /* Don't consider this char as a possible match start if it part, yet isn't the head, of a multibyte character. */ if (!sb && !re_string_first_byte (&mctx.input, 0)) continue;#endif /* It seems to be appropriate one, then use the matcher. */ /* We assume that the matching starts from 0. */ mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; match_last = check_matching (&mctx, fl_longest_match, start <= last_start ? &match_first : NULL); if (match_last != REG_MISSING) { if (BE (match_last == REG_ERROR, 0)) { err = REG_ESPACE; goto free_return; } else
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -