📄 regexec.c
字号:
{ assert (pmatch[0].rm_so == start); rval = pmatch[0].rm_eo - start; } else rval = pmatch[0].rm_so; } re_free (pmatch); return rval;}static unsignedre_copy_regs (regs, pmatch, nregs, regs_allocated) struct re_registers *regs; regmatch_t *pmatch; int nregs, regs_allocated;{ int rval = REGS_REALLOCATE; int i; int need_regs = nregs + 1; /* We need one extra element beyond `num_regs' for the `-1' marker GNU code uses. */ /* Have the register data arrays been allocated? */ if (regs_allocated == REGS_UNALLOCATED) { /* No. So allocate them with malloc. */ regs->start = re_malloc (regoff_t, need_regs); if (BE (regs->start == NULL, 0)) return REGS_UNALLOCATED; regs->end = re_malloc (regoff_t, need_regs); if (BE (regs->end == NULL, 0)) { re_free (regs->start); return REGS_UNALLOCATED; } regs->num_regs = need_regs; } else if (regs_allocated == REGS_REALLOCATE) { /* Yes. If we need more elements than were already allocated, reallocate them. If we need fewer, just leave it alone. */ if (need_regs > regs->num_regs) { regs->start = re_realloc (regs->start, regoff_t, need_regs); if (BE (regs->start == NULL, 0)) { if (regs->end != NULL) re_free (regs->end); return REGS_UNALLOCATED; } regs->end = re_realloc (regs->end, regoff_t, need_regs); if (BE (regs->end == NULL, 0)) { re_free (regs->start); return REGS_UNALLOCATED; } regs->num_regs = need_regs; } } else { assert (regs_allocated == REGS_FIXED); /* This function may not be called with REGS_FIXED and nregs too big. */ assert (regs->num_regs >= nregs); rval = REGS_FIXED; } /* Copy the regs. */ for (i = 0; i < nregs; ++i) { regs->start[i] = pmatch[i].rm_so; regs->end[i] = pmatch[i].rm_eo; } for ( ; i < regs->num_regs; ++i) regs->start[i] = regs->end[i] = -1; return rval;}/* Set REGS to hold NUM_REGS registers, storing them in STARTS and ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use this memory for recording register information. STARTS and ENDS must be allocated using the malloc library routine, and must each be at least NUM_REGS * sizeof (regoff_t) bytes long. If NUM_REGS == 0, then subsequent matches should allocate their own register data. Unless this function is called, the first search or match using PATTERN_BUFFER will allocate its own register data, without freeing the old data. */voidre_set_registers (bufp, regs, num_regs, starts, ends) struct re_pattern_buffer *bufp; struct re_registers *regs; unsigned num_regs; regoff_t *starts, *ends;{ if (num_regs) { bufp->regs_allocated = REGS_REALLOCATE; regs->num_regs = num_regs; regs->start = starts; regs->end = ends; } else { bufp->regs_allocated = REGS_UNALLOCATED; regs->num_regs = 0; regs->start = regs->end = (regoff_t *) 0; }}#ifdef _LIBCweak_alias (__re_set_registers, re_set_registers)#endif/* Entry points compatible with 4.2 BSD regex library. We don't define them unless specifically requested. */#if defined _REGEX_RE_COMP || defined _LIBCint# ifdef _LIBCweak_function# endifre_exec (s) const char *s;{ return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);}#endif /* _REGEX_RE_COMP */static re_node_set empty_set;/* Internal entry point. *//* Searches for a compiled pattern PREG in the string STRING, whose length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same mingings with regexec. START, and RANGE have the same meanings with re_search. Return REG_NOERROR if we find a match, and REG_NOMATCH if not, otherwise return the error code. Note: We assume front end functions already check ranges. (START + RANGE >= 0 && START + RANGE <= LENGTH) */static reg_errcode_tre_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, eflags) const regex_t *preg; const char *string; int length, start, range, stop, eflags; size_t nmatch; regmatch_t pmatch[];{ reg_errcode_t err; re_dfa_t *dfa = (re_dfa_t *)preg->buffer; re_string_t input; int left_lim, right_lim, incr; int fl_longest_match, match_first, match_last = -1; int fast_translate, sb; re_match_context_t mctx; char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate && range && !preg->can_be_null) ? preg->fastmap : NULL); /* Check if the DFA haven't been compiled. */ if (BE (preg->used == 0 || dfa->init_state == NULL || dfa->init_state_word == NULL || dfa->init_state_nl == NULL || dfa->init_state_begbuf == NULL, 0)) return REG_NOMATCH; re_node_set_init_empty (&empty_set); memset (&mctx, '\0', sizeof (re_match_context_t)); /* We must check the longest matching, if nmatch > 0. */ fl_longest_match = (nmatch != 0 || dfa->nbackref); err = re_string_allocate (&input, string, length, dfa->nodes_len + 1, preg->translate, preg->syntax & RE_ICASE); if (BE (err != REG_NOERROR, 0)) goto free_return; input.stop = stop; err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2); if (BE (err != REG_NOERROR, 0)) goto free_return; /* We will log all the DFA states through which the dfa pass, if nmatch > 1, or this dfa has "multibyte node", which is a back-reference or a node which can accept multibyte character or multi character collating element. */ if (nmatch > 1 || dfa->has_mb_node) { mctx.state_log = re_malloc (re_dfastate_t *, dfa->nodes_len + 1); if (BE (mctx.state_log == NULL, 0)) { err = REG_ESPACE; goto free_return; } } else mctx.state_log = NULL;#ifdef DEBUG /* We assume front-end functions already check them. */ assert (start + range >= 0 && start + range <= length);#endif match_first = start; input.tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF : CONTEXT_NEWLINE | CONTEXT_BEGBUF); /* Check incrementally whether of not the input string match. */ incr = (range < 0) ? -1 : 1; left_lim = (range < 0) ? start + range : start; right_lim = (range < 0) ? start : start + range; sb = MB_CUR_MAX == 1; fast_translate = sb || !(preg->syntax & RE_ICASE || preg->translate); for (;;) { /* At first get the current byte from input string. */ if (fastmap) { if (BE (fast_translate, 1)) { unsigned RE_TRANSLATE_TYPE t = (unsigned RE_TRANSLATE_TYPE) preg->translate; if (BE (range >= 0, 1)) { if (BE (t != NULL, 0)) { while (BE (match_first < right_lim, 1) && !fastmap[t[(unsigned char) string[match_first]]]) ++match_first; } else { while (BE (match_first < right_lim, 1) && !fastmap[(unsigned char) string[match_first]]) ++match_first; } if (BE (match_first == right_lim, 0)) { int ch = match_first >= length ? 0 : (unsigned char) string[match_first]; if (!fastmap[t ? t[ch] : ch]) break; } } else { while (match_first >= left_lim) { int ch = match_first >= length ? 0 : (unsigned char) string[match_first]; if (fastmap[t ? t[ch] : ch]) break; --match_first; } if (match_first < left_lim) break; } } else { int ch; do { /* In this case, we can't determine easily the current byte, since it might be a component byte of a multibyte character. Then we use the constructed buffer instead. */ /* If MATCH_FIRST is out of the valid range, reconstruct the buffers. */ if (input.raw_mbs_idx + input.valid_len <= match_first || match_first < input.raw_mbs_idx) { err = re_string_reconstruct (&input, match_first, eflags, preg->newline_anchor); if (BE (err != REG_NOERROR, 0)) goto free_return; } /* If MATCH_FIRST is out of the buffer, leave it as '\0'. Note that MATCH_FIRST must not be smaller than 0. */ ch = ((match_first >= length) ? 0 : re_string_byte_at (&input, match_first - input.raw_mbs_idx)); if (fastmap[ch]) break; match_first += incr; } while (match_first >= left_lim && match_first <= right_lim); if (! fastmap[ch]) break; } } /* Reconstruct the buffers so that the matcher can assume that the matching starts from the begining of the buffer. */ err = re_string_reconstruct (&input, match_first, eflags, preg->newline_anchor); if (BE (err != REG_NOERROR, 0)) goto free_return;#ifdef RE_ENABLE_I18N /* Eliminate it when it is a component of a multibyte character and isn't the head of a multibyte character. */ if (sb || re_string_first_byte (&input, 0))#endif { /* It seems to be appropriate one, then use the matcher. */ /* We assume that the matching starts from 0. */ mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; match_last = check_matching (preg, &mctx, 0, fl_longest_match); if (match_last != -1) { if (BE (match_last == -2, 0)) { err = REG_ESPACE; goto free_return; } else { mctx.match_last = match_last; if ((!preg->no_sub && nmatch > 1) || dfa->nbackref) { re_dfastate_t *pstate = mctx.state_log[match_last]; mctx.last_node = check_halt_state_context (preg, pstate, &mctx, match_last); } if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match) || dfa->nbackref) { err = prune_impossible_nodes (preg, &mctx); if (err == REG_NOERROR) break; if (BE (err != REG_NOMATCH, 0)) goto free_return; } else break; /* We found a matching. */ } } match_ctx_clean (&mctx); } /* Update counter. */ match_first += incr; if (match_first < left_lim || right_lim < match_first) break; } /* Set pmatch[] if we need. */ if (match_last != -1 && nmatch > 0) { int reg_idx; /* Initialize registers. */ for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1; /* Set the points where matching start/end. */ pmatch[0].rm_so = 0; pmatch[0].rm_eo = mctx.match_last; if (!preg->no_sub && nmatch > 1) { err = set_regs (preg, &mctx, nmatch, pmatch, dfa->has_plural_match && dfa->nbackref > 0); if (BE (err != REG_NOERROR, 0)) goto free_return; } /* At last, add the offset to the each registers, since we slided the buffers so that We can assume that the matching starts from 0. */ for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) if (pmatch[reg_idx].rm_so != -1) { pmatch[reg_idx].rm_so += match_first; pmatch[reg_idx].rm_eo += match_first; } } err = (match_last == -1) ? REG_NOMATCH : REG_NOERROR; free_return: re_free (mctx.state_log); if (dfa->nbackref) match_ctx_free (&mctx); re_string_destruct (&input); return err;}static reg_errcode_tprune_impossible_nodes (preg, mctx) const regex_t *preg; re_match_context_t *mctx;{ int halt_node, match_last; reg_errcode_t ret; re_dfa_t *dfa = (re_dfa_t *)preg->buffer; re_dfastate_t **sifted_states; re_dfastate_t **lim_states = NULL; re_sift_context_t sctx;#ifdef DEBUG assert (mctx->state_log != NULL);#endif match_last = mctx->match_last; halt_node = mctx->last_node; sifted_states = re_malloc (re_dfastate_t *, match_last + 1); if (BE (sifted_states == NULL, 0)) { ret = REG_ESPACE; goto free_return; } if (dfa->nbackref) { lim_states = re_malloc (re_dfastate_t *, match_last + 1); if (BE (lim_states == NULL, 0)) { ret = REG_ESPACE; goto free_return; } while (1) { memset (lim_states, '\0', sizeof (re_dfastate_t *) * (match_last + 1)); match_ctx_clear_flag (mctx); sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last, 0); ret = sift_states_backward (preg, mctx, &sctx); re_node_set_free (&sctx.limits); if (BE (ret != REG_NOERROR, 0))
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -