📄 regexec.c
字号:
goto free_return; if (sifted_states[0] != NULL || lim_states[0] != NULL) break; do { --match_last; if (match_last < 0) { ret = REG_NOMATCH; goto free_return; } } while (!mctx->state_log[match_last]->halt); halt_node = check_halt_state_context (preg, mctx->state_log[match_last], mctx, match_last); } ret = merge_state_array (dfa, sifted_states, lim_states, match_last + 1); re_free (lim_states); lim_states = NULL; if (BE (ret != REG_NOERROR, 0)) goto free_return; } else { sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last, 0); ret = sift_states_backward (preg, mctx, &sctx); re_node_set_free (&sctx.limits); if (BE (ret != REG_NOERROR, 0)) goto free_return; } re_free (mctx->state_log); mctx->state_log = sifted_states; sifted_states = NULL; mctx->last_node = halt_node; mctx->match_last = match_last; ret = REG_NOERROR; free_return: re_free (sifted_states); re_free (lim_states); return ret;}/* Acquire an initial state and return it. We must select appropriate initial state depending on the context, since initial states may have constraints like "\<", "^", etc.. */static inline re_dfastate_t *acquire_init_state_context (err, preg, mctx, idx) reg_errcode_t *err; const regex_t *preg; const re_match_context_t *mctx; int idx;{ re_dfa_t *dfa = (re_dfa_t *) preg->buffer; *err = REG_NOERROR; if (dfa->init_state->has_constraint) { unsigned int context; context = re_string_context_at (mctx->input, idx - 1, mctx->eflags, preg->newline_anchor); if (IS_WORD_CONTEXT (context)) return dfa->init_state_word; else if (IS_ORDINARY_CONTEXT (context)) return dfa->init_state; else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context)) return dfa->init_state_begbuf; else if (IS_NEWLINE_CONTEXT (context)) return dfa->init_state_nl; else if (IS_BEGBUF_CONTEXT (context)) { /* It is relatively rare case, then calculate on demand. */ return re_acquire_state_context (err, dfa, dfa->init_state->entrance_nodes, context); } else /* Must not happen? */ return dfa->init_state; } else return dfa->init_state;}/* Check whether the regular expression match input string INPUT or not, and return the index where the matching end, return -1 if not match, or return -2 in case of an error. FL_SEARCH means we must search where the matching starts, FL_LONGEST_MATCH means we want the POSIX longest matching. Note that the matcher assume that the maching starts from the current index of the buffer. */static intcheck_matching (preg, mctx, fl_search, fl_longest_match) const regex_t *preg; re_match_context_t *mctx; int fl_search, fl_longest_match;{ re_dfa_t *dfa = (re_dfa_t *) preg->buffer; reg_errcode_t err; int match = 0; int match_last = -1; int cur_str_idx = re_string_cur_idx (mctx->input); re_dfastate_t *cur_state; cur_state = acquire_init_state_context (&err, preg, mctx, cur_str_idx); /* An initial state must not be NULL(invalid state). */ if (BE (cur_state == NULL, 0)) return -2; if (mctx->state_log != NULL) mctx->state_log[cur_str_idx] = cur_state; /* Check OP_OPEN_SUBEXP in the initial state in case that we use them later. E.g. Processing back references. */ if (dfa->nbackref) { err = check_subexp_matching_top (dfa, mctx, &cur_state->nodes, 0); if (BE (err != REG_NOERROR, 0)) return err; } if (cur_state->has_backref) { err = transit_state_bkref (preg, &cur_state->nodes, mctx); if (BE (err != REG_NOERROR, 0)) return err; } /* If the RE accepts NULL string. */ if (cur_state->halt) { if (!cur_state->has_constraint || check_halt_state_context (preg, cur_state, mctx, cur_str_idx)) { if (!fl_longest_match) return cur_str_idx; else { match_last = cur_str_idx; match = 1; } } } while (!re_string_eoi (mctx->input)) { cur_state = transit_state (&err, preg, mctx, cur_state, fl_search && !match); if (cur_state == NULL) /* Reached at the invalid state or an error. */ { cur_str_idx = re_string_cur_idx (mctx->input); if (BE (err != REG_NOERROR, 0)) return -2; if (fl_search && !match) { /* Restart from initial state, since we are searching the point from where matching start. */#ifdef RE_ENABLE_I18N if (MB_CUR_MAX == 1 || re_string_first_byte (mctx->input, cur_str_idx))#endif /* RE_ENABLE_I18N */ cur_state = acquire_init_state_context (&err, preg, mctx, cur_str_idx); if (BE (cur_state == NULL && err != REG_NOERROR, 0)) return -2; if (mctx->state_log != NULL) mctx->state_log[cur_str_idx] = cur_state; } else if (!fl_longest_match && match) break; else /* (fl_longest_match && match) || (!fl_search && !match) */ { if (mctx->state_log == NULL) break; else { int max = mctx->state_log_top; for (; cur_str_idx <= max; ++cur_str_idx) if (mctx->state_log[cur_str_idx] != NULL) break; if (cur_str_idx > max) break; } } } if (cur_state != NULL && cur_state->halt) { /* Reached at a halt state. Check the halt state can satisfy the current context. */ if (!cur_state->has_constraint || check_halt_state_context (preg, cur_state, mctx, re_string_cur_idx (mctx->input))) { /* We found an appropriate halt state. */ match_last = re_string_cur_idx (mctx->input); match = 1; if (!fl_longest_match) break; } } } return match_last;}/* Check NODE match the current context. */static int check_halt_node_context (dfa, node, context) const re_dfa_t *dfa; int node; unsigned int context;{ re_token_type_t type = dfa->nodes[node].type; unsigned int constraint = dfa->nodes[node].constraint; if (type != END_OF_RE) return 0; if (!constraint) return 1; if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)) return 0; return 1;}/* Check the halt state STATE match the current context. Return 0 if not match, if the node, STATE has, is a halt node and match the context, return the node. */static intcheck_halt_state_context (preg, state, mctx, idx) const regex_t *preg; const re_dfastate_t *state; const re_match_context_t *mctx; int idx;{ re_dfa_t *dfa = (re_dfa_t *) preg->buffer; int i; unsigned int context;#ifdef DEBUG assert (state->halt);#endif context = re_string_context_at (mctx->input, idx, mctx->eflags, preg->newline_anchor); for (i = 0; i < state->nodes.nelem; ++i) if (check_halt_node_context (dfa, state->nodes.elems[i], context)) return state->nodes.elems[i]; return 0;}/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA corresponding to the DFA). Return the destination node, and update EPS_VIA_NODES, return -1 in case of errors. */static intproceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs) const regex_t *preg; regmatch_t *regs; const re_match_context_t *mctx; int nregs, *pidx, node; re_node_set *eps_via_nodes; struct re_fail_stack_t *fs;{ re_dfa_t *dfa = (re_dfa_t *)preg->buffer; int i, err, dest_node; dest_node = -1; if (IS_EPSILON_NODE (dfa->nodes[node].type)) { re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; int ndest, dest_nodes[2]; err = re_node_set_insert (eps_via_nodes, node); if (BE (err < 0, 0)) return -1; /* Pick up valid destinations. */ for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i) { int candidate = dfa->edests[node].elems[i]; if (!re_node_set_contains (cur_nodes, candidate)) continue; dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0]; dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1]; ++ndest; } if (ndest <= 1) return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0); /* In order to avoid infinite loop like "(a*)*". */ if (re_node_set_contains (eps_via_nodes, dest_nodes[0])) return dest_nodes[1]; if (fs != NULL) push_fail_stack (fs, *pidx, dest_nodes, nregs, regs, eps_via_nodes); return dest_nodes[0]; } else { int naccepted = 0; re_token_type_t type = dfa->nodes[node].type;#ifdef RE_ENABLE_I18N if (ACCEPT_MB_NODE (type)) naccepted = check_node_accept_bytes (preg, node, mctx->input, *pidx); else#endif /* RE_ENABLE_I18N */ if (type == OP_BACK_REF) { int subexp_idx = dfa->nodes[node].opr.idx; naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; if (fs != NULL) { if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) return -1; else if (naccepted) { char *buf = (char *) re_string_get_buffer (mctx->input); if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, naccepted) != 0) return -1; } } if (naccepted == 0) { err = re_node_set_insert (eps_via_nodes, node); if (BE (err < 0, 0)) return -2; dest_node = dfa->edests[node].elems[0]; if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, dest_node)) return dest_node; } } if (naccepted != 0 || check_node_accept (preg, dfa->nodes + node, mctx, *pidx)) { dest_node = dfa->nexts[node]; *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted; if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, dest_node))) return -1; re_node_set_empty (eps_via_nodes); return dest_node; } } return -1;}static reg_errcode_tpush_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes) struct re_fail_stack_t *fs; int str_idx, *dests, nregs; regmatch_t *regs; re_node_set *eps_via_nodes;{ reg_errcode_t err; int num = fs->num++; if (fs->num == fs->alloc) { struct re_fail_stack_ent_t *new_array; fs->alloc *= 2; new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) * fs->alloc)); if (new_array == NULL) return REG_ESPACE; fs->stack = new_array; } fs->stack[num].idx = str_idx; fs->stack[num].node = dests[1]; fs->stack[num].regs = re_malloc (regmatch_t, nregs); memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); return err;}static intpop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes) struct re_fail_stack_t *fs; int *pidx, nregs; regmatch_t *regs; re_node_set *eps_via_nodes;{ int num = --fs->num; assert (num >= 0); *pidx = fs->stack[num].idx; memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); re_node_set_free (eps_via_nodes); re_free (fs->stack[num].regs); *eps_via_nodes = fs->stack[num].eps_via_nodes; return fs->stack[num].node;}/* Set the positions where the subexpressions are starts/ends to registers PMATCH. Note: We assume that pmatch[0] is already set, and pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1). */static reg_errcode_tset_regs (preg, mctx, nmatch, pmatch, fl_backtrack) const regex_t *preg; const re_match_context_t *mctx; size_t nmatch; regmatch_t *pmatch; int fl_backtrack;{ re_dfa_t *dfa = (re_dfa_t *)preg->buffer; int idx, cur_node, real_nmatch; re_node_set eps_via_nodes; struct re_fail_stack_t *fs; struct re_fail_stack_t fs_body = {0, 2, NULL};#ifdef DEBUG assert (nmatch > 1); assert (mctx->state_log != NULL);#endif if (fl_backtrack) { fs = &fs_body; fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc); } else fs = NULL; cur_node = dfa->init_node; real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1; re_node_set_init_empty (&eps_via_nodes); for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;) { update_regs (dfa, pmatch, cur_node, idx, real_nmatch); if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) { int reg_idx;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -