📄 regexec.c
字号:
dest_node = candidate; else { /* In order to avoid infinite loop like "(a*)*", return the second epsilon-transition if the first was already considered. */ if (re_node_set_contains (eps_via_nodes, dest_node)) return candidate; /* Otherwise, push the second epsilon-transition on the fail stack. */ else if (fs != NULL && push_fail_stack (fs, *pidx, candidate, nregs, regs, eps_via_nodes)) return REG_ERROR; /* We know we are going to exit. */ break; } } return dest_node; } else { Idx naccepted = 0; re_token_type_t type = dfa->nodes[node].type;#ifdef RE_ENABLE_I18N if (dfa->nodes[node].accept_mb) naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); else#endif /* RE_ENABLE_I18N */ if (type == OP_BACK_REF) { Idx subexp_idx = dfa->nodes[node].opr.idx + 1; naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; if (fs != NULL) { if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) return REG_MISSING; else if (naccepted) { char *buf = (char *) re_string_get_buffer (&mctx->input); if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, naccepted) != 0) return REG_MISSING; } } if (naccepted == 0) { Idx dest_node; ok = re_node_set_insert (eps_via_nodes, node); if (BE (! ok, 0)) return REG_ERROR; dest_node = dfa->edests[node].elems[0]; if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, dest_node)) return dest_node; } } if (naccepted != 0 || check_node_accept (mctx, dfa->nodes + node, *pidx)) { Idx dest_node = dfa->nexts[node]; *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted; if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, dest_node))) return REG_MISSING; re_node_set_empty (eps_via_nodes); return dest_node; } } return REG_MISSING;}static reg_errcode_tinternal_functionpush_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes){ reg_errcode_t err; Idx num = fs->num++; if (fs->num == fs->alloc) { struct re_fail_stack_ent_t *new_array; new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) * fs->alloc * 2)); if (new_array == NULL) return REG_ESPACE; fs->alloc *= 2; fs->stack = new_array; } fs->stack[num].idx = str_idx; fs->stack[num].node = dest_node; fs->stack[num].regs = re_malloc (regmatch_t, nregs); if (fs->stack[num].regs == NULL) return REG_ESPACE; memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); return err;}static Idxinternal_functionpop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes){ Idx num = --fs->num; assert (REG_VALID_INDEX (num)); *pidx = fs->stack[num].idx; memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); re_node_set_free (eps_via_nodes); re_free (fs->stack[num].regs); *eps_via_nodes = fs->stack[num].eps_via_nodes; return fs->stack[num].node;}/* Set the positions where the subexpressions are starts/ends to registers PMATCH. Note: We assume that pmatch[0] is already set, and pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */static reg_errcode_tinternal_functionset_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, regmatch_t *pmatch, bool fl_backtrack){ const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; Idx idx, cur_node; re_node_set eps_via_nodes; struct re_fail_stack_t *fs; struct re_fail_stack_t fs_body = { 0, 2, NULL }; regmatch_t *prev_idx_match; bool prev_idx_match_malloced = false;#ifdef DEBUG assert (nmatch > 1); assert (mctx->state_log != NULL);#endif if (fl_backtrack) { fs = &fs_body; fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc); if (fs->stack == NULL) return REG_ESPACE; } else fs = NULL; cur_node = dfa->init_node; re_node_set_init_empty (&eps_via_nodes); if (__libc_use_alloca (nmatch * sizeof (regmatch_t))) prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t)); else { prev_idx_match = re_malloc (regmatch_t, nmatch); if (prev_idx_match == NULL) { free_fail_stack_return (fs); return REG_ESPACE; } prev_idx_match_malloced = true; } memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;) { update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) { Idx reg_idx; if (fs) { for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) break; if (reg_idx == nmatch) { re_node_set_free (&eps_via_nodes); if (prev_idx_match_malloced) re_free (prev_idx_match); return free_fail_stack_return (fs); } cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, &eps_via_nodes); } else { re_node_set_free (&eps_via_nodes); if (prev_idx_match_malloced) re_free (prev_idx_match); return REG_NOERROR; } } /* Proceed to next node. */ cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, &eps_via_nodes, fs); if (BE (! REG_VALID_INDEX (cur_node), 0)) { if (BE (cur_node == REG_ERROR, 0)) { re_node_set_free (&eps_via_nodes); if (prev_idx_match_malloced) re_free (prev_idx_match); free_fail_stack_return (fs); return REG_ESPACE; } if (fs) cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, &eps_via_nodes); else { re_node_set_free (&eps_via_nodes); if (prev_idx_match_malloced) re_free (prev_idx_match); return REG_NOMATCH; } } } re_node_set_free (&eps_via_nodes); if (prev_idx_match_malloced) re_free (prev_idx_match); return free_fail_stack_return (fs);}static reg_errcode_tinternal_functionfree_fail_stack_return (struct re_fail_stack_t *fs){ if (fs) { Idx fs_idx; for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) { re_node_set_free (&fs->stack[fs_idx].eps_via_nodes); re_free (fs->stack[fs_idx].regs); } re_free (fs->stack); } return REG_NOERROR;}static voidinternal_functionupdate_regs (const re_dfa_t *dfa, regmatch_t *pmatch, regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch){ int type = dfa->nodes[cur_node].type; if (type == OP_OPEN_SUBEXP) { Idx reg_num = dfa->nodes[cur_node].opr.idx + 1; /* We are at the first node of this sub expression. */ if (reg_num < nmatch) { pmatch[reg_num].rm_so = cur_idx; pmatch[reg_num].rm_eo = -1; } } else if (type == OP_CLOSE_SUBEXP) { Idx reg_num = dfa->nodes[cur_node].opr.idx + 1; if (reg_num < nmatch) { /* We are at the last node of this sub expression. */ if (pmatch[reg_num].rm_so < cur_idx) { pmatch[reg_num].rm_eo = cur_idx; /* This is a non-empty match or we are not inside an optional subexpression. Accept this right away. */ memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); } else { if (dfa->nodes[cur_node].opt_subexp && prev_idx_match[reg_num].rm_so != -1) /* We transited through an empty match for an optional subexpression, like (a?)*, and this is not the subexp's first match. Copy back the old content of the registers so that matches of an inner subexpression are undone as well, like in ((a?))*. */ memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch); else /* We completed a subexpression, but it may be part of an optional one, so do not update PREV_IDX_MATCH. */ pmatch[reg_num].rm_eo = cur_idx; } } }}/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0 and sift the nodes in each states according to the following rules. Updated state_log will be wrote to STATE_LOG. Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... 1. When STR_IDX == MATCH_LAST(the last index in the state_log): If `a' isn't the LAST_NODE and `a' can't epsilon transit to the LAST_NODE, we throw away the node `a'. 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts string `s' and transit to `b': i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw away the node `a'. ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is thrown away, we throw away the node `a'. 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the node `a'. ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, we throw away the node `a'. */#define STATE_NODE_CONTAINS(state,node) \ ((state) != NULL && re_node_set_contains (&(state)->nodes, node))static reg_errcode_tinternal_functionsift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx){ reg_errcode_t err; int null_cnt = 0; Idx str_idx = sctx->last_str_idx; re_node_set cur_dest;#ifdef DEBUG assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);#endif /* Build sifted state_log[str_idx]. It has the nodes which can epsilon transit to the last_node and the last_node itself. */ err = re_node_set_init_1 (&cur_dest, sctx->last_node); if (BE (err != REG_NOERROR, 0)) return err; err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); if (BE (err != REG_NOERROR, 0)) goto free_return; /* Then check each states in the state_log. */ while (str_idx > 0) { /* Update counters. */ null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0; if (null_cnt > mctx->max_mb_elem_len) { memset (sctx->sifted_states, '\0', sizeof (re_dfastate_t *) * str_idx); re_node_set_free (&cur_dest); return REG_NOERROR; } re_node_set_empty (&cur_dest); --str_idx; if (mctx->state_log[str_idx]) { err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); if (BE (err != REG_NOERROR, 0)) goto free_return; } /* Add all the nodes which satisfy the following conditions: - It can epsilon transit to a node in CUR_DEST. - It is in CUR_SRC. And update state_log. */ err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); if (BE (err != REG_NOERROR, 0)) goto free_return; } err = REG_NOERROR; free_return: re_node_set_free (&cur_dest); return err;}static reg_errcode_tinternal_functionbuild_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, Idx str_idx, re_node_set *cur_dest){ const re_dfa_t *const dfa = mctx->dfa; const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; Idx i; /* Then build the next sifted state. We build the next sifted state on `cur_dest', and update `sifted_states[str_idx]' with `cur_dest'. Note: `cur_dest' is the sifted state from `state_log[str_idx + 1]'. `cur_src' points the node_set of the old `state_log[str_idx]' (with the epsilon nodes pre-filtered out). */ for (i = 0; i < cur_src->nelem; i++) { Idx prev_node = cur_src->elems[i]; int naccepted = 0; bool ok;#ifdef DEBUG re_token_type_t type = dfa->nodes[prev_node].type; assert (!IS_EPSILON_NODE (type));#endif#ifdef RE_ENABLE_I18N /* If the node may accept `multi byte'. */ if (dfa->nodes[prev_node].accept_mb) naccepted = sift_states_iter_mb (mctx, sctx, prev_node, str_idx, sctx->last_str_idx);#endif /* RE_ENABLE_I18N */ /* We don't check backreferences here. See update_cur_sifted_state(). */ if (!naccepted && check_node_accept (mctx, dfa->nodes + prev_node, str_idx) && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1], dfa->nexts[prev_node])) naccepted = 1; if (naccepted == 0) continue; if (sctx->limits.nelem) { Idx to_idx = str_idx + naccepted; if (check_dst_limits (mctx, &sctx->limits, dfa->nexts[prev_node], to_idx, prev_node, str_idx)) continue; } ok = re_node_set_insert (cur_dest, prev_node); if (BE (! ok, 0)) return REG_ESPACE; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -