📄 regexec.c
字号:
return REG_NOERROR;}/* Helper functions. */static reg_errcode_tinternal_functionclean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx){ Idx top = mctx->state_log_top; if (next_state_log_idx >= mctx->input.bufs_len || (next_state_log_idx >= mctx->input.valid_len && mctx->input.valid_len < mctx->input.len)) { reg_errcode_t err; err = extend_buffers (mctx); if (BE (err != REG_NOERROR, 0)) return err; } if (top < next_state_log_idx) { memset (mctx->state_log + top + 1, '\0', sizeof (re_dfastate_t *) * (next_state_log_idx - top)); mctx->state_log_top = next_state_log_idx; } return REG_NOERROR;}static reg_errcode_tinternal_functionmerge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, re_dfastate_t **src, Idx num){ Idx st_idx; reg_errcode_t err; for (st_idx = 0; st_idx < num; ++st_idx) { if (dst[st_idx] == NULL) dst[st_idx] = src[st_idx]; else if (src[st_idx] != NULL) { re_node_set merged_set; err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, &src[st_idx]->nodes); if (BE (err != REG_NOERROR, 0)) return err; dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); re_node_set_free (&merged_set); if (BE (err != REG_NOERROR, 0)) return err; } } return REG_NOERROR;}static reg_errcode_tinternal_functionupdate_cur_sifted_state (const re_match_context_t *mctx, re_sift_context_t *sctx, Idx str_idx, re_node_set *dest_nodes){ const re_dfa_t *const dfa = mctx->dfa; reg_errcode_t err = REG_NOERROR; const re_node_set *candidates; candidates = ((mctx->state_log[str_idx] == NULL) ? NULL : &mctx->state_log[str_idx]->nodes); if (dest_nodes->nelem == 0) sctx->sifted_states[str_idx] = NULL; else { if (candidates) { /* At first, add the nodes which can epsilon transit to a node in DEST_NODE. */ err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); if (BE (err != REG_NOERROR, 0)) return err; /* Then, check the limitations in the current sift_context. */ if (sctx->limits.nelem) { err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, mctx->bkref_ents, str_idx); if (BE (err != REG_NOERROR, 0)) return err; } } sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); if (BE (err != REG_NOERROR, 0)) return err; } if (candidates && mctx->state_log[str_idx]->has_backref) { err = sift_states_bkref (mctx, sctx, str_idx, candidates); if (BE (err != REG_NOERROR, 0)) return err; } return REG_NOERROR;}static reg_errcode_tinternal_functionadd_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, const re_node_set *candidates){ reg_errcode_t err = REG_NOERROR; Idx i; re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes); if (BE (err != REG_NOERROR, 0)) return err; if (!state->inveclosure.alloc) { err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); if (BE (err != REG_NOERROR, 0)) return REG_ESPACE; for (i = 0; i < dest_nodes->nelem; i++) re_node_set_merge (&state->inveclosure, dfa->inveclosures + dest_nodes->elems[i]); } return re_node_set_add_intersect (dest_nodes, candidates, &state->inveclosure);}static reg_errcode_tinternal_functionsub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes, const re_node_set *candidates){ Idx ecl_idx; reg_errcode_t err; re_node_set *inv_eclosure = dfa->inveclosures + node; re_node_set except_nodes; re_node_set_init_empty (&except_nodes); for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) { Idx cur_node = inv_eclosure->elems[ecl_idx]; if (cur_node == node) continue; if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) { Idx edst1 = dfa->edests[cur_node].elems[0]; Idx edst2 = ((dfa->edests[cur_node].nelem > 1) ? dfa->edests[cur_node].elems[1] : REG_MISSING); if ((!re_node_set_contains (inv_eclosure, edst1) && re_node_set_contains (dest_nodes, edst1)) || (REG_VALID_NONZERO_INDEX (edst2) && !re_node_set_contains (inv_eclosure, edst2) && re_node_set_contains (dest_nodes, edst2))) { err = re_node_set_add_intersect (&except_nodes, candidates, dfa->inveclosures + cur_node); if (BE (err != REG_NOERROR, 0)) { re_node_set_free (&except_nodes); return err; } } } } for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) { Idx cur_node = inv_eclosure->elems[ecl_idx]; if (!re_node_set_contains (&except_nodes, cur_node)) { Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1; re_node_set_remove_at (dest_nodes, idx); } } re_node_set_free (&except_nodes); return REG_NOERROR;}static boolinternal_functioncheck_dst_limits (const re_match_context_t *mctx, const re_node_set *limits, Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx){ const re_dfa_t *const dfa = mctx->dfa; Idx lim_idx, src_pos, dst_pos; Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx); Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx); for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) { Idx subexp_idx; struct re_backref_cache_entry *ent; ent = mctx->bkref_ents + limits->elems[lim_idx]; subexp_idx = dfa->nodes[ent->node].opr.idx; dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], subexp_idx, dst_node, dst_idx, dst_bkref_idx); src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], subexp_idx, src_node, src_idx, src_bkref_idx); /* In case of: <src> <dst> ( <subexp> ) ( <subexp> ) <src> <dst> ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */ if (src_pos == dst_pos) continue; /* This is unrelated limitation. */ else return true; } return false;}static intinternal_functioncheck_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, Idx subexp_idx, Idx from_node, Idx bkref_idx){ const re_dfa_t *const dfa = mctx->dfa; const re_node_set *eclosures = dfa->eclosures + from_node; Idx node_idx; /* Else, we are on the boundary: examine the nodes on the epsilon closure. */ for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) { Idx node = eclosures->elems[node_idx]; switch (dfa->nodes[node].type) { case OP_BACK_REF: if (bkref_idx != REG_MISSING) { struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; do { Idx dst; int cpos; if (ent->node != node) continue; if (subexp_idx < BITSET_WORD_BITS && !(ent->eps_reachable_subexps_map & ((bitset_word_t) 1 << subexp_idx))) continue; /* Recurse trying to reach the OP_OPEN_SUBEXP and OP_CLOSE_SUBEXP cases below. But, if the destination node is the same node as the source node, don't recurse because it would cause an infinite loop: a regex that exhibits this behavior is ()\1*\1* */ dst = dfa->edests[node].elems[0]; if (dst == from_node) { if (boundaries & 1) return -1; else /* if (boundaries & 2) */ return 0; } cpos = check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, dst, bkref_idx); if (cpos == -1 /* && (boundaries & 1) */) return -1; if (cpos == 0 && (boundaries & 2)) return 0; if (subexp_idx < BITSET_WORD_BITS) ent->eps_reachable_subexps_map &= ~((bitset_word_t) 1 << subexp_idx); } while (ent++->more); } break; case OP_OPEN_SUBEXP: if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx) return -1; break; case OP_CLOSE_SUBEXP: if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx) return 0; break; default: break; } } return (boundaries & 2) ? 1 : 0;}static intinternal_functioncheck_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit, Idx subexp_idx, Idx from_node, Idx str_idx, Idx bkref_idx){ struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; int boundaries; /* If we are outside the range of the subexpression, return -1 or 1. */ if (str_idx < lim->subexp_from) return -1; if (lim->subexp_to < str_idx) return 1; /* If we are within the subexpression, return 0. */ boundaries = (str_idx == lim->subexp_from); boundaries |= (str_idx == lim->subexp_to) << 1; if (boundaries == 0) return 0; /* Else, examine epsilon closure. */ return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx);}/* Check the limitations of sub expressions LIMITS, and remove the nodes which are against limitations from DEST_NODES. */static reg_errcode_tinternal_functioncheck_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, const re_node_set *candidates, re_node_set *limits, struct re_backref_cache_entry *bkref_ents, Idx str_idx){ reg_errcode_t err; Idx node_idx, lim_idx; for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) { Idx subexp_idx; struct re_backref_cache_entry *ent; ent = bkref_ents + limits->elems[lim_idx]; if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) continue; /* This is unrelated limitation. */ subexp_idx = dfa->nodes[ent->node].opr.idx; if (ent->subexp_to == str_idx) { Idx ops_node = REG_MISSING; Idx cls_node = REG_MISSING; for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) { Idx node = dest_nodes->elems[node_idx]; re_token_type_t type = dfa->nodes[node].type; if (type == OP_OPEN_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx) ops_node = node; else if (type == OP_CLOSE_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx) cls_node = node; } /* Check the limitation of the open subexpression. */ /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */ if (REG_VALID_INDEX (ops_node)) { err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes, candidates); if (BE (err != REG_NOERROR, 0)) return err; } /* Check the limitation of the close subexpression. */ if (REG_VALID_INDEX (cls_node)) for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) { Idx node = dest_nodes->elems[node_idx]; if (!re_node_set_contains (dfa->inveclosures + node, cls_node) && !re_node_set_contains (dfa->eclosures + node, cls_node)) { /* It is against this limitation. Remove it form the current sifted state. */ err = sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates); if (BE (err != REG_NOERROR, 0)) return err; --node_idx; } } } else /* (ent->subexp_to != str_idx) */ { for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) { Idx node = dest_nodes->elems[node_idx]; re_token_type_t type = dfa->nodes[node].type; if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP) { if (subexp_idx != dfa->nodes[node].opr.idx) continue; /* It is against this limitation. Remove it form the current sifted state. */ err = sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates); if (BE (err != REG_NOERROR, 0)) return err; } } } } return REG_NOERROR;}static reg_errcode_tinternal_functionsift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, Idx str_idx, const re_node_set *candidates){ const re_dfa_t *const dfa = mctx->dfa; reg_errcode_t err; Idx node_idx, node; re_sift_context_t local_sctx; Idx first_idx = search_cur_bkref_entry (mctx, str_idx); if (first_idx == REG_MISSING) return REG_NOERROR; local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */ for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) { Idx enabled_idx; re_tok
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -