📄 regexec.c
字号:
if (fs) { for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) break; if (reg_idx == nmatch) { re_node_set_free (&eps_via_nodes); return free_fail_stack_return (fs); } cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, &eps_via_nodes); } else { re_node_set_free (&eps_via_nodes); return REG_NOERROR; } } /* Proceed to next node. */ cur_node = proceed_next_node (preg, nmatch, pmatch, mctx, &idx, cur_node, &eps_via_nodes, fs); if (BE (cur_node < 0, 0)) { if (cur_node == -2) return REG_ESPACE; if (fs) cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, &eps_via_nodes); else { re_node_set_free (&eps_via_nodes); return REG_NOMATCH; } } } re_node_set_free (&eps_via_nodes); return free_fail_stack_return (fs);}static reg_errcode_tfree_fail_stack_return (fs) struct re_fail_stack_t *fs;{ if (fs) { int fs_idx; for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) { re_node_set_free (&fs->stack[fs_idx].eps_via_nodes); re_free (fs->stack[fs_idx].regs); } re_free (fs->stack); } return REG_NOERROR;}static voidupdate_regs (dfa, pmatch, cur_node, cur_idx, nmatch) re_dfa_t *dfa; regmatch_t *pmatch; int cur_node, cur_idx, nmatch;{ int type = dfa->nodes[cur_node].type; int reg_num; if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP) return; reg_num = dfa->nodes[cur_node].opr.idx + 1; if (reg_num >= nmatch) return; if (type == OP_OPEN_SUBEXP) { /* We are at the first node of this sub expression. */ pmatch[reg_num].rm_so = cur_idx; pmatch[reg_num].rm_eo = -1; } else if (type == OP_CLOSE_SUBEXP) /* We are at the first node of this sub expression. */ pmatch[reg_num].rm_eo = cur_idx;}#define NUMBER_OF_STATE 1/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0 and sift the nodes in each states according to the following rules. Updated state_log will be wrote to STATE_LOG. Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... 1. When STR_IDX == MATCH_LAST(the last index in the state_log): If `a' isn't the LAST_NODE and `a' can't epsilon transit to the LAST_NODE, we throw away the node `a'. 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts string `s' and transit to `b': i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw away the node `a'. ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is throwed away, we throw away the node `a'. 3. When 0 <= STR_IDX < n and 'a' epsilon transit to 'b': i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the node `a'. ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is throwed away, we throw away the node `a'. */#define STATE_NODE_CONTAINS(state,node) \ ((state) != NULL && re_node_set_contains (&(state)->nodes, node))static reg_errcode_tsift_states_backward (preg, mctx, sctx) const regex_t *preg; re_match_context_t *mctx; re_sift_context_t *sctx;{ reg_errcode_t err; re_dfa_t *dfa = (re_dfa_t *)preg->buffer; int null_cnt = 0; int str_idx = sctx->last_str_idx; re_node_set cur_dest; re_node_set *cur_src; /* Points the state_log[str_idx]->nodes */#ifdef DEBUG assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);#endif cur_src = &mctx->state_log[str_idx]->nodes; /* Build sifted state_log[str_idx]. It has the nodes which can epsilon transit to the last_node and the last_node itself. */ err = re_node_set_init_1 (&cur_dest, sctx->last_node); if (BE (err != REG_NOERROR, 0)) return err; err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest); if (BE (err != REG_NOERROR, 0)) goto free_return; /* Then check each states in the state_log. */ while (str_idx > 0) { int i, ret; /* Update counters. */ null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0; if (null_cnt > mctx->max_mb_elem_len) { memset (sctx->sifted_states, '\0', sizeof (re_dfastate_t *) * str_idx); re_node_set_free (&cur_dest); return REG_NOERROR; } re_node_set_empty (&cur_dest); --str_idx; cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set : &mctx->state_log[str_idx]->nodes); /* Then build the next sifted state. We build the next sifted state on `cur_dest', and update `sifted_states[str_idx]' with `cur_dest'. Note: `cur_dest' is the sifted state from `state_log[str_idx + 1]'. `cur_src' points the node_set of the old `state_log[str_idx]'. */ for (i = 0; i < cur_src->nelem; i++) { int prev_node = cur_src->elems[i]; int naccepted = 0; re_token_type_t type = dfa->nodes[prev_node].type; if (IS_EPSILON_NODE(type)) continue;#ifdef RE_ENABLE_I18N /* If the node may accept `multi byte'. */ if (ACCEPT_MB_NODE (type)) naccepted = sift_states_iter_mb (preg, mctx, sctx, prev_node, str_idx, sctx->last_str_idx);#endif /* RE_ENABLE_I18N */ /* We don't check backreferences here. See update_cur_sifted_state(). */ if (!naccepted && check_node_accept (preg, dfa->nodes + prev_node, mctx, str_idx) && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1], dfa->nexts[prev_node])) naccepted = 1; if (naccepted == 0) continue; if (sctx->limits.nelem) { int to_idx = str_idx + naccepted; if (check_dst_limits (dfa, &sctx->limits, mctx, dfa->nexts[prev_node], to_idx, prev_node, str_idx)) continue; } ret = re_node_set_insert (&cur_dest, prev_node); if (BE (ret == -1, 0)) { err = REG_ESPACE; goto free_return; } } /* Add all the nodes which satisfy the following conditions: - It can epsilon transit to a node in CUR_DEST. - It is in CUR_SRC. And update state_log. */ err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest); if (BE (err != REG_NOERROR, 0)) goto free_return; } err = REG_NOERROR; free_return: re_node_set_free (&cur_dest); return err;}/* Helper functions. */static inline reg_errcode_tclean_state_log_if_need (mctx, next_state_log_idx) re_match_context_t *mctx; int next_state_log_idx;{ int top = mctx->state_log_top; if (next_state_log_idx >= mctx->input->bufs_len || (next_state_log_idx >= mctx->input->valid_len && mctx->input->valid_len < mctx->input->len)) { reg_errcode_t err; err = extend_buffers (mctx); if (BE (err != REG_NOERROR, 0)) return err; } if (top < next_state_log_idx) { memset (mctx->state_log + top + 1, '\0', sizeof (re_dfastate_t *) * (next_state_log_idx - top)); mctx->state_log_top = next_state_log_idx; } return REG_NOERROR;}static reg_errcode_tmerge_state_array (dfa, dst, src, num) re_dfa_t *dfa; re_dfastate_t **dst; re_dfastate_t **src; int num;{ int st_idx; reg_errcode_t err; for (st_idx = 0; st_idx < num; ++st_idx) { if (dst[st_idx] == NULL) dst[st_idx] = src[st_idx]; else if (src[st_idx] != NULL) { re_node_set merged_set; err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, &src[st_idx]->nodes); if (BE (err != REG_NOERROR, 0)) return err; dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); re_node_set_free (&merged_set); if (BE (err != REG_NOERROR, 0)) return err; } } return REG_NOERROR;}static reg_errcode_tupdate_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes) const regex_t *preg; re_match_context_t *mctx; re_sift_context_t *sctx; int str_idx; re_node_set *dest_nodes;{ reg_errcode_t err; re_dfa_t *dfa = (re_dfa_t *)preg->buffer; const re_node_set *candidates; candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set : &mctx->state_log[str_idx]->nodes); /* At first, add the nodes which can epsilon transit to a node in DEST_NODE. */ if (dest_nodes->nelem) { err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); if (BE (err != REG_NOERROR, 0)) return err; } /* Then, check the limitations in the current sift_context. */ if (dest_nodes->nelem && sctx->limits.nelem) { err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, mctx->bkref_ents, str_idx); if (BE (err != REG_NOERROR, 0)) return err; } /* Update state_log. */ sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); if (BE (sctx->sifted_states[str_idx] == NULL && err != REG_NOERROR, 0)) return err; if ((mctx->state_log[str_idx] != NULL && mctx->state_log[str_idx]->has_backref)) { err = sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes); if (BE (err != REG_NOERROR, 0)) return err; } return REG_NOERROR;}static reg_errcode_tadd_epsilon_src_nodes (dfa, dest_nodes, candidates) re_dfa_t *dfa; re_node_set *dest_nodes; const re_node_set *candidates;{ reg_errcode_t err; int src_idx; re_node_set src_copy; err = re_node_set_init_copy (&src_copy, dest_nodes); if (BE (err != REG_NOERROR, 0)) return err; for (src_idx = 0; src_idx < src_copy.nelem; ++src_idx) { err = re_node_set_add_intersect (dest_nodes, candidates, dfa->inveclosures + src_copy.elems[src_idx]); if (BE (err != REG_NOERROR, 0)) { re_node_set_free (&src_copy); return err; } } re_node_set_free (&src_copy); return REG_NOERROR;}static reg_errcode_tsub_epsilon_src_nodes (dfa, node, dest_nodes, candidates) re_dfa_t *dfa; int node; re_node_set *dest_nodes; const re_node_set *candidates;{ int ecl_idx; reg_errcode_t err; re_node_set *inv_eclosure = dfa->inveclosures + node; re_node_set except_nodes; re_node_set_init_empty (&except_nodes); for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) { int cur_node = inv_eclosure->elems[ecl_idx]; if (cur_node == node) continue; if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) { int edst1 = dfa->edests[cur_node].elems[0]; int edst2 = ((dfa->edests[cur_node].nelem > 1) ? dfa->edests[cur_node].elems[1] : -1); if ((!re_node_set_contains (inv_eclosure, edst1) && re_node_set_contains (dest_nodes, edst1)) || (edst2 > 0 && !re_node_set_contains (inv_eclosure, edst2) && re_node_set_contains (dest_nodes, edst2))) { err = re_node_set_add_intersect (&except_nodes, candidates, dfa->inveclosures + cur_node); if (BE (err != REG_NOERROR, 0)) { re_node_set_free (&except_nodes); return err; } } } } for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) { int cur_node = inv_eclosure->elems[ecl_idx]; if (!re_node_set_contains (&except_nodes, cur_node)) { int idx = re_node_set_contains (dest_nodes, cur_node) - 1; re_node_set_remove_at (dest_nodes, idx); } } re_node_set_free (&except_nodes); return REG_NOERROR;}static intcheck_dst_limits (dfa, limits, mctx, dst_node, dst_idx, src_node, src_idx) re_dfa_t *dfa; re_node_set *limits; re_match_context_t *mctx; int dst_node, dst_idx, src_node, src_idx;{ int lim_idx, src_pos, dst_pos; for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) { int subexp_idx; struct re_backref_cache_entry *ent; ent = mctx->bkref_ents + limits->elems[lim_idx]; subexp_idx = dfa->nodes[ent->node].opr.idx - 1; dst_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx], dfa->eclosures + dst_node, subexp_idx, dst_node, dst_idx); src_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx], dfa->eclosures + src_node, subexp_idx, src_node, src_idx); /* In case of: <src> <dst> ( <subexp> ) ( <subexp> ) <src> <dst> ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */ if (src_pos == dst_pos) continue; /* This is unrelated limitation. */ else
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -