📄 regexec.c
字号:
return 1; } return 0;}static intcheck_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node, str_idx) re_dfa_t *dfa; re_match_context_t *mctx; re_node_set *eclosures; int limit, subexp_idx, node, str_idx;{ struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; int pos = (str_idx < lim->subexp_from ? -1 : (lim->subexp_to < str_idx ? 1 : 0)); if (pos == 0 && (str_idx == lim->subexp_from || str_idx == lim->subexp_to)) { int node_idx; for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) { int node = eclosures->elems[node_idx]; re_token_type_t type= dfa->nodes[node].type; if (type == OP_BACK_REF) { int bi = search_cur_bkref_entry (mctx, str_idx); for (; bi < mctx->nbkref_ents; ++bi) { struct re_backref_cache_entry *ent = mctx->bkref_ents + bi; if (ent->str_idx > str_idx) break; if (ent->node == node && ent->subexp_from == ent->subexp_to) { int cpos, dst; dst = dfa->edests[node].elems[0]; cpos = check_dst_limits_calc_pos (dfa, mctx, limit, dfa->eclosures + dst, subexp_idx, dst, str_idx); if ((str_idx == lim->subexp_from && cpos == -1) || (str_idx == lim->subexp_to && cpos == 0)) return cpos; } } } if (type == OP_OPEN_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx && str_idx == lim->subexp_from) { pos = -1; break; } if (type == OP_CLOSE_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx && str_idx == lim->subexp_to) break; } if (node_idx == eclosures->nelem && str_idx == lim->subexp_to) pos = 1; } return pos;}/* Check the limitations of sub expressions LIMITS, and remove the nodes which are against limitations from DEST_NODES. */static reg_errcode_tcheck_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx) re_dfa_t *dfa; re_node_set *dest_nodes; const re_node_set *candidates; re_node_set *limits; struct re_backref_cache_entry *bkref_ents; int str_idx;{ reg_errcode_t err; int node_idx, lim_idx; for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) { int subexp_idx; struct re_backref_cache_entry *ent; ent = bkref_ents + limits->elems[lim_idx]; if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) continue; /* This is unrelated limitation. */ subexp_idx = dfa->nodes[ent->node].opr.idx - 1; if (ent->subexp_to == str_idx) { int ops_node = -1; int cls_node = -1; for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) { int node = dest_nodes->elems[node_idx]; re_token_type_t type= dfa->nodes[node].type; if (type == OP_OPEN_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx) ops_node = node; else if (type == OP_CLOSE_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx) cls_node = node; } /* Check the limitation of the open subexpression. */ /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */ if (ops_node >= 0) { err = sub_epsilon_src_nodes(dfa, ops_node, dest_nodes, candidates); if (BE (err != REG_NOERROR, 0)) return err; } /* Check the limitation of the close subexpression. */ for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) { int node = dest_nodes->elems[node_idx]; if (!re_node_set_contains (dfa->inveclosures + node, cls_node) && !re_node_set_contains (dfa->eclosures + node, cls_node)) { /* It is against this limitation. Remove it form the current sifted state. */ err = sub_epsilon_src_nodes(dfa, node, dest_nodes, candidates); if (BE (err != REG_NOERROR, 0)) return err; --node_idx; } } } else /* (ent->subexp_to != str_idx) */ { for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) { int node = dest_nodes->elems[node_idx]; re_token_type_t type= dfa->nodes[node].type; if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP) { if (subexp_idx != dfa->nodes[node].opr.idx) continue; if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx) || (type == OP_OPEN_SUBEXP)) { /* It is against this limitation. Remove it form the current sifted state. */ err = sub_epsilon_src_nodes(dfa, node, dest_nodes, candidates); if (BE (err != REG_NOERROR, 0)) return err; } } } } } return REG_NOERROR;}static reg_errcode_tsift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes) const regex_t *preg; re_match_context_t *mctx; re_sift_context_t *sctx; int str_idx; re_node_set *dest_nodes;{ reg_errcode_t err; re_dfa_t *dfa = (re_dfa_t *)preg->buffer; int node_idx, node; re_sift_context_t local_sctx; const re_node_set *candidates; candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set : &mctx->state_log[str_idx]->nodes); local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */ for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) { int cur_bkref_idx = re_string_cur_idx (mctx->input); re_token_type_t type; node = candidates->elems[node_idx]; type = dfa->nodes[node].type; if (node == sctx->cur_bkref && str_idx == cur_bkref_idx) continue; /* Avoid infinite loop for the REs like "()\1+". */ if (node == sctx->last_node && str_idx == sctx->last_str_idx) continue; if (type == OP_BACK_REF) { int enabled_idx = search_cur_bkref_entry (mctx, str_idx); for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx) { int disabled_idx, subexp_len, to_idx, dst_node; struct re_backref_cache_entry *entry; entry = mctx->bkref_ents + enabled_idx; if (entry->str_idx > str_idx) break; if (entry->node != node) continue; subexp_len = entry->subexp_to - entry->subexp_from; to_idx = str_idx + subexp_len; dst_node = (subexp_len ? dfa->nexts[node] : dfa->edests[node].elems[0]); if (to_idx > sctx->last_str_idx || sctx->sifted_states[to_idx] == NULL || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node) || check_dst_limits (dfa, &sctx->limits, mctx, node, str_idx, dst_node, to_idx)) continue; { re_dfastate_t *cur_state; entry->flag = 0; for (disabled_idx = enabled_idx + 1; disabled_idx < mctx->nbkref_ents; ++disabled_idx) { struct re_backref_cache_entry *entry2; entry2 = mctx->bkref_ents + disabled_idx; if (entry2->str_idx > str_idx) break; entry2->flag = (entry2->node == node) ? 1 : entry2->flag; } if (local_sctx.sifted_states == NULL) { local_sctx = *sctx; err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); if (BE (err != REG_NOERROR, 0)) goto free_return; } local_sctx.last_node = node; local_sctx.last_str_idx = str_idx; err = re_node_set_insert (&local_sctx.limits, enabled_idx); if (BE (err < 0, 0)) { err = REG_ESPACE; goto free_return; } cur_state = local_sctx.sifted_states[str_idx]; err = sift_states_backward (preg, mctx, &local_sctx); if (BE (err != REG_NOERROR, 0)) goto free_return; if (sctx->limited_states != NULL) { err = merge_state_array (dfa, sctx->limited_states, local_sctx.sifted_states, str_idx + 1); if (BE (err != REG_NOERROR, 0)) goto free_return; } local_sctx.sifted_states[str_idx] = cur_state; re_node_set_remove (&local_sctx.limits, enabled_idx); /* We must not use the variable entry here, since mctx->bkref_ents might be realloced. */ mctx->bkref_ents[enabled_idx].flag = 1; } } enabled_idx = search_cur_bkref_entry (mctx, str_idx); for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx) { struct re_backref_cache_entry *entry; entry = mctx->bkref_ents + enabled_idx; if (entry->str_idx > str_idx) break; if (entry->node == node) entry->flag = 0; } } } err = REG_NOERROR; free_return: if (local_sctx.sifted_states != NULL) { re_node_set_free (&local_sctx.limits); } return err;}#ifdef RE_ENABLE_I18Nstatic intsift_states_iter_mb (preg, mctx, sctx, node_idx, str_idx, max_str_idx) const regex_t *preg; const re_match_context_t *mctx; re_sift_context_t *sctx; int node_idx, str_idx, max_str_idx;{ re_dfa_t *dfa = (re_dfa_t *) preg->buffer; int naccepted; /* Check the node can accept `multi byte'. */ naccepted = check_node_accept_bytes (preg, node_idx, mctx->input, str_idx); if (naccepted > 0 && str_idx + naccepted <= max_str_idx && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], dfa->nexts[node_idx])) /* The node can't accept the `multi byte', or the destination was already throwed away, then the node could't accept the current input `multi byte'. */ naccepted = 0; /* Otherwise, it is sure that the node could accept `naccepted' bytes input. */ return naccepted;}#endif /* RE_ENABLE_I18N *//* Functions for state transition. *//* Return the next state to which the current state STATE will transit by accepting the current input byte, and update STATE_LOG if necessary. If STATE can accept a multibyte char/collating element/back reference update the destination of STATE_LOG. */static re_dfastate_t *transit_state (err, preg, mctx, state, fl_search) reg_errcode_t *err; const regex_t *preg; re_match_context_t *mctx; re_dfastate_t *state; int fl_search;{ re_dfa_t *dfa = (re_dfa_t *) preg->buffer; re_dfastate_t **trtable, *next_state; unsigned char ch; int cur_idx; if (re_string_cur_idx (mctx->input) + 1 >= mctx->input->bufs_len || (re_string_cur_idx (mctx->input) + 1 >= mctx->input->valid_len && mctx->input->valid_len < mctx->input->len)) { *err = extend_buffers (mctx); if (BE (*err != REG_NOERROR, 0)) return NULL; } *err = REG_NOERROR; if (state == NULL) { next_state = state; re_string_skip_bytes (mctx->input, 1); } else {#ifdef RE_ENABLE_I18N /* If the current state can accept multibyte. */ if (state->accept_mb) { *err = transit_state_mb (preg, state, mctx); if (BE (*err != REG_NOERROR, 0)) return NULL; }#endif /* RE_ENABLE_I18N */ /* Then decide the next state with the single byte. */ if (1) { /* Use transition table */ ch = re_string_fetch_byte (mctx->input); trtable = fl_search ? state->trtable_search : state->trtable; if (trtable == NULL) { trtable = build_trtable (preg, state, fl_search); if (fl_search) state->trtable_search = trtable; else state->trtable = trtable; } next_state = trtable[ch]; } else { /* don't use transition table */ next_state = transit_state_sb (err, preg, state, fl_search, mctx); if (BE (next_state == NULL && err != REG_NOERROR, 0)) return NULL; } } cur_idx = re_string_cur_idx (mctx->input); /* Update the state_log if we need. */ if (mctx->state_log != NULL) { if (cur_idx > mctx->state_log_top) { mctx->state_log[cur_idx] = next_state; mctx->state_log_top = cur_idx; } else if (mctx->state_log[cur_idx] == 0) { mctx->state_log[cur_idx] = next_state; } else { re_dfastate_t *pstate; unsigned int context; re_node_set next_nodes, *log_nodes, *table_nodes = NULL; /* If (state_log[cur_idx] != 0), it implies that cur_idx is the destination of a multibyte char/collating element/ back reference. Then the next state is the union set of these destinations and the results of the transition table. */ pstate = mctx->state_log[cur_idx]; log_nodes = pstate->entrance_nodes; if (next_state != NULL) { table_nodes = next_state->entrance_nodes; *err = re_node_set_init_union (&next_nodes, table_nodes, log_nodes); if (BE (*err != REG_NOERROR, 0)) return NULL; } else next_nodes = *log_nodes; /* Note: We already add the nodes of the initial state, then we don't need to add them here. */ context = re_string_context_at (mctx->input, re_string_cur_idx (mctx->input) - 1, mctx->eflags, preg->newline_anchor); next_state = mctx->state_log[cur_idx] = re_acquire_state_context (err, dfa, &next_nodes, context); /* We don't need to check errors here, since the return value of this function is next_state and ERR is already set. */ if (table_nodes != NULL) re_node_set_free (&next_nodes); } } /* Check OP_OPEN_SUBEXP in the current state in case that we use them late
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -