📄 regex_internal.c
字号:
if (elem < set->elems[0]) { idx = 0; for (idx = set->nelem; idx > 0; idx--) set->elems[idx] = set->elems[idx - 1]; } else { for (idx = set->nelem; set->elems[idx - 1] > elem; idx--) set->elems[idx] = set->elems[idx - 1]; } /* Insert the new element. */ set->elems[idx] = elem; ++set->nelem; return true;}/* Insert the new element ELEM to the re_node_set* SET. SET should not already have any element greater than or equal to ELEM. Return true if successful. */static boolinternal_functionre_node_set_insert_last (re_node_set *set, Idx elem){ /* Realloc if we need. */ if (set->alloc == set->nelem) { Idx *new_elems; set->alloc = (set->alloc + 1) * 2; new_elems = re_realloc (set->elems, Idx, set->alloc); if (BE (new_elems == NULL, 0)) return false; set->elems = new_elems; } /* Insert the new element. */ set->elems[set->nelem++] = elem; return true;}/* Compare two node sets SET1 and SET2. Return true if SET1 and SET2 are equivalent. */static boolinternal_function __attribute ((pure))re_node_set_compare (const re_node_set *set1, const re_node_set *set2){ Idx i; if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem) return false; for (i = set1->nelem ; REG_VALID_INDEX (--i) ; ) if (set1->elems[i] != set2->elems[i]) return false; return true;}/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */static Idxinternal_function __attribute ((pure))re_node_set_contains (const re_node_set *set, Idx elem){ __re_size_t idx, right, mid; if (! REG_VALID_NONZERO_INDEX (set->nelem)) return 0; /* Binary search the element. */ idx = 0; right = set->nelem - 1; while (idx < right) { mid = (idx + right) / 2; if (set->elems[mid] < elem) idx = mid + 1; else right = mid; } return set->elems[idx] == elem ? idx + 1 : 0;}static voidinternal_functionre_node_set_remove_at (re_node_set *set, Idx idx){ if (idx < 0 || idx >= set->nelem) return; --set->nelem; for (; idx < set->nelem; idx++) set->elems[idx] = set->elems[idx + 1];}/* Add the token TOKEN to dfa->nodes, and return the index of the token. Or return REG_MISSING if an error occurred. */static Idxinternal_functionre_dfa_add_node (re_dfa_t *dfa, re_token_t token){ if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0)) { size_t new_nodes_alloc = dfa->nodes_alloc * 2; Idx *new_nexts, *new_indices; re_node_set *new_edests, *new_eclosures; re_token_t *new_nodes; size_t max_object_size = MAX (sizeof (re_token_t), MAX (sizeof (re_node_set), sizeof (Idx))); /* Avoid overflows. */ if (BE (SIZE_MAX / 2 / max_object_size < dfa->nodes_alloc, 0)) return REG_MISSING; new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc); if (BE (new_nodes == NULL, 0)) return REG_MISSING; dfa->nodes = new_nodes; new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc); new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc); new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc); new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc); if (BE (new_nexts == NULL || new_indices == NULL || new_edests == NULL || new_eclosures == NULL, 0)) return REG_MISSING; dfa->nexts = new_nexts; dfa->org_indices = new_indices; dfa->edests = new_edests; dfa->eclosures = new_eclosures; dfa->nodes_alloc = new_nodes_alloc; } dfa->nodes[dfa->nodes_len] = token; dfa->nodes[dfa->nodes_len].constraint = 0;#ifdef RE_ENABLE_I18N { int type = token.type; dfa->nodes[dfa->nodes_len].accept_mb = (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET; }#endif dfa->nexts[dfa->nodes_len] = REG_MISSING; re_node_set_init_empty (dfa->edests + dfa->nodes_len); re_node_set_init_empty (dfa->eclosures + dfa->nodes_len); return dfa->nodes_len++;}static inline re_hashval_tinternal_functioncalc_state_hash (const re_node_set *nodes, unsigned int context){ re_hashval_t hash = nodes->nelem + context; Idx i; for (i = 0 ; i < nodes->nelem ; i++) hash += nodes->elems[i]; return hash;}/* Search for the state whose node_set is equivalent to NODES. Return the pointer to the state, if we found it in the DFA. Otherwise create the new one and return it. In case of an error return NULL and set the error code in ERR. Note: - We assume NULL as the invalid state, then it is possible that return value is NULL and ERR is REG_NOERROR. - We never return non-NULL value in case of any errors, it is for optimization. */static re_dfastate_t *internal_functionre_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa, const re_node_set *nodes){ re_hashval_t hash; re_dfastate_t *new_state; struct re_state_table_entry *spot; Idx i;#ifdef lint /* Suppress bogus uninitialized-variable warnings. */ *err = REG_NOERROR;#endif if (BE (nodes->nelem == 0, 0)) { *err = REG_NOERROR; return NULL; } hash = calc_state_hash (nodes, 0); spot = dfa->state_table + (hash & dfa->state_hash_mask); for (i = 0 ; i < spot->num ; i++) { re_dfastate_t *state = spot->array[i]; if (hash != state->hash) continue; if (re_node_set_compare (&state->nodes, nodes)) return state; } /* There are no appropriate state in the dfa, create the new one. */ new_state = create_ci_newstate (dfa, nodes, hash); if (BE (new_state == NULL, 0)) *err = REG_ESPACE; return new_state;}/* Search for the state whose node_set is equivalent to NODES and whose context is equivalent to CONTEXT. Return the pointer to the state, if we found it in the DFA. Otherwise create the new one and return it. In case of an error return NULL and set the error code in ERR. Note: - We assume NULL as the invalid state, then it is possible that return value is NULL and ERR is REG_NOERROR. - We never return non-NULL value in case of any errors, it is for optimization. */static re_dfastate_t *internal_functionre_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa, const re_node_set *nodes, unsigned int context){ re_hashval_t hash; re_dfastate_t *new_state; struct re_state_table_entry *spot; Idx i;#ifdef lint /* Suppress bogus uninitialized-variable warnings. */ *err = REG_NOERROR;#endif if (nodes->nelem == 0) { *err = REG_NOERROR; return NULL; } hash = calc_state_hash (nodes, context); spot = dfa->state_table + (hash & dfa->state_hash_mask); for (i = 0 ; i < spot->num ; i++) { re_dfastate_t *state = spot->array[i]; if (state->hash == hash && state->context == context && re_node_set_compare (state->entrance_nodes, nodes)) return state; } /* There are no appropriate state in `dfa', create the new one. */ new_state = create_cd_newstate (dfa, nodes, context, hash); if (BE (new_state == NULL, 0)) *err = REG_ESPACE; return new_state;}/* Finish initialization of the new state NEWSTATE, and using its hash value HASH put in the appropriate bucket of DFA's state table. Return value indicates the error code if failed. */static reg_errcode_tregister_state (const re_dfa_t *dfa, re_dfastate_t *newstate, re_hashval_t hash){ struct re_state_table_entry *spot; reg_errcode_t err; Idx i; newstate->hash = hash; err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem); if (BE (err != REG_NOERROR, 0)) return REG_ESPACE; for (i = 0; i < newstate->nodes.nelem; i++) { Idx elem = newstate->nodes.elems[i]; if (!IS_EPSILON_NODE (dfa->nodes[elem].type)) if (BE (! re_node_set_insert_last (&newstate->non_eps_nodes, elem), 0)) return REG_ESPACE; } spot = dfa->state_table + (hash & dfa->state_hash_mask); if (BE (spot->alloc <= spot->num, 0)) { Idx new_alloc = 2 * spot->num + 2; re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *, new_alloc); if (BE (new_array == NULL, 0)) return REG_ESPACE; spot->array = new_array; spot->alloc = new_alloc; } spot->array[spot->num++] = newstate; return REG_NOERROR;}static voidfree_state (re_dfastate_t *state){ re_node_set_free (&state->non_eps_nodes); re_node_set_free (&state->inveclosure); if (state->entrance_nodes != &state->nodes) { re_node_set_free (state->entrance_nodes); re_free (state->entrance_nodes); } re_node_set_free (&state->nodes); re_free (state->word_trtable); re_free (state->trtable); re_free (state);}/* Create the new state which is independ of contexts. Return the new state if succeeded, otherwise return NULL. */static re_dfastate_t *internal_functioncreate_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, re_hashval_t hash){ Idx i; reg_errcode_t err; re_dfastate_t *newstate; newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); if (BE (newstate == NULL, 0)) return NULL; err = re_node_set_init_copy (&newstate->nodes, nodes); if (BE (err != REG_NOERROR, 0)) { re_free (newstate); return NULL; } newstate->entrance_nodes = &newstate->nodes; for (i = 0 ; i < nodes->nelem ; i++) { re_token_t *node = dfa->nodes + nodes->elems[i]; re_token_type_t type = node->type; if (type == CHARACTER && !node->constraint) continue;#ifdef RE_ENABLE_I18N newstate->accept_mb |= node->accept_mb;#endif /* RE_ENABLE_I18N */ /* If the state has the halt node, the state is a halt state. */ if (type == END_OF_RE) newstate->halt = 1; else if (type == OP_BACK_REF) newstate->has_backref = 1; else if (type == ANCHOR || node->constraint) newstate->has_constraint = 1; } err = register_state (dfa, newstate, hash); if (BE (err != REG_NOERROR, 0)) { free_state (newstate); newstate = NULL; } return newstate;}/* Create the new state which is depend on the context CONTEXT. Return the new state if succeeded, otherwise return NULL. */static re_dfastate_t *internal_functioncreate_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes, unsigned int context, re_hashval_t hash){ Idx i, nctx_nodes = 0; reg_errcode_t err; re_dfastate_t *newstate; newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); if (BE (newstate == NULL, 0)) return NULL; err = re_node_set_init_copy (&newstate->nodes, nodes); if (BE (err != REG_NOERROR, 0)) { re_free (newstate); return NULL; } newstate->context = context; newstate->entrance_nodes = &newstate->nodes; for (i = 0 ; i < nodes->nelem ; i++) { unsigned int constraint = 0; re_token_t *node = dfa->nodes + nodes->elems[i]; re_token_type_t type = node->type; if (node->constraint) constraint = node->constraint; if (type == CHARACTER && !constraint) continue;#ifdef RE_ENABLE_I18N newstate->accept_mb |= node->accept_mb;#endif /* RE_ENABLE_I18N */ /* If the state has the halt node, the state is a halt state. */ if (type == END_OF_RE) newstate->halt = 1; else if (type == OP_BACK_REF) newstate->has_backref = 1; else if (type == ANCHOR) constraint = node->opr.ctx_type; if (constraint) { if (newstate->entrance_nodes == &newstate->nodes) { newstate->entrance_nodes = re_malloc (re_node_set, 1); if (BE (newstate->entrance_nodes == NULL, 0)) { free_state (newstate); return NULL; } re_node_set_init_copy (newstate->entrance_nodes, nodes); nctx_nodes = 0; newstate->has_constraint = 1; } if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context)) { re_node_set_remove_at (&newstate->nodes, i - nctx_nodes); ++nctx_nodes; } } } err = register_state (dfa, newstate, hash); if (BE (err != REG_NOERROR, 0)) { free_state (newstate); newstate = NULL; } return newstate;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -