regexec.c

来自「gnu tar 源码包。 tar 软件是 Unix 系统下的一个打包软件」· C语言代码 · 共 2,175 行 · 第 1/5 页
2,175 行
  return REG_NOERROR;}/* Helper functions.  */static reg_errcode_tinternal_functionclean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx){  Idx top = mctx->state_log_top;  if (next_state_log_idx >= mctx->input.bufs_len      || (next_state_log_idx >= mctx->input.valid_len	  && mctx->input.valid_len < mctx->input.len))    {      reg_errcode_t err;      err = extend_buffers (mctx);      if (BE (err != REG_NOERROR, 0))	return err;    }  if (top < next_state_log_idx)    {      memset (mctx->state_log + top + 1, '\0',	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));      mctx->state_log_top = next_state_log_idx;    }  return REG_NOERROR;}static reg_errcode_tinternal_functionmerge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,		   re_dfastate_t **src, Idx num){  Idx st_idx;  reg_errcode_t err;  for (st_idx = 0; st_idx < num; ++st_idx)    {      if (dst[st_idx] == NULL)	dst[st_idx] = src[st_idx];      else if (src[st_idx] != NULL)	{	  re_node_set merged_set;	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,					&src[st_idx]->nodes);	  if (BE (err != REG_NOERROR, 0))	    return err;	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);	  re_node_set_free (&merged_set);	  if (BE (err != REG_NOERROR, 0))	    return err;	}    }  return REG_NOERROR;}static reg_errcode_tinternal_functionupdate_cur_sifted_state (const re_match_context_t *mctx,			 re_sift_context_t *sctx, Idx str_idx,			 re_node_set *dest_nodes){  const re_dfa_t *const dfa = mctx->dfa;  reg_errcode_t err = REG_NOERROR;  const re_node_set *candidates;  candidates = ((mctx->state_log[str_idx] == NULL) ? NULL		: &mctx->state_log[str_idx]->nodes);  if (dest_nodes->nelem == 0)    sctx->sifted_states[str_idx] = NULL;  else    {      if (candidates)	{	  /* At first, add the nodes which can epsilon transit to a node in	     DEST_NODE.  */	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);	  if (BE (err != REG_NOERROR, 0))	    return err;	  /* Then, check the limitations in the current sift_context.  */	  if (sctx->limits.nelem)	    {	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,					 mctx->bkref_ents, str_idx);	      if (BE (err != REG_NOERROR, 0))		return err;	    }	}      sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);      if (BE (err != REG_NOERROR, 0))	return err;    }  if (candidates && mctx->state_log[str_idx]->has_backref)    {      err = sift_states_bkref (mctx, sctx, str_idx, candidates);      if (BE (err != REG_NOERROR, 0))	return err;    }  return REG_NOERROR;}static reg_errcode_tinternal_functionadd_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,		       const re_node_set *candidates){  reg_errcode_t err = REG_NOERROR;  Idx i;  re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);  if (BE (err != REG_NOERROR, 0))    return err;  if (!state->inveclosure.alloc)    {      err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);      if (BE (err != REG_NOERROR, 0))        return REG_ESPACE;      for (i = 0; i < dest_nodes->nelem; i++)        re_node_set_merge (&state->inveclosure,			   dfa->inveclosures + dest_nodes->elems[i]);    }  return re_node_set_add_intersect (dest_nodes, candidates,				    &state->inveclosure);}static reg_errcode_tinternal_functionsub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,		       const re_node_set *candidates){    Idx ecl_idx;    reg_errcode_t err;    re_node_set *inv_eclosure = dfa->inveclosures + node;    re_node_set except_nodes;    re_node_set_init_empty (&except_nodes);    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)      {	Idx cur_node = inv_eclosure->elems[ecl_idx];	if (cur_node == node)	  continue;	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))	  {	    Idx edst1 = dfa->edests[cur_node].elems[0];	    Idx edst2 = ((dfa->edests[cur_node].nelem > 1)			 ? dfa->edests[cur_node].elems[1] : REG_MISSING);	    if ((!re_node_set_contains (inv_eclosure, edst1)		 && re_node_set_contains (dest_nodes, edst1))		|| (REG_VALID_NONZERO_INDEX (edst2)		    && !re_node_set_contains (inv_eclosure, edst2)		    && re_node_set_contains (dest_nodes, edst2)))	      {		err = re_node_set_add_intersect (&except_nodes, candidates,						 dfa->inveclosures + cur_node);		if (BE (err != REG_NOERROR, 0))		  {		    re_node_set_free (&except_nodes);		    return err;		  }	      }	  }      }    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)      {	Idx cur_node = inv_eclosure->elems[ecl_idx];	if (!re_node_set_contains (&except_nodes, cur_node))	  {	    Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;	    re_node_set_remove_at (dest_nodes, idx);	  }      }    re_node_set_free (&except_nodes);    return REG_NOERROR;}static boolinternal_functioncheck_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,		  Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx){  const re_dfa_t *const dfa = mctx->dfa;  Idx lim_idx, src_pos, dst_pos;  Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);  Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)    {      Idx subexp_idx;      struct re_backref_cache_entry *ent;      ent = mctx->bkref_ents + limits->elems[lim_idx];      subexp_idx = dfa->nodes[ent->node].opr.idx;      dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],					   subexp_idx, dst_node, dst_idx,					   dst_bkref_idx);      src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],					   subexp_idx, src_node, src_idx,					   src_bkref_idx);      /* In case of:	 <src> <dst> ( <subexp> )	 ( <subexp> ) <src> <dst>	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */      if (src_pos == dst_pos)	continue; /* This is unrelated limitation.  */      else	return true;    }  return false;}static intinternal_functioncheck_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,			     Idx subexp_idx, Idx from_node, Idx bkref_idx){  const re_dfa_t *const dfa = mctx->dfa;  const re_node_set *eclosures = dfa->eclosures + from_node;  Idx node_idx;  /* Else, we are on the boundary: examine the nodes on the epsilon     closure.  */  for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)    {      Idx node = eclosures->elems[node_idx];      switch (dfa->nodes[node].type)	{	case OP_BACK_REF:	  if (bkref_idx != REG_MISSING)	    {	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;	      do	        {		  Idx dst;		  int cpos;		  if (ent->node != node)		    continue;		  if (subexp_idx < BITSET_WORD_BITS		      && !(ent->eps_reachable_subexps_map			   & ((bitset_word_t) 1 << subexp_idx)))		    continue;		  /* Recurse trying to reach the OP_OPEN_SUBEXP and		     OP_CLOSE_SUBEXP cases below.  But, if the		     destination node is the same node as the source		     node, don't recurse because it would cause an		     infinite loop: a regex that exhibits this behavior		     is ()\1*\1*  */		  dst = dfa->edests[node].elems[0];		  if (dst == from_node)		    {		      if (boundaries & 1)		        return -1;		      else /* if (boundaries & 2) */		        return 0;		    }		  cpos =		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,						 dst, bkref_idx);		  if (cpos == -1 /* && (boundaries & 1) */)		    return -1;		  if (cpos == 0 && (boundaries & 2))		    return 0;		  if (subexp_idx < BITSET_WORD_BITS)		    ent->eps_reachable_subexps_map		      &= ~((bitset_word_t) 1 << subexp_idx);	        }	      while (ent++->more);	    }	  break;	case OP_OPEN_SUBEXP:	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)	    return -1;	  break;	case OP_CLOSE_SUBEXP:	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)	    return 0;	  break;	default:	    break;	}    }  return (boundaries & 2) ? 1 : 0;}static intinternal_functioncheck_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,			   Idx subexp_idx, Idx from_node, Idx str_idx,			   Idx bkref_idx){  struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;  int boundaries;  /* If we are outside the range of the subexpression, return -1 or 1.  */  if (str_idx < lim->subexp_from)    return -1;  if (lim->subexp_to < str_idx)    return 1;  /* If we are within the subexpression, return 0.  */  boundaries = (str_idx == lim->subexp_from);  boundaries |= (str_idx == lim->subexp_to) << 1;  if (boundaries == 0)    return 0;  /* Else, examine epsilon closure.  */  return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,				      from_node, bkref_idx);}/* Check the limitations of sub expressions LIMITS, and remove the nodes   which are against limitations from DEST_NODES. */static reg_errcode_tinternal_functioncheck_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,		     const re_node_set *candidates, re_node_set *limits,		     struct re_backref_cache_entry *bkref_ents, Idx str_idx){  reg_errcode_t err;  Idx node_idx, lim_idx;  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)    {      Idx subexp_idx;      struct re_backref_cache_entry *ent;      ent = bkref_ents + limits->elems[lim_idx];      if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)	continue; /* This is unrelated limitation.  */      subexp_idx = dfa->nodes[ent->node].opr.idx;      if (ent->subexp_to == str_idx)	{	  Idx ops_node = REG_MISSING;	  Idx cls_node = REG_MISSING;	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)	    {	      Idx node = dest_nodes->elems[node_idx];	      re_token_type_t type = dfa->nodes[node].type;	      if (type == OP_OPEN_SUBEXP		  && subexp_idx == dfa->nodes[node].opr.idx)		ops_node = node;	      else if (type == OP_CLOSE_SUBEXP		       && subexp_idx == dfa->nodes[node].opr.idx)		cls_node = node;	    }	  /* Check the limitation of the open subexpression.  */	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */	  if (REG_VALID_INDEX (ops_node))	    {	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,					   candidates);	      if (BE (err != REG_NOERROR, 0))		return err;	    }	  /* Check the limitation of the close subexpression.  */	  if (REG_VALID_INDEX (cls_node))	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)	      {		Idx node = dest_nodes->elems[node_idx];		if (!re_node_set_contains (dfa->inveclosures + node,					   cls_node)		    && !re_node_set_contains (dfa->eclosures + node,					      cls_node))		  {		    /* It is against this limitation.		       Remove it form the current sifted state.  */		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,						 candidates);		    if (BE (err != REG_NOERROR, 0))		      return err;		    --node_idx;		  }	      }	}      else /* (ent->subexp_to != str_idx)  */	{	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)	    {	      Idx node = dest_nodes->elems[node_idx];	      re_token_type_t type = dfa->nodes[node].type;	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)		{		  if (subexp_idx != dfa->nodes[node].opr.idx)		    continue;		  /* It is against this limitation.		     Remove it form the current sifted state.  */		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,					       candidates);		  if (BE (err != REG_NOERROR, 0))		    return err;		}	    }	}    }  return REG_NOERROR;}static reg_errcode_tinternal_functionsift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,		   Idx str_idx, const re_node_set *candidates){  const re_dfa_t *const dfa = mctx->dfa;  reg_errcode_t err;  Idx node_idx, node;  re_sift_context_t local_sctx;  Idx first_idx = search_cur_bkref_entry (mctx, str_idx);  if (first_idx == REG_MISSING)    return REG_NOERROR;  local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */  for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)    {      Idx enabled_idx;      re_tok
regexec.c - 源码说明

本页面展示了「gnu tar 源码包。 tar 软件是 Unix 系统下的一个打包软件」中的 regexec.c 源码文件，采用 C语言编程语言编写，共 2,175 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与tar相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?