regexec.c

来自「gnu tar 源码包。 tar 软件是 Unix 系统下的一个打包软件」· C语言代码 · 共 2,175 行 · 第 1/5 页
2,175 行
	    dest_node = candidate;          else	    {	      /* In order to avoid infinite loop like "(a*)*", return the second	         epsilon-transition if the first was already considered.  */	      if (re_node_set_contains (eps_via_nodes, dest_node))	        return candidate;	      /* Otherwise, push the second epsilon-transition on the fail stack.  */	      else if (fs != NULL		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,				           eps_via_nodes))		return REG_ERROR;	      /* We know we are going to exit.  */	      break;	    }	}      return dest_node;    }  else    {      Idx naccepted = 0;      re_token_type_t type = dfa->nodes[node].type;#ifdef RE_ENABLE_I18N      if (dfa->nodes[node].accept_mb)	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);      else#endif /* RE_ENABLE_I18N */      if (type == OP_BACK_REF)	{	  Idx subexp_idx = dfa->nodes[node].opr.idx + 1;	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;	  if (fs != NULL)	    {	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)		return REG_MISSING;	      else if (naccepted)		{		  char *buf = (char *) re_string_get_buffer (&mctx->input);		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,			      naccepted) != 0)		    return REG_MISSING;		}	    }	  if (naccepted == 0)	    {	      Idx dest_node;	      ok = re_node_set_insert (eps_via_nodes, node);	      if (BE (! ok, 0))		return REG_ERROR;	      dest_node = dfa->edests[node].elems[0];	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,					dest_node))		return dest_node;	    }	}      if (naccepted != 0	  || check_node_accept (mctx, dfa->nodes + node, *pidx))	{	  Idx dest_node = dfa->nexts[node];	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,					       dest_node)))	    return REG_MISSING;	  re_node_set_empty (eps_via_nodes);	  return dest_node;	}    }  return REG_MISSING;}static reg_errcode_tinternal_functionpush_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,		 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes){  reg_errcode_t err;  Idx num = fs->num++;  if (fs->num == fs->alloc)    {      struct re_fail_stack_ent_t *new_array;      new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)				       * fs->alloc * 2));      if (new_array == NULL)	return REG_ESPACE;      fs->alloc *= 2;      fs->stack = new_array;    }  fs->stack[num].idx = str_idx;  fs->stack[num].node = dest_node;  fs->stack[num].regs = re_malloc (regmatch_t, nregs);  if (fs->stack[num].regs == NULL)    return REG_ESPACE;  memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);  err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);  return err;}static Idxinternal_functionpop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,		regmatch_t *regs, re_node_set *eps_via_nodes){  Idx num = --fs->num;  assert (REG_VALID_INDEX (num));  *pidx = fs->stack[num].idx;  memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);  re_node_set_free (eps_via_nodes);  re_free (fs->stack[num].regs);  *eps_via_nodes = fs->stack[num].eps_via_nodes;  return fs->stack[num].node;}/* Set the positions where the subexpressions are starts/ends to registers   PMATCH.   Note: We assume that pmatch[0] is already set, and   pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */static reg_errcode_tinternal_functionset_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,	  regmatch_t *pmatch, bool fl_backtrack){  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;  Idx idx, cur_node;  re_node_set eps_via_nodes;  struct re_fail_stack_t *fs;  struct re_fail_stack_t fs_body = { 0, 2, NULL };  regmatch_t *prev_idx_match;  bool prev_idx_match_malloced = false;#ifdef DEBUG  assert (nmatch > 1);  assert (mctx->state_log != NULL);#endif  if (fl_backtrack)    {      fs = &fs_body;      fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);      if (fs->stack == NULL)	return REG_ESPACE;    }  else    fs = NULL;  cur_node = dfa->init_node;  re_node_set_init_empty (&eps_via_nodes);  if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))    prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));  else    {      prev_idx_match = re_malloc (regmatch_t, nmatch);      if (prev_idx_match == NULL)	{	  free_fail_stack_return (fs);	  return REG_ESPACE;	}      prev_idx_match_malloced = true;    }  memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);  for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)    {      update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);      if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)	{	  Idx reg_idx;	  if (fs)	    {	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)		  break;	      if (reg_idx == nmatch)		{		  re_node_set_free (&eps_via_nodes);		  if (prev_idx_match_malloced)		    re_free (prev_idx_match);		  return free_fail_stack_return (fs);		}	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,					 &eps_via_nodes);	    }	  else	    {	      re_node_set_free (&eps_via_nodes);	      if (prev_idx_match_malloced)		re_free (prev_idx_match);	      return REG_NOERROR;	    }	}      /* Proceed to next node.  */      cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,				    &eps_via_nodes, fs);      if (BE (! REG_VALID_INDEX (cur_node), 0))	{	  if (BE (cur_node == REG_ERROR, 0))	    {	      re_node_set_free (&eps_via_nodes);	      if (prev_idx_match_malloced)		re_free (prev_idx_match);	      free_fail_stack_return (fs);	      return REG_ESPACE;	    }	  if (fs)	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,				       &eps_via_nodes);	  else	    {	      re_node_set_free (&eps_via_nodes);	      if (prev_idx_match_malloced)		re_free (prev_idx_match);	      return REG_NOMATCH;	    }	}    }  re_node_set_free (&eps_via_nodes);  if (prev_idx_match_malloced)    re_free (prev_idx_match);  return free_fail_stack_return (fs);}static reg_errcode_tinternal_functionfree_fail_stack_return (struct re_fail_stack_t *fs){  if (fs)    {      Idx fs_idx;      for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)	{	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);	  re_free (fs->stack[fs_idx].regs);	}      re_free (fs->stack);    }  return REG_NOERROR;}static voidinternal_functionupdate_regs (const re_dfa_t *dfa, regmatch_t *pmatch,	     regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch){  int type = dfa->nodes[cur_node].type;  if (type == OP_OPEN_SUBEXP)    {      Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;      /* We are at the first node of this sub expression.  */      if (reg_num < nmatch)	{	  pmatch[reg_num].rm_so = cur_idx;	  pmatch[reg_num].rm_eo = -1;	}    }  else if (type == OP_CLOSE_SUBEXP)    {      Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;      if (reg_num < nmatch)	{	  /* We are at the last node of this sub expression.  */	  if (pmatch[reg_num].rm_so < cur_idx)	    {	      pmatch[reg_num].rm_eo = cur_idx;	      /* This is a non-empty match or we are not inside an optional		 subexpression.  Accept this right away.  */	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);	    }	  else	    {	      if (dfa->nodes[cur_node].opt_subexp		  && prev_idx_match[reg_num].rm_so != -1)		/* We transited through an empty match for an optional		   subexpression, like (a?)*, and this is not the subexp's		   first match.  Copy back the old content of the registers		   so that matches of an inner subexpression are undone as		   well, like in ((a?))*.  */		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);	      else		/* We completed a subexpression, but it may be part of		   an optional one, so do not update PREV_IDX_MATCH.  */		pmatch[reg_num].rm_eo = cur_idx;	    }	}    }}/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0   and sift the nodes in each states according to the following rules.   Updated state_log will be wrote to STATE_LOG.   Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...     1. When STR_IDX == MATCH_LAST(the last index in the state_log):	If `a' isn't the LAST_NODE and `a' can't epsilon transit to	the LAST_NODE, we throw away the node `a'.     2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts	string `s' and transit to `b':	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw	   away the node `a'.	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is	    thrown away, we throw away the node `a'.     3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the	   node `a'.	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,	    we throw away the node `a'.  */#define STATE_NODE_CONTAINS(state,node) \  ((state) != NULL && re_node_set_contains (&(state)->nodes, node))static reg_errcode_tinternal_functionsift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx){  reg_errcode_t err;  int null_cnt = 0;  Idx str_idx = sctx->last_str_idx;  re_node_set cur_dest;#ifdef DEBUG  assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);#endif  /* Build sifted state_log[str_idx].  It has the nodes which can epsilon     transit to the last_node and the last_node itself.  */  err = re_node_set_init_1 (&cur_dest, sctx->last_node);  if (BE (err != REG_NOERROR, 0))    return err;  err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);  if (BE (err != REG_NOERROR, 0))    goto free_return;  /* Then check each states in the state_log.  */  while (str_idx > 0)    {      /* Update counters.  */      null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;      if (null_cnt > mctx->max_mb_elem_len)	{	  memset (sctx->sifted_states, '\0',		  sizeof (re_dfastate_t *) * str_idx);	  re_node_set_free (&cur_dest);	  return REG_NOERROR;	}      re_node_set_empty (&cur_dest);      --str_idx;      if (mctx->state_log[str_idx])	{	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);          if (BE (err != REG_NOERROR, 0))	    goto free_return;	}      /* Add all the nodes which satisfy the following conditions:	 - It can epsilon transit to a node in CUR_DEST.	 - It is in CUR_SRC.	 And update state_log.  */      err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);      if (BE (err != REG_NOERROR, 0))	goto free_return;    }  err = REG_NOERROR; free_return:  re_node_set_free (&cur_dest);  return err;}static reg_errcode_tinternal_functionbuild_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,		     Idx str_idx, re_node_set *cur_dest){  const re_dfa_t *const dfa = mctx->dfa;  const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;  Idx i;  /* Then build the next sifted state.     We build the next sifted state on `cur_dest', and update     `sifted_states[str_idx]' with `cur_dest'.     Note:     `cur_dest' is the sifted state from `state_log[str_idx + 1]'.     `cur_src' points the node_set of the old `state_log[str_idx]'     (with the epsilon nodes pre-filtered out).  */  for (i = 0; i < cur_src->nelem; i++)    {      Idx prev_node = cur_src->elems[i];      int naccepted = 0;      bool ok;#ifdef DEBUG      re_token_type_t type = dfa->nodes[prev_node].type;      assert (!IS_EPSILON_NODE (type));#endif#ifdef RE_ENABLE_I18N      /* If the node may accept `multi byte'.  */      if (dfa->nodes[prev_node].accept_mb)	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,					 str_idx, sctx->last_str_idx);#endif /* RE_ENABLE_I18N */      /* We don't check backreferences here.	 See update_cur_sifted_state().  */      if (!naccepted	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],				  dfa->nexts[prev_node]))	naccepted = 1;      if (naccepted == 0)	continue;      if (sctx->limits.nelem)	{	  Idx to_idx = str_idx + naccepted;	  if (check_dst_limits (mctx, &sctx->limits,				dfa->nexts[prev_node], to_idx,				prev_node, str_idx))	    continue;	}      ok = re_node_set_insert (cur_dest, prev_node);      if (BE (! ok, 0))	return REG_ESPACE;    }
regexec.c - 源码说明

本页面展示了「gnu tar 源码包。 tar 软件是 Unix 系统下的一个打包软件」中的 regexec.c 源码文件，采用 C语言编程语言编写，共 2,175 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与tar相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?