📄 regcomp.c

📁 gnu tar 源码包。 tar 软件是 Unix 系统下的一个打包软件
💻 C
📖 第 1 页 / 共 5 页
字号:
    default:      if (node->left)	node->left->next = node->next;      if (node->right)        node->right->next = node->next;      break;    }  return REG_NOERROR;}/* Pass 3: link all DFA nodes to their NEXT node (any order will do).  */static reg_errcode_tlink_nfa_nodes (void *extra, bin_tree_t *node){  re_dfa_t *dfa = (re_dfa_t *) extra;  Idx idx = node->node_idx;  reg_errcode_t err = REG_NOERROR;  switch (node->token.type)    {    case CONCAT:      break;    case END_OF_RE:      assert (node->next == NULL);      break;    case OP_DUP_ASTERISK:    case OP_ALT:      {	Idx left, right;	dfa->has_plural_match = 1;	if (node->left != NULL)	  left = node->left->first->node_idx;	else	  left = node->next->node_idx;	if (node->right != NULL)	  right = node->right->first->node_idx;	else	  right = node->next->node_idx;	assert (REG_VALID_INDEX (left));	assert (REG_VALID_INDEX (right));	err = re_node_set_init_2 (dfa->edests + idx, left, right);      }      break;    case ANCHOR:    case OP_OPEN_SUBEXP:    case OP_CLOSE_SUBEXP:      err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);      break;    case OP_BACK_REF:      dfa->nexts[idx] = node->next->node_idx;      if (node->token.type == OP_BACK_REF)	re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);      break;    default:      assert (!IS_EPSILON_NODE (node->token.type));      dfa->nexts[idx] = node->next->node_idx;      break;    }  return err;}/* Duplicate the epsilon closure of the node ROOT_NODE.   Note that duplicated nodes have constraint INIT_CONSTRAINT in addition   to their own constraint.  */static reg_errcode_tinternal_functionduplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,			Idx root_node, unsigned int init_constraint){  Idx org_node, clone_node;  bool ok;  unsigned int constraint = init_constraint;  for (org_node = top_org_node, clone_node = top_clone_node;;)    {      Idx org_dest, clone_dest;      if (dfa->nodes[org_node].type == OP_BACK_REF)	{	  /* If the back reference epsilon-transit, its destination must	     also have the constraint.  Then duplicate the epsilon closure	     of the destination of the back reference, and store it in	     edests of the back reference.  */	  org_dest = dfa->nexts[org_node];	  re_node_set_empty (dfa->edests + clone_node);	  clone_dest = duplicate_node (dfa, org_dest, constraint);	  if (BE (clone_dest == REG_MISSING, 0))	    return REG_ESPACE;	  dfa->nexts[clone_node] = dfa->nexts[org_node];	  ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);	  if (BE (! ok, 0))	    return REG_ESPACE;	}      else if (dfa->edests[org_node].nelem == 0)	{	  /* In case of the node can't epsilon-transit, don't duplicate the	     destination and store the original destination as the	     destination of the node.  */	  dfa->nexts[clone_node] = dfa->nexts[org_node];	  break;	}      else if (dfa->edests[org_node].nelem == 1)	{	  /* In case of the node can epsilon-transit, and it has only one	     destination.  */	  org_dest = dfa->edests[org_node].elems[0];	  re_node_set_empty (dfa->edests + clone_node);	  if (dfa->nodes[org_node].type == ANCHOR)	    {	      /* In case of the node has another constraint, append it.  */	      if (org_node == root_node && clone_node != org_node)		{		  /* ...but if the node is root_node itself, it means the		     epsilon closure have a loop, then tie it to the		     destination of the root_node.  */		  ok = re_node_set_insert (dfa->edests + clone_node, org_dest);		  if (BE (! ok, 0))		    return REG_ESPACE;		  break;		}	      constraint |= dfa->nodes[org_node].opr.ctx_type;	    }	  clone_dest = duplicate_node (dfa, org_dest, constraint);	  if (BE (clone_dest == REG_MISSING, 0))	    return REG_ESPACE;	  ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);	  if (BE (! ok, 0))	    return REG_ESPACE;	}      else /* dfa->edests[org_node].nelem == 2 */	{	  /* In case of the node can epsilon-transit, and it has two	     destinations. In the bin_tree_t and DFA, that's '|' and '*'.   */	  org_dest = dfa->edests[org_node].elems[0];	  re_node_set_empty (dfa->edests + clone_node);	  /* Search for a duplicated node which satisfies the constraint.  */	  clone_dest = search_duplicated_node (dfa, org_dest, constraint);	  if (clone_dest == REG_MISSING)	    {	      /* There are no such a duplicated node, create a new one.  */	      reg_errcode_t err;	      clone_dest = duplicate_node (dfa, org_dest, constraint);	      if (BE (clone_dest == REG_MISSING, 0))		return REG_ESPACE;	      ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);	      if (BE (! ok, 0))		return REG_ESPACE;	      err = duplicate_node_closure (dfa, org_dest, clone_dest,					    root_node, constraint);	      if (BE (err != REG_NOERROR, 0))		return err;	    }	  else	    {	      /* There are a duplicated node which satisfy the constraint,		 use it to avoid infinite loop.  */	      ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);	      if (BE (! ok, 0))		return REG_ESPACE;	    }	  org_dest = dfa->edests[org_node].elems[1];	  clone_dest = duplicate_node (dfa, org_dest, constraint);	  if (BE (clone_dest == REG_MISSING, 0))	    return REG_ESPACE;	  ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);	  if (BE (! ok, 0))	    return REG_ESPACE;	}      org_node = org_dest;      clone_node = clone_dest;    }  return REG_NOERROR;}/* Search for a node which is duplicated from the node ORG_NODE, and   satisfies the constraint CONSTRAINT.  */static Idxsearch_duplicated_node (const re_dfa_t *dfa, Idx org_node,			unsigned int constraint){  Idx idx;  for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)    {      if (org_node == dfa->org_indices[idx]	  && constraint == dfa->nodes[idx].constraint)	return idx; /* Found.  */    }  return REG_MISSING; /* Not found.  */}/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.   Return the index of the new node, or REG_MISSING if insufficient storage is   available.  */static Idxduplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint){  Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);  if (BE (dup_idx != REG_MISSING, 1))    {      dfa->nodes[dup_idx].constraint = constraint;      if (dfa->nodes[org_idx].type == ANCHOR)	dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type;      dfa->nodes[dup_idx].duplicated = 1;      /* Store the index of the original node.  */      dfa->org_indices[dup_idx] = org_idx;    }  return dup_idx;}static reg_errcode_tcalc_inveclosure (re_dfa_t *dfa){  Idx src, idx;  bool ok;  for (idx = 0; idx < dfa->nodes_len; ++idx)    re_node_set_init_empty (dfa->inveclosures + idx);  for (src = 0; src < dfa->nodes_len; ++src)    {      Idx *elems = dfa->eclosures[src].elems;      for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)	{	  ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);	  if (BE (! ok, 0))	    return REG_ESPACE;	}    }  return REG_NOERROR;}/* Calculate "eclosure" for all the node in DFA.  */static reg_errcode_tcalc_eclosure (re_dfa_t *dfa){  Idx node_idx;  bool incomplete;#ifdef DEBUG  assert (dfa->nodes_len > 0);#endif  incomplete = false;  /* For each nodes, calculate epsilon closure.  */  for (node_idx = 0; ; ++node_idx)    {      reg_errcode_t err;      re_node_set eclosure_elem;      if (node_idx == dfa->nodes_len)	{	  if (!incomplete)	    break;	  incomplete = false;	  node_idx = 0;	}#ifdef DEBUG      assert (dfa->eclosures[node_idx].nelem != REG_MISSING);#endif      /* If we have already calculated, skip it.  */      if (dfa->eclosures[node_idx].nelem != 0)	continue;      /* Calculate epsilon closure of `node_idx'.  */      err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true);      if (BE (err != REG_NOERROR, 0))	return err;      if (dfa->eclosures[node_idx].nelem == 0)	{	  incomplete = true;	  re_node_set_free (&eclosure_elem);	}    }  return REG_NOERROR;}/* Calculate epsilon closure of NODE.  */static reg_errcode_tcalc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root){  reg_errcode_t err;  unsigned int constraint;  Idx i;  bool incomplete;  bool ok;  re_node_set eclosure;  incomplete = false;  err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);  if (BE (err != REG_NOERROR, 0))    return err;  /* This indicates that we are calculating this node now.     We reference this value to avoid infinite loop.  */  dfa->eclosures[node].nelem = REG_MISSING;  constraint = ((dfa->nodes[node].type == ANCHOR)		? dfa->nodes[node].opr.ctx_type : 0);  /* If the current node has constraints, duplicate all nodes.     Since they must inherit the constraints.  */  if (constraint      && dfa->edests[node].nelem      && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)    {      err = duplicate_node_closure (dfa, node, node, node, constraint);      if (BE (err != REG_NOERROR, 0))	return err;    }  /* Expand each epsilon destination nodes.  */  if (IS_EPSILON_NODE(dfa->nodes[node].type))    for (i = 0; i < dfa->edests[node].nelem; ++i)      {	re_node_set eclosure_elem;	Idx edest = dfa->edests[node].elems[i];	/* If calculating the epsilon closure of `edest' is in progress,	   return intermediate result.  */	if (dfa->eclosures[edest].nelem == REG_MISSING)	  {	    incomplete = true;	    continue;	  }	/* If we haven't calculated the epsilon closure of `edest' yet,	   calculate now. Otherwise use calculated epsilon closure.  */	if (dfa->eclosures[edest].nelem == 0)	  {	    err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false);	    if (BE (err != REG_NOERROR, 0))	      return err;	  }	else	  eclosure_elem = dfa->eclosures[edest];	/* Merge the epsilon closure of `edest'.  */	re_node_set_merge (&eclosure, &eclosure_elem);	/* If the epsilon closure of `edest' is incomplete,	   the epsilon closure of this node is also incomplete.  */	if (dfa->eclosures[edest].nelem == 0)	  {	    incomplete = true;	    re_node_set_free (&eclosure_elem);	  }      }  /* Epsilon closures include itself.  */  ok = re_node_set_insert (&eclosure, node);  if (BE (! ok, 0))    return REG_ESPACE;  if (incomplete && !root)    dfa->eclosures[node].nelem = 0;  else    dfa->eclosures[node] = eclosure;  *new_set = eclosure;  return REG_NOERROR;}/* Functions for token which are used in the parser.  *//* Fetch a token from INPUT.   We must not use this function inside bracket expressions.  */static voidinternal_functionfetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax){  re_string_skip_bytes (input, peek_token (result, input, syntax));}/* Peek a token from INPUT, and return the length of the token.   We must not use this function inside bracket expressions.  */static intinternal_functionpeek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax){  unsigned char c;  if (re_string_eoi (input))    {      token->type = END_OF_RE;      return 0;    }  c = re_string_peek_byte (input, 0);  token->opr.c = c;  token->word_char = 0;#ifdef RE_ENABLE_I18N  token->mb_partial = 0;  if (input->mb_cur_max > 1 &&      !re_string_first_byte (input, re_string_cur_idx (input)))    {      token->type = CHARACTER;      token->mb_partial = 1;      return 1;    }#endif  if (c == '\\')    {      unsigned char c2;      if (re_string_cur_idx (input) + 1 >= re_string_length (input))	{	  token->type = BACK_SLASH;	  return 1;	}      c2 = re_string_peek_byte_case (input, 1);      token->opr.c = c2;      token->type = CHARACTER;#ifdef RE_ENABLE_I18N      if (input->mb_cur_max > 1)	{	  wint_t wc = re_string_wchar_at (input,					  re_string_cur_idx (input) + 1);	  token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;	}      else#endif	token->word_char = IS_WORD_CHAR (c2) != 0;      switch (c2)	{	case '|':	  if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))	    token->type = OP_ALT;	  break;	case '1': case '2': case '3': case '4': case '5':	case '6': case '7': case '8': case '9':	  if (!(syntax & RE_NO_BK_REFS))	    {	      token->type = OP_BACK_REF;	      token->opr.idx = c2 - '1';	    }	  break;	case '<':	  if (!(syntax & RE_NO_GNU_OPS))	    {	      token->type = ANCHOR;	      token->opr.ctx_type = WORD_FIRST;	    }	  break;	case '>':	  if (!(syntax & RE_NO_GNU_OPS))	    {	      token->type = ANCHOR;	      token->opr.ctx_type = WORD_LAST;	    }
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -