⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regexec.c

📁 硬盘各项性能的测试,如温度容量版本健康度型号
💻 C
📖 第 1 页 / 共 5 页
字号:
/* Extended regular expression matching and search library.   Copyright (C) 2002, 2003 Free Software Foundation, Inc.   This file is part of the GNU C Library.   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.   The GNU C Library is free software; you can redistribute it and/or   modify it under the terms of the GNU Lesser General Public   License as published by the Free Software Foundation; either   version 2.1 of the License, or (at your option) any later version.   The GNU C Library is distributed in the hope that it will be useful,   but WITHOUT ANY WARRANTY; without even the implied warranty of   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU   Lesser General Public License for more details.   You should have received a copy of the GNU Lesser General Public   License along with the GNU C Library; if not, write to the Free   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA   02111-1307 USA.  */static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,				     re_string_t *input, int n);static void match_ctx_clean (re_match_context_t *mctx);static void match_ctx_free (re_match_context_t *cache);static void match_ctx_free_subtops (re_match_context_t *mctx);static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,					  int str_idx, int from, int to);static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx);static void match_ctx_clear_flag (re_match_context_t *mctx);static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,					   int str_idx);static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,						   int node, int str_idx);static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,			   re_dfastate_t **limited_sts, int last_node,			   int last_str_idx, int check_subexp);static reg_errcode_t re_search_internal (const regex_t *preg,					 const char *string, int length,					 int start, int range, int 
stop,					 size_t nmatch, regmatch_t pmatch[],					 int eflags);static int re_search_2_stub (struct re_pattern_buffer *bufp,			     const char *string1, int length1,			     const char *string2, int length2,			     int start, int range, struct re_registers *regs,			     int stop, int ret_len);static int re_search_stub (struct re_pattern_buffer *bufp,			   const char *string, int length, int start,			   int range, int stop, struct re_registers *regs,			   int ret_len);static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,			      int nregs, int regs_allocated);static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,							 const regex_t *preg,							 const re_match_context_t *mctx,							 int idx);static reg_errcode_t prune_impossible_nodes (const regex_t *preg,					     re_match_context_t *mctx);static int check_matching (const regex_t *preg, re_match_context_t *mctx,			   int fl_search, int fl_longest_match);static int check_halt_node_context (const re_dfa_t *dfa, int node,				    unsigned int context);static int check_halt_state_context (const regex_t *preg,				     const re_dfastate_t *state,				     const re_match_context_t *mctx, int idx);static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,			 int cur_idx, int nmatch);static int proceed_next_node (const regex_t *preg, int nregs, regmatch_t *regs,			      const re_match_context_t *mctx,			      int *pidx, int node, re_node_set *eps_via_nodes,			      struct re_fail_stack_t *fs);static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,				      int str_idx, int *dests, int nregs,				      regmatch_t *regs,				      re_node_set *eps_via_nodes);static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,			   regmatch_t *regs, re_node_set *eps_via_nodes);static reg_errcode_t set_regs (const regex_t *preg,			       const re_match_context_t *mctx,			       size_t nmatch, regmatch_t *pmatch,			       int fl_backtrack);static 
reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);#ifdef RE_ENABLE_I18Nstatic int sift_states_iter_mb (const regex_t *preg,				const re_match_context_t *mctx,				re_sift_context_t *sctx,				int node_idx, int str_idx, int max_str_idx);#endif /* RE_ENABLE_I18N */static reg_errcode_t sift_states_backward (const regex_t *preg,					   re_match_context_t *mctx,					   re_sift_context_t *sctx);static reg_errcode_t update_cur_sifted_state (const regex_t *preg,					      re_match_context_t *mctx,					      re_sift_context_t *sctx,					      int str_idx,					      re_node_set *dest_nodes);static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,					    re_node_set *dest_nodes,					    const re_node_set *candidates);static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,					    re_node_set *dest_nodes,					    const re_node_set *and_nodes);static int check_dst_limits (re_dfa_t *dfa, re_node_set *limits,			     re_match_context_t *mctx, int dst_node,			     int dst_idx, int src_node, int src_idx);static int check_dst_limits_calc_pos (re_dfa_t *dfa, re_match_context_t *mctx,				      int limit, re_node_set *eclosures,				      int subexp_idx, int node, int str_idx);static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,					  re_node_set *dest_nodes,					  const re_node_set *candidates,					  re_node_set *limits,					  struct re_backref_cache_entry *bkref_ents,					  int str_idx);static reg_errcode_t sift_states_bkref (const regex_t *preg,					re_match_context_t *mctx,					re_sift_context_t *sctx,					int str_idx, re_node_set *dest_nodes);static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx,					      int next_state_log_idx);static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,					re_dfastate_t **src, int num);static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,				     re_match_context_t *mctx,				     re_dfastate_t *state, int fl_search);static reg_errcode_t 
check_subexp_matching_top (re_dfa_t *dfa,						re_match_context_t *mctx,						re_node_set *cur_nodes,						int str_idx);static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,					re_dfastate_t *pstate,					int fl_search,					re_match_context_t *mctx);#ifdef RE_ENABLE_I18Nstatic reg_errcode_t transit_state_mb (const regex_t *preg,				       re_dfastate_t *pstate,				       re_match_context_t *mctx);#endif /* RE_ENABLE_I18N */static reg_errcode_t transit_state_bkref (const regex_t *preg,					  re_node_set *nodes,					  re_match_context_t *mctx);static reg_errcode_t get_subexp (const regex_t *preg, re_match_context_t *mctx,				 int bkref_node, int bkref_str_idx);static reg_errcode_t get_subexp_sub (const regex_t *preg,				     re_match_context_t *mctx,				     re_sub_match_top_t *sub_top,				     re_sub_match_last_t *sub_last,				     int bkref_node, int bkref_str);static int find_subexp_node (re_dfa_t *dfa, re_node_set *nodes,			     int subexp_idx, int fl_open);static reg_errcode_t check_arrival (const regex_t *preg,				    re_match_context_t *mctx,				    state_array_t *path, int top_node,				    int top_str, int last_node, int last_str,				    int fl_open);static reg_errcode_t check_arrival_add_next_nodes (const regex_t *preg,						   re_dfa_t *dfa,						   re_match_context_t *mctx,						   int str_idx,						   re_node_set *cur_nodes,						   re_node_set *next_nodes);static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,					       re_node_set *cur_nodes,					       int ex_subexp, int fl_open);static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,						   re_node_set *dst_nodes,						   int target, int ex_subexp,						   int fl_open);static reg_errcode_t expand_bkref_cache (const regex_t *preg,					 re_match_context_t *mctx,					 re_node_set *cur_nodes, int cur_str,					 int last_str, int subexp_num,					 int fl_open);static re_dfastate_t **build_trtable (const regex_t *dfa,				      const re_dfastate_t *state,				      
int fl_search);#ifdef RE_ENABLE_I18Nstatic int check_node_accept_bytes (const regex_t *preg, int node_idx,				    const re_string_t *input, int idx);# ifdef _LIBCstatic unsigned int find_collation_sequence_value (const unsigned char *mbs,						   size_t name_len);# endif /* _LIBC */#endif /* RE_ENABLE_I18N */static int group_nodes_into_DFAstates (const regex_t *dfa,				       const re_dfastate_t *state,				       re_node_set *states_node,				       bitset *states_ch);static int check_node_accept (const regex_t *preg, const re_token_t *node,			      const re_match_context_t *mctx, int idx);static reg_errcode_t extend_buffers (re_match_context_t *mctx);/* Entry point for POSIX code.  *//* regexec searches for a given pattern, specified by PREG, in the   string STRING.   If NMATCH is zero or REG_NOSUB was set in the cflags argument to   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at   least NMATCH elements, and we set them to the offsets of the   corresponding matched substrings.   EFLAGS specifies `execution flags' which affect matching: if   REG_NOTBOL is set, then ^ does not match at the beginning of the   string; if REG_NOTEOL is set, then $ does not match at the end.   We return 0 if we find a match and REG_NOMATCH if not.  */intregexec (preg, string, nmatch, pmatch, eflags)    const regex_t *__restrict preg;    const char *__restrict string;    size_t nmatch;    regmatch_t pmatch[];    int eflags;{  reg_errcode_t err;  int length = strlen (string);  if (preg->no_sub)    err = re_search_internal (preg, string, length, 0, length, length, 0,			      NULL, eflags);  else    err = re_search_internal (preg, string, length, 0, length, length, nmatch,			      pmatch, eflags);  return err != REG_NOERROR;}#ifdef _LIBCweak_alias (__regexec, regexec)#endif/* Entry points for GNU code.  
*/

/* re_match, re_search, re_match_2, re_search_2

   The former two functions operate on STRING with length LENGTH,
   while the latter two operate on the concatenation of STRING1 and STRING2
   with lengths LENGTH1 and LENGTH2, respectively.

   re_match() matches the compiled pattern in BUFP against the string,
   starting at index START.

   re_search() first tries matching at index START, then it tries to match
   starting from index START + 1, and so on.  The last start position tried
   is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
   way as re_match().)

   The parameter STOP of re_{match,search}_2 specifies that no match exceeding
   the first STOP characters of the concatenation of the strings should be
   considered.

   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
   and all groups are stored in REGS.  (For the "_2" variants, the offsets are
   computed relative to the concatenation, not relative to the individual
   strings.)

   On success, re_match* functions return the length of the match, re_search*
   return the position of the start of the match.  Return value -1 means no
   match was found and -2 indicates an internal error.  
*/intre_match (bufp, string, length, start, regs)    struct re_pattern_buffer *bufp;    const char *string;    int length, start;    struct re_registers *regs;{  return re_search_stub (bufp, string, length, start, 0, length, regs, 1);}#ifdef _LIBCweak_alias (__re_match, re_match)#endifintre_search (bufp, string, length, start, range, regs)    struct re_pattern_buffer *bufp;    const char *string;    int length, start, range;    struct re_registers *regs;{  return re_search_stub (bufp, string, length, start, range, length, regs, 0);}#ifdef _LIBCweak_alias (__re_search, re_search)#endifintre_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)    struct re_pattern_buffer *bufp;    const char *string1, *string2;    int length1, length2, start, stop;    struct re_registers *regs;{  return re_search_2_stub (bufp, string1, length1, string2, length2,			   start, 0, regs, stop, 1);}#ifdef _LIBCweak_alias (__re_match_2, re_match_2)#endifintre_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)    struct re_pattern_buffer *bufp;    const char *string1, *string2;    int length1, length2, start, range, stop;    struct re_registers *regs;{  return re_search_2_stub (bufp, string1, length1, string2, length2,			   start, range, regs, stop, 0);}#ifdef _LIBCweak_alias (__re_search_2, re_search_2)#endifstatic intre_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,		  stop, ret_len)    struct re_pattern_buffer *bufp;    const char *string1, *string2;    int length1, length2, start, range, stop, ret_len;    struct re_registers *regs;{  const char *str;  int rval;  int len = length1 + length2;  int free_str = 0;  if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))    return -2;  /* Concatenate the strings.  
*/  if (length2 > 0)    if (length1 > 0)      {	char *s = re_malloc (char, len);	if (BE (s == NULL, 0))	  return -2;	memcpy (s, string1, length1);	memcpy (s + length1, string2, length2);	str = s;	free_str = 1;      }    else      str = string2;  else    str = string1;  rval = re_search_stub (bufp, str, len, start, range, stop, regs,			 ret_len);  if (free_str)    re_free ((char *) str);  return rval;}/* The parameters have the same meaning as those of re_search.   Additional parameters:   If RET_LEN is nonzero the length of the match is returned (re_match style);   otherwise the position of the match is returned.  */static intre_search_stub (bufp, string, length, start, range, stop, regs, ret_len)    struct re_pattern_buffer *bufp;    const char *string;    int length, start, range, stop, ret_len;    struct re_registers *regs;{  reg_errcode_t result;  regmatch_t *pmatch;  int nregs, rval;  int eflags = 0;  /* Check for out-of-range.  */  if (BE (start < 0 || start > length, 0))    return -1;  if (BE (start + range > length, 0))    range = length - start;  else if (BE (start + range < 0, 0))    range = -start;  eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;  eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;  /* Compile fastmap if we haven't yet.  */  if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)    re_compile_fastmap (bufp);  if (BE (bufp->no_sub, 0))    regs = NULL;  /* We need at least 1 register.  */  if (regs == NULL)    nregs = 1;  else if (BE (bufp->regs_allocated == REGS_FIXED &&	       regs->num_regs < bufp->re_nsub + 1, 0))    {      nregs = regs->num_regs;      if (BE (nregs < 1, 0))	{	  /* Nothing can be copied to regs.  
*/	  regs = NULL;	  nregs = 1;	}    }  else    nregs = bufp->re_nsub + 1;  pmatch = re_malloc (regmatch_t, nregs);  if (BE (pmatch == NULL, 0))    return -2;  result = re_search_internal (bufp, string, length, start, range, stop,			       nregs, pmatch, eflags);  rval = 0;  /* I hope we needn't fill ther regs with -1's when no match was found.  */  if (result != REG_NOERROR)    rval = -1;  else if (regs != NULL)    {      /* If caller wants register contents data back, copy them.  */      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,					   bufp->regs_allocated);      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))	rval = -2;    }  if (BE (rval == 0, 1))    {      if (ret_len)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -