📄 regcomp.c
字号:
regex_t *_Restrict_ preg; const char *_Restrict_ pattern; int cflags;{ reg_errcode_t ret; reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC); preg->buffer = NULL; preg->allocated = 0; preg->used = 0; /* Try to allocate space for the fastmap. */ preg->fastmap = re_malloc (char, SBC_MAX); if (BE (preg->fastmap == NULL, 0)) return REG_ESPACE; syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0; /* If REG_NEWLINE is set, newlines are treated differently. */ if (cflags & REG_NEWLINE) { /* REG_NEWLINE implies neither . nor [^...] match newline. */ syntax &= ~RE_DOT_NEWLINE; syntax |= RE_HAT_LISTS_NOT_NEWLINE; /* It also changes the matching behavior. */ preg->newline_anchor = 1; } else preg->newline_anchor = 0; preg->no_sub = !!(cflags & REG_NOSUB); preg->translate = NULL; ret = re_compile_internal (preg, pattern, strlen (pattern), syntax); /* POSIX doesn't distinguish between an unmatched open-group and an unmatched close-group: both are REG_EPAREN. */ if (ret == REG_ERPAREN) ret = REG_EPAREN; /* We have already checked preg->fastmap != NULL. */ if (BE (ret == REG_NOERROR, 1)) /* Compute the fastmap now, since regexec cannot modify the pattern buffer. This function never fails in this implementation. */ (void) re_compile_fastmap (preg); else { /* Some error occurred while compiling the expression. */ re_free (preg->fastmap); preg->fastmap = NULL; } return (int) ret;}#ifdef _LIBCweak_alias (__regcomp, regcomp)#endif/* Returns a message corresponding to an error code, ERRCODE, returned from either regcomp or regexec. We don't use PREG here. */#ifdef _LIBCsize_tregerror (errcode, preg, errbuf, errbuf_size) int errcode; const regex_t *_Restrict_ preg; char *_Restrict_ errbuf; size_t errbuf_size;#else /* size_t might promote */size_tregerror (int errcode, const regex_t *_Restrict_ preg, char *_Restrict_ errbuf, size_t errbuf_size)#endif{ const char *msg; size_t msg_size; if (BE (errcode < 0 || errcode >= (int) (sizeof (__re_error_msgid_idx) / sizeof (__re_error_msgid_idx[0])), 0)) /* Only error codes returned by the rest of the code should be passed to this routine. If we are given anything else, or if other regex code generates an invalid error code, then the program has a bug. Dump core so we can fix it. */ abort (); msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]); msg_size = strlen (msg) + 1; /* Includes the null. */ if (BE (errbuf_size != 0, 1)) { size_t cpy_size = msg_size; if (BE (msg_size > errbuf_size, 0)) { cpy_size = errbuf_size - 1; errbuf[cpy_size] = '\0'; } memcpy (errbuf, msg, cpy_size); } return msg_size;}#ifdef _LIBCweak_alias (__regerror, regerror)#endif#ifdef RE_ENABLE_I18N/* This static array is used for the map to single-byte characters when UTF-8 is used. Otherwise we would allocate memory just to initialize it the same all the time. UTF-8 is the preferred encoding so this is a worthwhile optimization. */static const bitset_t utf8_sb_map ={ /* Set the first 128 bits. */# if 4 * BITSET_WORD_BITS < ASCII_CHARS# error "bitset_word_t is narrower than 32 bits"# elif 3 * BITSET_WORD_BITS < ASCII_CHARS BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX,# elif 2 * BITSET_WORD_BITS < ASCII_CHARS BITSET_WORD_MAX, BITSET_WORD_MAX,# elif 1 * BITSET_WORD_BITS < ASCII_CHARS BITSET_WORD_MAX,# endif (BITSET_WORD_MAX >> (SBC_MAX % BITSET_WORD_BITS == 0 ? 0 : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS))};#endifstatic voidfree_dfa_content (re_dfa_t *dfa){ Idx i, j; if (dfa->nodes) for (i = 0; i < dfa->nodes_len; ++i) free_token (dfa->nodes + i); re_free (dfa->nexts); for (i = 0; i < dfa->nodes_len; ++i) { if (dfa->eclosures != NULL) re_node_set_free (dfa->eclosures + i); if (dfa->inveclosures != NULL) re_node_set_free (dfa->inveclosures + i); if (dfa->edests != NULL) re_node_set_free (dfa->edests + i); } re_free (dfa->edests); re_free (dfa->eclosures); re_free (dfa->inveclosures); re_free (dfa->nodes); if (dfa->state_table) for (i = 0; i <= dfa->state_hash_mask; ++i) { struct re_state_table_entry *entry = dfa->state_table + i; for (j = 0; j < entry->num; ++j) { re_dfastate_t *state = entry->array[j]; free_state (state); } re_free (entry->array); } re_free (dfa->state_table);#ifdef RE_ENABLE_I18N if (dfa->sb_char != utf8_sb_map) re_free (dfa->sb_char);#endif re_free (dfa->subexp_map);#ifdef DEBUG re_free (dfa->re_str);#endif re_free (dfa);}/* Free dynamically allocated space used by PREG. */voidregfree (preg) regex_t *preg;{ re_dfa_t *dfa = (re_dfa_t *) preg->buffer; if (BE (dfa != NULL, 1)) free_dfa_content (dfa); preg->buffer = NULL; preg->allocated = 0; re_free (preg->fastmap); preg->fastmap = NULL; re_free (preg->translate); preg->translate = NULL;}#ifdef _LIBCweak_alias (__regfree, regfree)#endif/* Entry points compatible with 4.2 BSD regex library. We don't define them unless specifically requested. */#if defined _REGEX_RE_COMP || defined _LIBC/* BSD has one and only one pattern buffer. */static struct re_pattern_buffer re_comp_buf;char *# ifdef _LIBC/* Make these definitions weak in libc, so POSIX programs can redefine these names if they don't use our functions, and still use regcomp/regexec above without link errors. */weak_function# endifre_comp (s) const char *s;{ reg_errcode_t ret; char *fastmap; if (!s) { if (!re_comp_buf.buffer) return gettext ("No previous regular expression"); return 0; } if (re_comp_buf.buffer) { fastmap = re_comp_buf.fastmap; re_comp_buf.fastmap = NULL; __regfree (&re_comp_buf); memset (&re_comp_buf, '\0', sizeof (re_comp_buf)); re_comp_buf.fastmap = fastmap; } if (re_comp_buf.fastmap == NULL) { re_comp_buf.fastmap = (char *) malloc (SBC_MAX); if (re_comp_buf.fastmap == NULL) return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) REG_ESPACE]); } /* Since `re_exec' always passes NULL for the `regs' argument, we don't need to initialize the pattern buffer fields which affect it. */ /* Match anchors at newlines. */ re_comp_buf.newline_anchor = 1; ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options); if (!ret) return NULL; /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);}#ifdef _LIBClibc_freeres_fn (free_mem){ __regfree (&re_comp_buf);}#endif#endif /* _REGEX_RE_COMP *//* Internal entry point. Compile the regular expression PATTERN, whose length is LENGTH. SYNTAX indicate regular expression's syntax. */static reg_errcode_tre_compile_internal (regex_t *preg, const char * pattern, size_t length, reg_syntax_t syntax){ reg_errcode_t err = REG_NOERROR; re_dfa_t *dfa; re_string_t regexp; /* Initialize the pattern buffer. */ preg->fastmap_accurate = 0; preg->syntax = syntax; preg->not_bol = preg->not_eol = 0; preg->used = 0; preg->re_nsub = 0; preg->can_be_null = 0; preg->regs_allocated = REGS_UNALLOCATED; /* Initialize the dfa. */ dfa = (re_dfa_t *) preg->buffer; if (BE (preg->allocated < sizeof (re_dfa_t), 0)) { /* If zero allocated, but buffer is non-null, try to realloc enough space. This loses if buffer's address is bogus, but that is the user's responsibility. If ->buffer is NULL this is a simple allocation. */ dfa = re_realloc (preg->buffer, re_dfa_t, 1); if (dfa == NULL) return REG_ESPACE; preg->allocated = sizeof (re_dfa_t); preg->buffer = (unsigned char *) dfa; } preg->used = sizeof (re_dfa_t); err = init_dfa (dfa, length); if (BE (err != REG_NOERROR, 0)) { free_dfa_content (dfa); preg->buffer = NULL; preg->allocated = 0; return err; }#ifdef DEBUG /* Note: length+1 will not overflow since it is checked in init_dfa. */ dfa->re_str = re_malloc (char, length + 1); strncpy (dfa->re_str, pattern, length + 1);#endif __libc_lock_init (dfa->lock); err = re_string_construct (®exp, pattern, length, preg->translate, syntax & RE_ICASE, dfa); if (BE (err != REG_NOERROR, 0)) { re_compile_internal_free_return: free_workarea_compile (preg); re_string_destruct (®exp); free_dfa_content (dfa); preg->buffer = NULL; preg->allocated = 0; return err; } /* Parse the regular expression, and build a structure tree. */ preg->re_nsub = 0; dfa->str_tree = parse (®exp, preg, syntax, &err); if (BE (dfa->str_tree == NULL, 0)) goto re_compile_internal_free_return; /* Analyze the tree and create the nfa. */ err = analyze (preg); if (BE (err != REG_NOERROR, 0)) goto re_compile_internal_free_return;#ifdef RE_ENABLE_I18N /* If possible, do searching in single byte encoding to speed things up. */ if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL) optimize_utf8 (dfa);#endif /* Then create the initial state of the dfa. */ err = create_initial_state (dfa); /* Release work areas. */ free_workarea_compile (preg); re_string_destruct (®exp); if (BE (err != REG_NOERROR, 0)) { free_dfa_content (dfa); preg->buffer = NULL; preg->allocated = 0; } return err;}/* Initialize DFA. We use the length of the regular expression PAT_LEN as the initial length of some arrays. */static reg_errcode_tinit_dfa (re_dfa_t *dfa, size_t pat_len){ __re_size_t table_size;#ifdef RE_ENABLE_I18N size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t));#else size_t max_i18n_object_size = 0;#endif size_t max_object_size = MAX (sizeof (struct re_state_table_entry), MAX (sizeof (re_token_t), MAX (sizeof (re_node_set), MAX (sizeof (regmatch_t), max_i18n_object_size)))); memset (dfa, '\0', sizeof (re_dfa_t)); /* Force allocation of str_tree_storage the first time. */ dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; /* Avoid overflows. The extra "/ 2" is for the table_size doubling calculation below, and for similar doubling calculations elsewhere. And it's <= rather than <, because some of the doubling calculations add 1 afterwards. */ if (BE (SIZE_MAX / max_object_size / 2 <= pat_len, 0)) return REG_ESPACE; dfa->nodes_alloc = pat_len + 1; dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); /* table_size = 2 ^ ceil(log pat_len) */ for (table_size = 1; ; table_size <<= 1) if (table_size > pat_len) break; dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size); dfa->state_hash_mask = table_size - 1; dfa->mb_cur_max = MB_CUR_MAX;#ifdef _LIBC if (dfa->mb_cur_max == 6 && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0) dfa->is_utf8 = 1; dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) != 0);#else if (strcmp (locale_charset (), "UTF-8") == 0) dfa->is_utf8 = 1; /* We check exhaustively in the loop below if this charset is a superset of ASCII. */ dfa->map_notascii = 0;#endif#ifdef RE_ENABLE_I18N if (dfa->mb_cur_max > 1) { if (dfa->is_utf8) dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map; else { int i, j, ch; dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); if (BE (dfa->sb_char == NULL, 0)) return REG_ESPACE; /* Set the bits corresponding to single byte chars. */ for (i = 0, ch = 0; i < BITSET_WORDS; ++i) for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) { wint_t wch = __btowc (ch); if (wch != WEOF) dfa->sb_char[i] |= (bitset_word_t) 1 << j;# ifndef _LIBC if (isascii (ch) && wch != ch) dfa->map_notascii = 1;# endif } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -