📄 regex_internal.c
字号:
/* -*- buffer-read-only: t -*- vi: set ro: *//* DO NOT EDIT! GENERATED AUTOMATICALLY! *//* Extended regular expression matching and search library. Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */static void re_string_construct_common (const char *str, Idx len, re_string_t *pstr, RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa) internal_function;static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, re_hashval_t hash) internal_function;static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes, unsigned int context, re_hashval_t hash) internal_function;/* Functions for string operation. *//* This function allocate the buffers. It is necessary to call re_string_reconstruct before using the object. */static reg_errcode_tinternal_functionre_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len, RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa){ reg_errcode_t ret; Idx init_buf_len; /* Ensure at least one character fits into the buffers. */ if (init_len < dfa->mb_cur_max) init_len = dfa->mb_cur_max; init_buf_len = (len + 1 < init_len) ? len + 1: init_len; re_string_construct_common (str, len, pstr, trans, icase, dfa); ret = re_string_realloc_buffers (pstr, init_buf_len); if (BE (ret != REG_NOERROR, 0)) return ret; pstr->word_char = dfa->word_char; pstr->word_ops_used = dfa->word_ops_used; pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len; pstr->valid_raw_len = pstr->valid_len; return REG_NOERROR;}/* This function allocate the buffers, and initialize them. */static reg_errcode_tinternal_functionre_string_construct (re_string_t *pstr, const char *str, Idx len, RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa){ reg_errcode_t ret; memset (pstr, '\0', sizeof (re_string_t)); re_string_construct_common (str, len, pstr, trans, icase, dfa); if (len > 0) { ret = re_string_realloc_buffers (pstr, len + 1); if (BE (ret != REG_NOERROR, 0)) return ret; } pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; if (icase) {#ifdef RE_ENABLE_I18N if (dfa->mb_cur_max > 1) { while (1) { ret = build_wcs_upper_buffer (pstr); if (BE (ret != REG_NOERROR, 0)) return ret; if (pstr->valid_raw_len >= len) break; if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max) break; ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); if (BE (ret != REG_NOERROR, 0)) return ret; } } else#endif /* RE_ENABLE_I18N */ build_upper_buffer (pstr); } else {#ifdef RE_ENABLE_I18N if (dfa->mb_cur_max > 1) build_wcs_buffer (pstr); else#endif /* RE_ENABLE_I18N */ { if (trans != NULL) re_string_translate_buffer (pstr); else { pstr->valid_len = pstr->bufs_len; pstr->valid_raw_len = pstr->bufs_len; } } } return REG_NOERROR;}/* Helper functions for re_string_allocate, and re_string_construct. */static reg_errcode_tinternal_functionre_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len){#ifdef RE_ENABLE_I18N if (pstr->mb_cur_max > 1) { wint_t *new_wcs; /* Avoid overflow. */ size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx)); if (BE (SIZE_MAX / max_object_size < new_buf_len, 0)) return REG_ESPACE; new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); if (BE (new_wcs == NULL, 0)) return REG_ESPACE; pstr->wcs = new_wcs; if (pstr->offsets != NULL) { Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len); if (BE (new_offsets == NULL, 0)) return REG_ESPACE; pstr->offsets = new_offsets; } }#endif /* RE_ENABLE_I18N */ if (pstr->mbs_allocated) { unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char, new_buf_len); if (BE (new_mbs == NULL, 0)) return REG_ESPACE; pstr->mbs = new_mbs; } pstr->bufs_len = new_buf_len; return REG_NOERROR;}static voidinternal_functionre_string_construct_common (const char *str, Idx len, re_string_t *pstr, RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa){ pstr->raw_mbs = (const unsigned char *) str; pstr->len = len; pstr->raw_len = len; pstr->trans = trans; pstr->icase = icase; pstr->mbs_allocated = (trans != NULL || icase); pstr->mb_cur_max = dfa->mb_cur_max; pstr->is_utf8 = dfa->is_utf8; pstr->map_notascii = dfa->map_notascii; pstr->stop = pstr->len; pstr->raw_stop = pstr->stop;}#ifdef RE_ENABLE_I18N/* Build wide character buffer PSTR->WCS. If the byte sequence of the string are: <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3> Then wide character buffer will be: <wc1> , WEOF , <wc2> , WEOF , <wc3> We use WEOF for padding, they indicate that the position isn't a first byte of a multibyte character. Note that this function assumes PSTR->VALID_LEN elements are already built and starts from PSTR->VALID_LEN. */static voidinternal_functionbuild_wcs_buffer (re_string_t *pstr){#ifdef _LIBC unsigned char buf[MB_LEN_MAX]; assert (MB_LEN_MAX >= pstr->mb_cur_max);#else unsigned char buf[64];#endif mbstate_t prev_st; Idx byte_idx, end_idx, remain_len; size_t mbclen; /* Build the buffers from pstr->valid_len to either pstr->len or pstr->bufs_len. */ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; for (byte_idx = pstr->valid_len; byte_idx < end_idx;) { wchar_t wc; const char *p; remain_len = end_idx - byte_idx; prev_st = pstr->cur_state; /* Apply the translation if we need. */ if (BE (pstr->trans != NULL, 0)) { int i, ch; for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i) { ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i]; buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch]; } p = (const char *) buf; } else p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx; mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state); if (BE (mbclen == (size_t) -2, 0)) { /* The buffer doesn't have enough space, finish to build. */ pstr->cur_state = prev_st; break; } else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0)) { /* We treat these cases as a singlebyte character. */ mbclen = 1; wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; if (BE (pstr->trans != NULL, 0)) wc = pstr->trans[wc]; pstr->cur_state = prev_st; } /* Write wide character and padding. */ pstr->wcs[byte_idx++] = wc; /* Write paddings. */ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) pstr->wcs[byte_idx++] = WEOF; } pstr->valid_len = byte_idx; pstr->valid_raw_len = byte_idx;}/* Build wide character buffer PSTR->WCS like build_wcs_buffer, but for REG_ICASE. */static reg_errcode_tinternal_functionbuild_wcs_upper_buffer (re_string_t *pstr){ mbstate_t prev_st; Idx src_idx, byte_idx, end_idx, remain_len; size_t mbclen;#ifdef _LIBC char buf[MB_LEN_MAX]; assert (MB_LEN_MAX >= pstr->mb_cur_max);#else char buf[64];#endif byte_idx = pstr->valid_len; end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; /* The following optimization assumes that ASCII characters can be mapped to wide characters with a simple cast. */ if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed) { while (byte_idx < end_idx) { wchar_t wc; if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]) && mbsinit (&pstr->cur_state)) { /* In case of a singlebyte character. */ pstr->mbs[byte_idx] = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]); /* The next step uses the assumption that wchar_t is encoded ASCII-safe: all ASCII values can be converted like this. */ pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx]; ++byte_idx; continue; } remain_len = end_idx - byte_idx; prev_st = pstr->cur_state; mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx), remain_len, &pstr->cur_state); if (BE (mbclen < (size_t) -2, 1)) { wchar_t wcu = wc; if (iswlower (wc)) { size_t mbcdlen; wcu = towupper (wc); mbcdlen = wcrtomb (buf, wcu, &prev_st); if (BE (mbclen == mbcdlen, 1)) memcpy (pstr->mbs + byte_idx, buf, mbclen); else { src_idx = byte_idx; goto offsets_needed; } } else memcpy (pstr->mbs + byte_idx, pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen); pstr->wcs[byte_idx++] = wcu; /* Write paddings. */ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) pstr->wcs[byte_idx++] = WEOF; } else if (mbclen == (size_t) -1 || mbclen == 0) { /* It is an invalid character or '\0'. Just use the byte. */ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; pstr->mbs[byte_idx] = ch; /* And also cast it to wide char. */ pstr->wcs[byte_idx++] = (wchar_t) ch; if (BE (mbclen == (size_t) -1, 0)) pstr->cur_state = prev_st; } else { /* The buffer doesn't have enough space, finish to build. */ pstr->cur_state = prev_st; break; } } pstr->valid_len = byte_idx; pstr->valid_raw_len = byte_idx; return REG_NOERROR; } else for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;) { wchar_t wc; const char *p; offsets_needed: remain_len = end_idx - byte_idx; prev_st = pstr->cur_state; if (BE (pstr->trans != NULL, 0)) { int i, ch; for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i) { ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i]; buf[i] = pstr->trans[ch]; } p = (const char *) buf; } else p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx; mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state); if (BE (mbclen < (size_t) -2, 1)) { wchar_t wcu = wc; if (iswlower (wc)) { size_t mbcdlen; wcu = towupper (wc); mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st); if (BE (mbclen == mbcdlen, 1)) memcpy (pstr->mbs + byte_idx, buf, mbclen); else if (mbcdlen != (size_t) -1) { size_t i; if (byte_idx + mbcdlen > pstr->bufs_len) { pstr->cur_state = prev_st; break; } if (pstr->offsets == NULL) { pstr->offsets = re_malloc (Idx, pstr->bufs_len); if (pstr->offsets == NULL) return REG_ESPACE; } if (!pstr->offsets_needed) { for (i = 0; i < (size_t) byte_idx; ++i) pstr->offsets[i] = i; pstr->offsets_needed = 1; } memcpy (pstr->mbs + byte_idx, buf, mbcdlen); pstr->wcs[byte_idx] = wcu; pstr->offsets[byte_idx] = src_idx; for (i = 1; i < mbcdlen; ++i) { pstr->offsets[byte_idx + i] = src_idx + (i < mbclen ? i : mbclen - 1); pstr->wcs[byte_idx + i] = WEOF; } pstr->len += mbcdlen - mbclen; if (pstr->raw_stop > src_idx) pstr->stop += mbcdlen - mbclen; end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; byte_idx += mbcdlen; src_idx += mbclen; continue; } else memcpy (pstr->mbs + byte_idx, p, mbclen);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -