📄 regex_internal.c
字号:
/* Extended regular expression matching and search library. Copyright (C) 2002, 2003 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */static void re_string_construct_common (const char *str, int len, re_string_t *pstr, RE_TRANSLATE_TYPE trans, int icase);#ifdef RE_ENABLE_I18Nstatic int re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc);#endif /* RE_ENABLE_I18N */static re_dfastate_t *create_newstate_common (re_dfa_t *dfa, const re_node_set *nodes, unsigned int hash);static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate, unsigned int hash);static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa, const re_node_set *nodes, unsigned int hash);static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa, const re_node_set *nodes, unsigned int context, unsigned int hash);static unsigned int inline calc_state_hash (const re_node_set *nodes, unsigned int context);/* Functions for string operation. *//* This function allocate the buffers. It is necessary to call re_string_reconstruct before using the object. */static reg_errcode_tre_string_allocate (pstr, str, len, init_len, trans, icase) re_string_t *pstr; const char *str; int len, init_len, icase; RE_TRANSLATE_TYPE trans;{ reg_errcode_t ret; int init_buf_len = (len + 1 < init_len) ? len + 1: init_len; re_string_construct_common (str, len, pstr, trans, icase); pstr->stop = pstr->len; ret = re_string_realloc_buffers (pstr, init_buf_len); if (BE (ret != REG_NOERROR, 0)) return ret; pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case : (unsigned char *) str); pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case; pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr) || MB_CUR_MAX > 1) ? pstr->valid_len : len; return REG_NOERROR;}/* This function allocate the buffers, and initialize them. */static reg_errcode_tre_string_construct (pstr, str, len, trans, icase) re_string_t *pstr; const char *str; int len, icase; RE_TRANSLATE_TYPE trans;{ reg_errcode_t ret; re_string_construct_common (str, len, pstr, trans, icase); pstr->stop = pstr->len; /* Set 0 so that this function can initialize whole buffers. */ pstr->valid_len = 0; if (len > 0) { ret = re_string_realloc_buffers (pstr, len + 1); if (BE (ret != REG_NOERROR, 0)) return ret; } pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case : (unsigned char *) str); pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case; if (icase) {#ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) build_wcs_upper_buffer (pstr); else#endif /* RE_ENABLE_I18N */ build_upper_buffer (pstr); } else {#ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) build_wcs_buffer (pstr); else#endif /* RE_ENABLE_I18N */ { if (trans != NULL) re_string_translate_buffer (pstr); else pstr->valid_len = len; } } /* Initialized whole buffers, then valid_len == bufs_len. */ pstr->valid_len = pstr->bufs_len; return REG_NOERROR;}/* Helper functions for re_string_allocate, and re_string_construct. */static reg_errcode_tre_string_realloc_buffers (pstr, new_buf_len) re_string_t *pstr; int new_buf_len;{#ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) { wint_t *new_array = re_realloc (pstr->wcs, wint_t, new_buf_len); if (BE (new_array == NULL, 0)) return REG_ESPACE; pstr->wcs = new_array; }#endif /* RE_ENABLE_I18N */ if (MBS_ALLOCATED (pstr)) { unsigned char *new_array = re_realloc (pstr->mbs, unsigned char, new_buf_len); if (BE (new_array == NULL, 0)) return REG_ESPACE; pstr->mbs = new_array; } if (MBS_CASE_ALLOCATED (pstr)) { unsigned char *new_array = re_realloc (pstr->mbs_case, unsigned char, new_buf_len); if (BE (new_array == NULL, 0)) return REG_ESPACE; pstr->mbs_case = new_array; if (!MBS_ALLOCATED (pstr)) pstr->mbs = pstr->mbs_case; } pstr->bufs_len = new_buf_len; return REG_NOERROR;}static voidre_string_construct_common (str, len, pstr, trans, icase) const char *str; int len; re_string_t *pstr; RE_TRANSLATE_TYPE trans; int icase;{ memset (pstr, '\0', sizeof (re_string_t)); pstr->raw_mbs = (const unsigned char *) str; pstr->len = len; pstr->trans = trans; pstr->icase = icase ? 1 : 0;}#ifdef RE_ENABLE_I18N/* Build wide character buffer PSTR->WCS. If the byte sequence of the string are: <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3> Then wide character buffer will be: <wc1> , WEOF , <wc2> , WEOF , <wc3> We use WEOF for padding, they indicate that the position isn't a first byte of a multibyte character. Note that this function assumes PSTR->VALID_LEN elements are already built and starts from PSTR->VALID_LEN. */static voidbuild_wcs_buffer (pstr) re_string_t *pstr;{ mbstate_t prev_st; int byte_idx, end_idx, mbclen, remain_len; /* Build the buffers from pstr->valid_len to either pstr->len or pstr->bufs_len. */ end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len; for (byte_idx = pstr->valid_len; byte_idx < end_idx;) { wchar_t wc; remain_len = end_idx - byte_idx; prev_st = pstr->cur_state; mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx), remain_len, &pstr->cur_state); if (BE (mbclen == (size_t) -2, 0)) { /* The buffer doesn't have enough space, finish to build. */ pstr->cur_state = prev_st; break; } else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0)) { /* We treat these cases as a singlebyte character. */ mbclen = 1; wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; pstr->cur_state = prev_st; } /* Apply the translateion if we need. */ if (pstr->trans != NULL && mbclen == 1) { int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]]; pstr->mbs_case[byte_idx] = ch; } /* Write wide character and padding. */ pstr->wcs[byte_idx++] = wc; /* Write paddings. */ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) pstr->wcs[byte_idx++] = WEOF; } pstr->valid_len = byte_idx;}/* Build wide character buffer PSTR->WCS like build_wcs_buffer, but for REG_ICASE. */static voidbuild_wcs_upper_buffer (pstr) re_string_t *pstr;{ mbstate_t prev_st; int byte_idx, end_idx, mbclen, remain_len; /* Build the buffers from pstr->valid_len to either pstr->len or pstr->bufs_len. */ end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len; for (byte_idx = pstr->valid_len; byte_idx < end_idx;) { wchar_t wc; remain_len = end_idx - byte_idx; prev_st = pstr->cur_state; mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx), remain_len, &pstr->cur_state); if (BE (mbclen == (size_t) -2, 0)) { /* The buffer doesn't have enough space, finish to build. */ pstr->cur_state = prev_st; break; } else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0) { /* In case of a singlebyte character. */ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; /* Apply the translateion if we need. */ if (pstr->trans != NULL && mbclen == 1) { ch = pstr->trans[ch]; pstr->mbs_case[byte_idx] = ch; } pstr->wcs[byte_idx] = iswlower (wc) ? toupper (wc) : wc; pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch; if (BE (mbclen == (size_t) -1, 0)) pstr->cur_state = prev_st; } else /* mbclen > 1 */ { if (iswlower (wc)) wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st); else memcpy (pstr->mbs + byte_idx, pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen); pstr->wcs[byte_idx++] = iswlower (wc) ? toupper (wc) : wc; /* Write paddings. */ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) pstr->wcs[byte_idx++] = WEOF; } } pstr->valid_len = byte_idx;}/* Skip characters until the index becomes greater than NEW_RAW_IDX. Return the index. */static intre_string_skip_chars (pstr, new_raw_idx, last_wc) re_string_t *pstr; int new_raw_idx; wint_t *last_wc;{ mbstate_t prev_st; int rawbuf_idx, mbclen; wchar_t wc = 0; /* Skip the characters which are not necessary to check. */ for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len; rawbuf_idx < new_raw_idx;) { int remain_len; remain_len = pstr->len - rawbuf_idx; prev_st = pstr->cur_state; mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx, remain_len, &pstr->cur_state); if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0)) { /* We treat these cases as a singlebyte character. */ mbclen = 1; pstr->cur_state = prev_st; } /* Then proceed the next character. */ rawbuf_idx += mbclen; } *last_wc = (wint_t) wc; return rawbuf_idx;}#endif /* RE_ENABLE_I18N *//* Build the buffer PSTR->MBS, and apply the translation if we need. This function is used in case of REG_ICASE. */static voidbuild_upper_buffer (pstr) re_string_t *pstr;{ int char_idx, end_idx; end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx) { int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx]; if (pstr->trans != NULL) { ch = pstr->trans[ch]; pstr->mbs_case[char_idx] = ch; } if (islower (ch)) pstr->mbs[char_idx] = toupper (ch); else pstr->mbs[char_idx] = ch; } pstr->valid_len = char_idx;}/* Apply TRANS to the buffer in PSTR. */static voidre_string_translate_buffer (pstr) re_string_t *pstr;{ int buf_idx, end_idx; end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx) { int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx]; pstr->mbs_case[buf_idx] = pstr->trans[ch]; } pstr->valid_len = buf_idx;}/* This function re-construct the buffers. Concretely, convert to wide character in case of MB_CUR_MAX > 1, convert to upper case in case of REG_ICASE, apply translation. */static reg_errcode_tre_string_reconstruct (pstr, idx, eflags, newline) re_string_t *pstr; int idx, eflags, newline;{ int offset = idx - pstr->raw_mbs_idx; if (offset < 0) { /* Reset buffer. */#ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) memset (&pstr->cur_state, '\0', sizeof (mbstate_t));#endif /* RE_ENABLE_I18N */ pstr->len += pstr->raw_mbs_idx; pstr->stop += pstr->raw_mbs_idx; pstr->valid_len = pstr->raw_mbs_idx = 0; pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF : CONTEXT_NEWLINE | CONTEXT_BEGBUF); if (!MBS_CASE_ALLOCATED (pstr)) pstr->mbs_case = (unsigned char *) pstr->raw_mbs; if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr)) pstr->mbs = (unsigned char *) pstr->raw_mbs; offset = idx; } if (offset != 0) { /* Are the characters which are already checked remain? */ if (offset < pstr->valid_len) { /* Yes, move them to the front of the buffer. */ pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags, newline);#ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) memmove (pstr->wcs, pstr->wcs + offset, (pstr->valid_len - offset) * sizeof (wint_t));#endif /* RE_ENABLE_I18N */ if (MBS_ALLOCATED (pstr)) memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset); if (MBS_CASE_ALLOCATED (pstr)) memmove (pstr->mbs_case, pstr->mbs_case + offset, pstr->valid_len - offset); pstr->valid_len -= offset;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -