regex_internal.c

来自「gnu tar 源码包。 tar 软件是 Unix 系统下的一个打包软件」· C语言代码 · 共 1,744 行 · 第 1/4 页
1,744 行
	      }	    else	      memcpy (pstr->mbs + byte_idx, p, mbclen);	    if (BE (pstr->offsets_needed != 0, 0))	      {		size_t i;		for (i = 0; i < mbclen; ++i)		  pstr->offsets[byte_idx + i] = src_idx + i;	      }	    src_idx += mbclen;	    pstr->wcs[byte_idx++] = wcu;	    /* Write paddings.  */	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)	      pstr->wcs[byte_idx++] = WEOF;	  }	else if (mbclen == (size_t) -1 || mbclen == 0)	  {	    /* It is an invalid character or '\0'.  Just use the byte.  */	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];	    if (BE (pstr->trans != NULL, 0))	      ch = pstr->trans [ch];	    pstr->mbs[byte_idx] = ch;	    if (BE (pstr->offsets_needed != 0, 0))	      pstr->offsets[byte_idx] = src_idx;	    ++src_idx;	    /* And also cast it to wide char.  */	    pstr->wcs[byte_idx++] = (wchar_t) ch;	    if (BE (mbclen == (size_t) -1, 0))	      pstr->cur_state = prev_st;	  }	else	  {	    /* The buffer doesn't have enough space, finish to build.  */	    pstr->cur_state = prev_st;	    break;	  }      }  pstr->valid_len = byte_idx;  pstr->valid_raw_len = src_idx;  return REG_NOERROR;}/* Skip characters until the index becomes greater than NEW_RAW_IDX.   Return the index.  */static Idxinternal_functionre_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc){  mbstate_t prev_st;  Idx rawbuf_idx;  size_t mbclen;  wint_t wc = WEOF;  /* Skip the characters which are not necessary to check.  */  for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;       rawbuf_idx < new_raw_idx;)    {      wchar_t wc2;      Idx remain_len;      remain_len = pstr->len - rawbuf_idx;      prev_st = pstr->cur_state;      mbclen = mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,			remain_len, &pstr->cur_state);      if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))	{	  /* We treat these cases as a single byte character.  
*/	  if (mbclen == 0 || remain_len == 0)	    wc = L'\0';	  else	    wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);	  mbclen = 1;	  pstr->cur_state = prev_st;	}      else	wc = wc2;      /* Then proceed the next character.  */      rawbuf_idx += mbclen;    }  *last_wc = wc;  return rawbuf_idx;}#endif /* RE_ENABLE_I18N  *//* Build the buffer PSTR->MBS, and apply the translation if we need.   This function is used in case of REG_ICASE.  */static voidinternal_functionbuild_upper_buffer (re_string_t *pstr){  Idx char_idx, end_idx;  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;  for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)    {      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];      if (BE (pstr->trans != NULL, 0))	ch = pstr->trans[ch];      if (islower (ch))	pstr->mbs[char_idx] = toupper (ch);      else	pstr->mbs[char_idx] = ch;    }  pstr->valid_len = char_idx;  pstr->valid_raw_len = char_idx;}/* Apply TRANS to the buffer in PSTR.  */static voidinternal_functionre_string_translate_buffer (re_string_t *pstr){  Idx buf_idx, end_idx;  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;  for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)    {      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];      pstr->mbs[buf_idx] = pstr->trans[ch];    }  pstr->valid_len = buf_idx;  pstr->valid_raw_len = buf_idx;}/* This function re-construct the buffers.   Concretely, convert to wide character in case of pstr->mb_cur_max > 1,   convert to upper case in case of REG_ICASE, apply translation.  */static reg_errcode_tinternal_functionre_string_reconstruct (re_string_t *pstr, Idx idx, int eflags){  Idx offset;  if (BE (pstr->raw_mbs_idx <= idx, 0))    offset = idx - pstr->raw_mbs_idx;  else    {      /* Reset buffer.  
*/#ifdef RE_ENABLE_I18N      if (pstr->mb_cur_max > 1)	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));#endif /* RE_ENABLE_I18N */      pstr->len = pstr->raw_len;      pstr->stop = pstr->raw_stop;      pstr->valid_len = 0;      pstr->raw_mbs_idx = 0;      pstr->valid_raw_len = 0;      pstr->offsets_needed = 0;      pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF);      if (!pstr->mbs_allocated)	pstr->mbs = (unsigned char *) pstr->raw_mbs;      offset = idx;    }  if (BE (offset != 0, 1))    {      /* Should the already checked characters be kept?  */      if (BE (offset < pstr->valid_raw_len, 1))	{	  /* Yes, move them to the front of the buffer.  */#ifdef RE_ENABLE_I18N	  if (BE (pstr->offsets_needed, 0))	    {	      Idx low = 0, high = pstr->valid_len, mid;	      do		{		  mid = (high + low) / 2;		  if (pstr->offsets[mid] > offset)		    high = mid;		  else if (pstr->offsets[mid] < offset)		    low = mid + 1;		  else		    break;		}	      while (low < high);	      if (pstr->offsets[mid] < offset)		++mid;	      pstr->tip_context = re_string_context_at (pstr, mid - 1,							eflags);	      /* This can be quite complicated, so handle specially		 only the common and easy case where the character with		 different length representation of lower and upper		 case is present at or after offset.  */	      if (pstr->valid_len > offset		  && mid == offset && pstr->offsets[mid] == offset)		{		  memmove (pstr->wcs, pstr->wcs + offset,			   (pstr->valid_len - offset) * sizeof (wint_t));		  memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);		  pstr->valid_len -= offset;		  pstr->valid_raw_len -= offset;		  for (low = 0; low < pstr->valid_len; low++)		    pstr->offsets[low] = pstr->offsets[low + offset] - offset;		}	      else		{		  /* Otherwise, just find out how long the partial multibyte		     character at offset is and fill it with WEOF/255.  
*/		  pstr->len = pstr->raw_len - idx + offset;		  pstr->stop = pstr->raw_stop - idx + offset;		  pstr->offsets_needed = 0;		  while (mid > 0 && pstr->offsets[mid - 1] == offset)		    --mid;		  while (mid < pstr->valid_len)		    if (pstr->wcs[mid] != WEOF)		      break;		    else		      ++mid;		  if (mid == pstr->valid_len)		    pstr->valid_len = 0;		  else		    {		      pstr->valid_len = pstr->offsets[mid] - offset;		      if (pstr->valid_len)			{			  for (low = 0; low < pstr->valid_len; ++low)			    pstr->wcs[low] = WEOF;			  memset (pstr->mbs, 255, pstr->valid_len);			}		    }		  pstr->valid_raw_len = pstr->valid_len;		}	    }	  else#endif	    {	      pstr->tip_context = re_string_context_at (pstr, offset - 1,							eflags);#ifdef RE_ENABLE_I18N	      if (pstr->mb_cur_max > 1)		memmove (pstr->wcs, pstr->wcs + offset,			 (pstr->valid_len - offset) * sizeof (wint_t));#endif /* RE_ENABLE_I18N */	      if (BE (pstr->mbs_allocated, 0))		memmove (pstr->mbs, pstr->mbs + offset,			 pstr->valid_len - offset);	      pstr->valid_len -= offset;	      pstr->valid_raw_len -= offset;#if DEBUG	      assert (pstr->valid_len > 0);#endif	    }	}      else	{	  /* No, skip all characters until IDX.  */	  Idx prev_valid_len = pstr->valid_len;#ifdef RE_ENABLE_I18N	  if (BE (pstr->offsets_needed, 0))	    {	      pstr->len = pstr->raw_len - idx + offset;	      pstr->stop = pstr->raw_stop - idx + offset;	      pstr->offsets_needed = 0;	    }#endif	  pstr->valid_len = 0;#ifdef RE_ENABLE_I18N	  if (pstr->mb_cur_max > 1)	    {	      Idx wcs_idx;	      wint_t wc = WEOF;	      if (pstr->is_utf8)		{		  const unsigned char *raw, *p, *end;		  /* Special case UTF-8.  Multi-byte chars start with any		     byte other than 0x80 - 0xbf.  
*/		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;		  end = raw + (offset - pstr->mb_cur_max);		  if (end < pstr->raw_mbs)		    end = pstr->raw_mbs;		  p = raw + offset - 1;#ifdef _LIBC		  /* We know the wchar_t encoding is UCS4, so for the simple		     case, ASCII characters, skip the conversion step.  */		  if (isascii (*p) && BE (pstr->trans == NULL, 1))		    {		      memset (&pstr->cur_state, '\0', sizeof (mbstate_t));		      /* pstr->valid_len = 0; */		      wc = (wchar_t) *p;		    }		  else#endif		    for (; p >= end; --p)		      if ((*p & 0xc0) != 0x80)			{			  mbstate_t cur_state;			  wchar_t wc2;			  Idx mlen = raw + pstr->len - p;			  unsigned char buf[6];			  size_t mbclen;			  if (BE (pstr->trans != NULL, 0))			    {			      int i = mlen < 6 ? mlen : 6;			      while (--i >= 0)				buf[i] = pstr->trans[p[i]];			    }			  /* XXX Don't use mbrtowc, we know which conversion			     to use (UTF-8 -> UCS4).  */			  memset (&cur_state, 0, sizeof (cur_state));			  mbclen = mbrtowc (&wc2, (const char *) p, mlen,					    &cur_state);			  if (raw + offset - p <= mbclen			      && mbclen < (size_t) -2)			    {			      memset (&pstr->cur_state, '\0',				      sizeof (mbstate_t));			      pstr->valid_len = mbclen - (raw + offset - p);			      wc = wc2;			    }			  break;			}		}	      if (wc == WEOF)		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;	      if (wc == WEOF)		pstr->tip_context		  = re_string_context_at (pstr, prev_valid_len - 1, eflags);	      else		pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)				      && IS_WIDE_WORD_CHAR (wc))				     ? CONTEXT_WORD				     : ((IS_WIDE_NEWLINE (wc)					 && pstr->newline_anchor)					? 
CONTEXT_NEWLINE : 0));	      if (BE (pstr->valid_len, 0))		{		  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)		    pstr->wcs[wcs_idx] = WEOF;		  if (pstr->mbs_allocated)		    memset (pstr->mbs, 255, pstr->valid_len);		}	      pstr->valid_raw_len = pstr->valid_len;	    }	  else#endif /* RE_ENABLE_I18N */	    {	      int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];	      pstr->valid_raw_len = 0;	      if (pstr->trans)		c = pstr->trans[c];	      pstr->tip_context = (bitset_contain (pstr->word_char, c)				   ? CONTEXT_WORD				   : ((IS_NEWLINE (c) && pstr->newline_anchor)				      ? CONTEXT_NEWLINE : 0));	    }	}      if (!BE (pstr->mbs_allocated, 0))	pstr->mbs += offset;    }  pstr->raw_mbs_idx = idx;  pstr->len -= offset;  pstr->stop -= offset;  /* Then build the buffers.  */#ifdef RE_ENABLE_I18N  if (pstr->mb_cur_max > 1)    {      if (pstr->icase)	{	  reg_errcode_t ret = build_wcs_upper_buffer (pstr);	  if (BE (ret != REG_NOERROR, 0))	    return ret;	}      else	build_wcs_buffer (pstr);    }  else#endif /* RE_ENABLE_I18N */    if (BE (pstr->mbs_allocated, 0))      {	if (pstr->icase)	  build_upper_buffer (pstr);	else if (pstr->trans != NULL)	  re_string_translate_buffer (pstr);      }    else      pstr->valid_len = pstr->len;  pstr->cur_idx = 0;  return REG_NOERROR;}static unsigned charinternal_function __attribute ((pure))re_string_peek_byte_case (const re_string_t *pstr, Idx idx){  int ch;  Idx off;  /* Handle the common (easiest) cases first.  */  if (BE (!pstr->mbs_allocated, 1))    return re_string_peek_byte (pstr, idx);#ifdef RE_ENABLE_I18N  if (pstr->mb_cur_max > 1      && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))    return re_string_peek_byte (pstr, idx);#endif  off = pstr->cur_idx + idx;#ifdef RE_ENABLE_I18N  if (pstr->offsets_needed)    off = pstr->offsets[off];#endif  ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];#ifdef RE_ENABLE_I18N  /* Ensure that e.g. 
for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I     this function returns CAPITAL LETTER I instead of first byte of     DOTLESS SMALL LETTER I.  The latter would confuse the parser,     since peek_byte_case doesn't advance cur_idx in any way.  */  if (pstr->offsets_needed && !isascii (ch))    return re_string_peek_byte (pstr, idx);#endif  return ch;}static unsigned charinternal_function __attribute ((pure))
regex_internal.c - 源码说明

本页面展示了「gnu tar 源码包。 tar 软件是 Unix 系统下的一个打包软件」中的 regex_internal.c 源码文件，采用 C 语言编写，共 1,744 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与tar相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?