gconv_simple.c

来自「KPIT GNU Tools is a set of GNU development tools for Renesas microcontrollers.」· C语言代码 · 共 1,355 行 · 第 1/3 页
1,355 行
  }#define LOOP_NEED_FLAGS#include <iconv/loop.c>#include <iconv/skeleton.c>/* Convert from UTF-8 to the internal (UCS4-like) format.  */#define DEFINE_INIT		0#define DEFINE_FINI		0#define MIN_NEEDED_FROM		1#define MAX_NEEDED_FROM		6#define MIN_NEEDED_TO		4#define FROM_DIRECTION		1#define FROM_LOOP		utf8_internal_loop#define TO_LOOP			utf8_internal_loop /* This is not used.  */#define FUNCTION_NAME		__gconv_transform_utf8_internal#define ONE_DIRECTION		1#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO#define LOOPFCT			FROM_LOOP#define BODY \  {									      \    uint32_t ch;							      \    uint32_t cnt;							      \    uint32_t i;							      \									      \    /* Next input byte.  */						      \    ch = *inptr;							      \									      \    if (ch < 0x80)							      \      {									      \	/* One byte sequence.  */					      \	cnt = 1;							      \	++inptr;							      \      }									      \    else								      \      {									      \ 	if (ch >= 0xc2 && ch < 0xe0)					      \	  {								      \ 	    /* We expect two bytes.  The first byte cannot be 0xc0 or 0xc1,   \ 	       otherwise the wide character could have been represented	      \ 	       using a single byte.  */					      \	    cnt = 2;							      \	    ch &= 0x1f;							      \	  }								      \        else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1))		      \	  {								      \	    /* We expect three bytes.  */				      \	    cnt = 3;							      \	    ch &= 0x0f;							      \	  }								      \	else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1))		      \	  {								      \	    /* We expect four bytes.  */				      \	    cnt = 4;							      \	    ch &= 0x07;							      \	  }								      \	else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1))		      \	  {								      \	    /* We expect five bytes.  
*/				      \	    cnt = 5;							      \	    ch &= 0x03;							      \	  }								      \	else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1))		      \	  {								      \	    /* We expect six bytes.  */					      \	    cnt = 6;							      \	    ch &= 0x01;							      \	  }								      \	else								      \	  {								      \	    int skipped;						      \									      \	    if (! ignore_errors_p ())					      \	      {								      \		/* This is an illegal encoding.  */			      \		result = __GCONV_ILLEGAL_INPUT;				      \		break;							      \	      }								      \									      \	    /* Search the end of this ill-formed UTF-8 character.  This	      \	       is the next byte with (x & 0xc0) != 0x80.  */		      \	     skipped = 0;						      \	     do								      \	       {							      \		 ++inptr;						      \		 ++skipped;						      \	       }							      \	     while (inptr < inend && (*inptr & 0xc0) == 0x80 && skipped < 5); \									      \	     continue;							      \	  }								      \									      \	if (__builtin_expect (inptr + cnt > inend, 0))			      \	  {								      \	    /* We don't have enough input.  But before we report that check   \	       that all the bytes are correct.  */			      \	    for (i = 1; inptr + i < inend; ++i)				      \	      if ((inptr[i] & 0xc0) != 0x80)				      \		break;							      \									      \	    if (__builtin_expect (inptr + i == inend, 1))		      \	      {								      \		result = __GCONV_INCOMPLETE_INPUT;			      \		break;							      \	      }								      \									      \	    if (ignore_errors_p ())					      \	      {								      \		/* Ignore it.  */					      \		inptr += i;						      \		*irreversible = *irreversible + 1;			      \		continue;						      \	      }								      \									      \	    result = __GCONV_ILLEGAL_INPUT;				      \	    break;							      \	  }								      \									      \	/* Read the possible remaining bytes.  
*/			      \	for (i = 1; i < cnt; ++i)					      \	  {								      \	    uint32_t byte = inptr[i];					      \									      \	    if ((byte & 0xc0) != 0x80)					      \	      /* This is an illegal encoding.  */			      \	      break;							      \									      \	    ch <<= 6;							      \	    ch |= byte & 0x3f;						      \	  }								      \									      \	/* If i < cnt, some trail byte was not >= 0x80, < 0xc0.		      \	   If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could	      \	   have been represented with fewer than cnt bytes.  */		      \	if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0))		      \	  {								      \	    /* This is an illegal encoding.  */				      \	    if (ignore_errors_p ())					      \	      {								      \		inptr += i;						      \		*irreversible = *irreversible + 1;			      \		continue;						      \	      }								      \									      \	    result = __GCONV_ILLEGAL_INPUT;				      \	    break;							      \	  }								      \									      \	inptr += cnt;							      \      }									      \									      \    /* Now adjust the pointers and store the result.  */		      \    *((uint32_t *) outptr) = ch;					      \    outptr = (unsigned char *)((uint32_t *) outptr + 1);		      \  }#define LOOP_NEED_FLAGS#define STORE_REST \  {									      \    /* We store the remaining bytes while converting them into the UCS4	      \       format.  We can assume that the first byte in the buffer is	      \       correct and that it requires a larger number of bytes than there	      \       are in the input buffer.  */					      \    wint_t ch = **inptrp;						      \    size_t cnt;								      \									      \    state->__count = inend - *inptrp;					      \									      \    if (ch >= 0xc2 && ch < 0xe0)					      \      {									      \	/* We expect two bytes.  The first byte cannot be 0xc0 or	      \	   0xc1, otherwise the wide character could have been		      \	   represented using a single byte.  
*/				      \	cnt = 2;							      \	ch &= 0x1f;							      \      }									      \    else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1))			      \      {									      \	/* We expect three bytes.  */					      \	cnt = 3;							      \	ch &= 0x0f;							      \      }									      \    else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1))			      \      {									      \	/* We expect four bytes.  */					      \	cnt = 4;							      \	ch &= 0x07;							      \      }									      \    else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1))			      \      {									      \	/* We expect five bytes.  */					      \	cnt = 5;							      \	ch &= 0x03;							      \      }									      \    else								      \      {									      \	/* We expect six bytes.  */					      \	cnt = 6;							      \	ch &= 0x01;							      \      }									      \									      \    /* The first byte is already consumed.  */				      \    --cnt;								      \    while (++(*inptrp) < inend)						      \      {									      \	ch <<= 6;							      \	ch |= **inptrp & 0x3f;						      \	--cnt;								      \      }									      \									      \    /* Shift for the so far missing bytes.  */				      \    ch <<= cnt * 6;							      \									      \    /* Store the value.  
*/						      \    state->__value.__wch = ch;						      \  }#define UNPACK_BYTES \  {									      \    wint_t wch = state->__value.__wch;					      \    size_t ntotal;							      \    inlen = state->__count;						      \									      \    if (state->__value.__wch <= 0x7ff)					      \      {									      \	bytebuf[0] = 0xc0;						      \	ntotal = 2;							      \      }									      \    else if (__builtin_expect (state->__value.__wch <= 0xffff, 1))	      \      {									      \	bytebuf[0] = 0xe0;						      \	ntotal = 3;							      \      }									      \    else if (__builtin_expect (state->__value.__wch < 0x1fffff, 1))	      \      {									      \	bytebuf[0] = 0xf0;						      \	ntotal = 4;							      \      }									      \    else if (__builtin_expect (state->__value.__wch < 0x3ffffff, 1))	      \      {									      \	bytebuf[0] = 0xf8;						      \	ntotal = 5;							      \      }									      \    else								      \      {									      \	bytebuf[0] = 0xfc;						      \	ntotal = 6;							      \      }									      \									      \    do									      \      {									      \	if (--ntotal < inlen)						      \	  bytebuf[ntotal] = 0x80 | (wch & 0x3f);			      \	wch >>= 6;							      \      }									      \    while (ntotal > 1);							      \									      \    bytebuf[0] |= wch;							      \  }#include <iconv/loop.c>#include <iconv/skeleton.c>/* Convert from UCS2 to the internal (UCS4-like) format.  */#define DEFINE_INIT		0#define DEFINE_FINI		0#define MIN_NEEDED_FROM		2#define MIN_NEEDED_TO		4#define FROM_DIRECTION		1#define FROM_LOOP		ucs2_internal_loop#define TO_LOOP			ucs2_internal_loop /* This is not used.  
*/#define FUNCTION_NAME		__gconv_transform_ucs2_internal#define ONE_DIRECTION		1#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO#define LOOPFCT			FROM_LOOP#define BODY \  {									      \    uint16_t u1 = *((const uint16_t *) inptr);				      \									      \    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0))		      \      {									      \	/* Surrogate characters in UCS-2 input are not valid.  Reject	      \	   them.  (Catching this here is not security relevant.)  */	      \	if (! ignore_errors_p ())					      \	  {								      \	    result = __GCONV_ILLEGAL_INPUT;				      \	    break;							      \	  }								      \	inptr += 2;							      \	*irreversible = *irreversible + 1;				      \	continue;							      \      }									      \									      \    *((uint32_t *) outptr) = u1;					      \    outptr = (unsigned char *)((uint32_t *) outptr + 1);		      \    inptr += 2;								      \  }#define LOOP_NEED_FLAGS#include <iconv/loop.c>#include <iconv/skeleton.c>/* Convert from the internal (UCS4-like) format to UCS2.  */#define DEFINE_INIT		0#define DEFINE_FINI		0#define MIN_NEEDED_FROM		4#define MIN_NEEDED_TO		2#define FROM_DIRECTION		1#define FROM_LOOP		internal_ucs2_loop#define TO_LOOP			internal_ucs2_loop /* This is not used.  */#define FUNCTION_NAME		__gconv_transform_internal_ucs2#define ONE_DIRECTION		1#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO#define LOOPFCT			FROM_LOOP#define BODY \  {									      \    uint32_t val = *((const uint32_t *) inptr);				      \									      \    if (__builtin_expect (val >= 0x10000, 0))				      \      {									      \	UNICODE_TAG_HANDLER (val, 4);					      \	STANDARD_ERR_HANDLER (4);					      \      }									      \    else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0))	      \      {									      \	/* Surrogate characters in UCS-4 input are not valid.		      
\	   We must catch this, because the UCS-2 output might be	      \	   interpreted as UTF-16 by other programs.  If we let		      \	   surrogates pass through, attackers could make a security	      \	   hole exploit by synthesizing any desired plane 1-16		      \	   character.  */						      \	if (! ignore_errors_p ())					      \	  {								      \	    result = __GCONV_ILLEGAL_INPUT;				      \	    break;							      \	  }								      \	inptr += 4;							      \	*irreversible = *irreversible + 1;				      \	continue;							      \      }									      \    else 								      \      {									      \	*((uint16_t *) outptr) = val;					      \	outptr = (unsigned char *)((uint16_t *) outptr + 1);		      \	inptr += 4;							      \      }									      \  }#define LOOP_NEED_FLAGS#include <iconv/loop.c>#include <iconv/skeleton.c>/* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */#define DEFINE_INIT		0#define DEFINE_FINI		0#define MIN_NEEDED_FROM		2#define MIN_NEEDED_TO		4#define FROM_DIRECTION		1#define FROM_LOOP		ucs2reverse_internal_loop#define TO_LOOP			ucs2reverse_internal_loop/* This is not used.*/#define FUNCTION_NAME		__gconv_transform_ucs2reverse_internal#define ONE_DIRECTION		1#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO#define LOOPFCT			FROM_LOOP#define BODY \  {									      \    uint16_t u1 = bswap_16 (*((const uint16_t *) inptr));		      \									      \    if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0))		      \      {									      \	/* Surrogate characters in UCS-2 input are not valid.  Reject	      \	   them.  (Catching this here is not security relevant.)  */	      \	if (! 
ignore_errors_p ())					      \	  {								      \	    result = __GCONV_ILLEGAL_INPUT;				      \	    break;							      \	  }								      \	inptr += 2;							      \	*irreversible = *irreversible + 1;				      \	continue;							      \      }									      \									      \    *((uint32_t *) outptr) = u1;					      \    outptr = (unsigned char *)((uint32_t *) outptr + 1);		      \    inptr += 2;								      \  }#define LOOP_NEED_FLAGS#include <iconv/loop.c>#include <iconv/skeleton.c>/* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */#define DEFINE_INIT		0#define DEFINE_FINI		0#define MIN_NEEDED_FROM		4#define MIN_NEEDED_TO		2#define FROM_DIRECTION		1#define FROM_LOOP		internal_ucs2reverse_loop#define TO_LOOP			internal_ucs2reverse_loop/* This is not used.*/#define FUNCTION_NAME		__gconv_transform_internal_ucs2reverse#define ONE_DIRECTION		1#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO#define LOOPFCT			FROM_LOOP#define BODY \  {									      \    uint32_t val = *((const uint32_t *) inptr);				      \    if (__builtin_expect (val >= 0x10000, 0))				      \      {									      \	UNICODE_TAG_HANDLER (val, 4);					      \	STANDARD_ERR_HANDLER (4);					      \      }									      \    else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0))	      \      {									      \	/* Surrogate characters in UCS-4 input are not valid.		      \	   We must catch this, because the UCS-2 output might be	      \	   interpreted as UTF-16 by other programs.  If we let		      \	   surrogates pass through, attackers could make a security	      \	   hole exploit by synthesizing any desired plane 1-16		      \	   character.  */						      \	if (! 
ignore_errors_p ())					      \	  {								      \	    result = __GCONV_ILLEGAL_INPUT;				      \	    break;							      \	  }								      \	inptr += 4;							      \	*irreversible = *irreversible + 1;				      \	continue;							      \      }									      \    else 								      \      {									      \	*((uint16_t *) outptr) = bswap_16 (val);			      \	outptr = (unsigned char *)((uint16_t *) outptr + 1);		      \	inptr += 4;							      \      }									      \  }#define LOOP_NEED_FLAGS#include <iconv/loop.c>#include <iconv/skeleton.c>
gconv_simple.c - 源码说明

本页面展示了「KPIT GNU Tools is a set of GNU development tools for Renesas microcontrollers.」中的 gconv_simple.c 源码文件，采用 C 语言编写，共 1,355 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与microcontrollers相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?