📄 gconv_simple.c
字号:
}#define LOOP_NEED_FLAGS#include <iconv/loop.c>#include <iconv/skeleton.c>/* Convert from UTF-8 to the internal (UCS4-like) format. */#define DEFINE_INIT 0#define DEFINE_FINI 0#define MIN_NEEDED_FROM 1#define MAX_NEEDED_FROM 6#define MIN_NEEDED_TO 4#define FROM_DIRECTION 1#define FROM_LOOP utf8_internal_loop#define TO_LOOP utf8_internal_loop /* This is not used. */#define FUNCTION_NAME __gconv_transform_utf8_internal#define ONE_DIRECTION 1#define MIN_NEEDED_INPUT MIN_NEEDED_FROM#define MAX_NEEDED_INPUT MAX_NEEDED_FROM#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO#define LOOPFCT FROM_LOOP#define BODY \ { \ uint32_t ch; \ uint32_t cnt; \ uint32_t i; \ \ /* Next input byte. */ \ ch = *inptr; \ \ if (ch < 0x80) \ { \ /* One byte sequence. */ \ cnt = 1; \ ++inptr; \ } \ else \ { \ if (ch >= 0xc2 && ch < 0xe0) \ { \ /* We expect two bytes. The first byte cannot be 0xc0 or 0xc1, \ otherwise the wide character could have been represented \ using a single byte. */ \ cnt = 2; \ ch &= 0x1f; \ } \ else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \ { \ /* We expect three bytes. */ \ cnt = 3; \ ch &= 0x0f; \ } \ else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \ { \ /* We expect four bytes. */ \ cnt = 4; \ ch &= 0x07; \ } \ else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \ { \ /* We expect five bytes. */ \ cnt = 5; \ ch &= 0x03; \ } \ else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \ { \ /* We expect six bytes. */ \ cnt = 6; \ ch &= 0x01; \ } \ else \ { \ int skipped; \ \ if (! ignore_errors_p ()) \ { \ /* This is an illegal encoding. */ \ result = __GCONV_ILLEGAL_INPUT; \ break; \ } \ \ /* Search the end of this ill-formed UTF-8 character. This \ is the next byte with (x & 0xc0) != 0x80. */ \ skipped = 0; \ do \ { \ ++inptr; \ ++skipped; \ } \ while (inptr < inend && (*inptr & 0xc0) == 0x80 && skipped < 5); \ \ continue; \ } \ \ if (__builtin_expect (inptr + cnt > inend, 0)) \ { \ /* We don't have enough input. But before we report that check \ that all the bytes are correct. */ \ for (i = 1; inptr + i < inend; ++i) \ if ((inptr[i] & 0xc0) != 0x80) \ break; \ \ if (__builtin_expect (inptr + i == inend, 1)) \ { \ result = __GCONV_INCOMPLETE_INPUT; \ break; \ } \ \ if (ignore_errors_p ()) \ { \ /* Ignore it. */ \ inptr += i; \ *irreversible = *irreversible + 1; \ continue; \ } \ \ result = __GCONV_ILLEGAL_INPUT; \ break; \ } \ \ /* Read the possible remaining bytes. */ \ for (i = 1; i < cnt; ++i) \ { \ uint32_t byte = inptr[i]; \ \ if ((byte & 0xc0) != 0x80) \ /* This is an illegal encoding. */ \ break; \ \ ch <<= 6; \ ch |= byte & 0x3f; \ } \ \ /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \ If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \ have been represented with fewer than cnt bytes. */ \ if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \ { \ /* This is an illegal encoding. */ \ if (ignore_errors_p ()) \ { \ inptr += i; \ *irreversible = *irreversible + 1; \ continue; \ } \ \ result = __GCONV_ILLEGAL_INPUT; \ break; \ } \ \ inptr += cnt; \ } \ \ /* Now adjust the pointers and store the result. */ \ *((uint32_t *) outptr) = ch; \ outptr = (unsigned char *)((uint32_t *) outptr + 1); \ }#define LOOP_NEED_FLAGS#define STORE_REST \ { \ /* We store the remaining bytes while converting them into the UCS4 \ format. We can assume that the first byte in the buffer is \ correct and that it requires a larger number of bytes than there \ are in the input buffer. */ \ wint_t ch = **inptrp; \ size_t cnt; \ \ state->__count = inend - *inptrp; \ \ if (ch >= 0xc2 && ch < 0xe0) \ { \ /* We expect two bytes. The first byte cannot be 0xc0 or \ 0xc1, otherwise the wide character could have been \ represented using a single byte. */ \ cnt = 2; \ ch &= 0x1f; \ } \ else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \ { \ /* We expect three bytes. */ \ cnt = 3; \ ch &= 0x0f; \ } \ else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \ { \ /* We expect four bytes. */ \ cnt = 4; \ ch &= 0x07; \ } \ else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \ { \ /* We expect five bytes. */ \ cnt = 5; \ ch &= 0x03; \ } \ else \ { \ /* We expect six bytes. */ \ cnt = 6; \ ch &= 0x01; \ } \ \ /* The first byte is already consumed. */ \ --cnt; \ while (++(*inptrp) < inend) \ { \ ch <<= 6; \ ch |= **inptrp & 0x3f; \ --cnt; \ } \ \ /* Shift for the so far missing bytes. */ \ ch <<= cnt * 6; \ \ /* Store the value. */ \ state->__value.__wch = ch; \ }#define UNPACK_BYTES \ { \ wint_t wch = state->__value.__wch; \ size_t ntotal; \ inlen = state->__count; \ \ if (state->__value.__wch <= 0x7ff) \ { \ bytebuf[0] = 0xc0; \ ntotal = 2; \ } \ else if (__builtin_expect (state->__value.__wch <= 0xffff, 1)) \ { \ bytebuf[0] = 0xe0; \ ntotal = 3; \ } \ else if (__builtin_expect (state->__value.__wch < 0x1fffff, 1)) \ { \ bytebuf[0] = 0xf0; \ ntotal = 4; \ } \ else if (__builtin_expect (state->__value.__wch < 0x3ffffff, 1)) \ { \ bytebuf[0] = 0xf8; \ ntotal = 5; \ } \ else \ { \ bytebuf[0] = 0xfc; \ ntotal = 6; \ } \ \ do \ { \ if (--ntotal < inlen) \ bytebuf[ntotal] = 0x80 | (wch & 0x3f); \ wch >>= 6; \ } \ while (ntotal > 1); \ \ bytebuf[0] |= wch; \ }#include <iconv/loop.c>#include <iconv/skeleton.c>/* Convert from UCS2 to the internal (UCS4-like) format. */#define DEFINE_INIT 0#define DEFINE_FINI 0#define MIN_NEEDED_FROM 2#define MIN_NEEDED_TO 4#define FROM_DIRECTION 1#define FROM_LOOP ucs2_internal_loop#define TO_LOOP ucs2_internal_loop /* This is not used. */#define FUNCTION_NAME __gconv_transform_ucs2_internal#define ONE_DIRECTION 1#define MIN_NEEDED_INPUT MIN_NEEDED_FROM#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO#define LOOPFCT FROM_LOOP#define BODY \ { \ uint16_t u1 = *((const uint16_t *) inptr); \ \ if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \ { \ /* Surrogate characters in UCS-2 input are not valid. Reject \ them. (Catching this here is not security relevant.) */ \ if (! ignore_errors_p ()) \ { \ result = __GCONV_ILLEGAL_INPUT; \ break; \ } \ inptr += 2; \ *irreversible = *irreversible + 1; \ continue; \ } \ \ *((uint32_t *) outptr) = u1; \ outptr = (unsigned char *)((uint32_t *) outptr + 1); \ inptr += 2; \ }#define LOOP_NEED_FLAGS#include <iconv/loop.c>#include <iconv/skeleton.c>/* Convert from the internal (UCS4-like) format to UCS2. */#define DEFINE_INIT 0#define DEFINE_FINI 0#define MIN_NEEDED_FROM 4#define MIN_NEEDED_TO 2#define FROM_DIRECTION 1#define FROM_LOOP internal_ucs2_loop#define TO_LOOP internal_ucs2_loop /* This is not used. */#define FUNCTION_NAME __gconv_transform_internal_ucs2#define ONE_DIRECTION 1#define MIN_NEEDED_INPUT MIN_NEEDED_FROM#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO#define LOOPFCT FROM_LOOP#define BODY \ { \ uint32_t val = *((const uint32_t *) inptr); \ \ if (__builtin_expect (val >= 0x10000, 0)) \ { \ UNICODE_TAG_HANDLER (val, 4); \ STANDARD_ERR_HANDLER (4); \ } \ else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \ { \ /* Surrogate characters in UCS-4 input are not valid. \ We must catch this, because the UCS-2 output might be \ interpreted as UTF-16 by other programs. If we let \ surrogates pass through, attackers could make a security \ hole exploit by synthesizing any desired plane 1-16 \ character. */ \ if (! ignore_errors_p ()) \ { \ result = __GCONV_ILLEGAL_INPUT; \ break; \ } \ inptr += 4; \ *irreversible = *irreversible + 1; \ continue; \ } \ else \ { \ *((uint16_t *) outptr) = val; \ outptr = (unsigned char *)((uint16_t *) outptr + 1); \ inptr += 4; \ } \ }#define LOOP_NEED_FLAGS#include <iconv/loop.c>#include <iconv/skeleton.c>/* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */#define DEFINE_INIT 0#define DEFINE_FINI 0#define MIN_NEEDED_FROM 2#define MIN_NEEDED_TO 4#define FROM_DIRECTION 1#define FROM_LOOP ucs2reverse_internal_loop#define TO_LOOP ucs2reverse_internal_loop/* This is not used.*/#define FUNCTION_NAME __gconv_transform_ucs2reverse_internal#define ONE_DIRECTION 1#define MIN_NEEDED_INPUT MIN_NEEDED_FROM#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO#define LOOPFCT FROM_LOOP#define BODY \ { \ uint16_t u1 = bswap_16 (*((const uint16_t *) inptr)); \ \ if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \ { \ /* Surrogate characters in UCS-2 input are not valid. Reject \ them. (Catching this here is not security relevant.) */ \ if (! ignore_errors_p ()) \ { \ result = __GCONV_ILLEGAL_INPUT; \ break; \ } \ inptr += 2; \ *irreversible = *irreversible + 1; \ continue; \ } \ \ *((uint32_t *) outptr) = u1; \ outptr = (unsigned char *)((uint32_t *) outptr + 1); \ inptr += 2; \ }#define LOOP_NEED_FLAGS#include <iconv/loop.c>#include <iconv/skeleton.c>/* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */#define DEFINE_INIT 0#define DEFINE_FINI 0#define MIN_NEEDED_FROM 4#define MIN_NEEDED_TO 2#define FROM_DIRECTION 1#define FROM_LOOP internal_ucs2reverse_loop#define TO_LOOP internal_ucs2reverse_loop/* This is not used.*/#define FUNCTION_NAME __gconv_transform_internal_ucs2reverse#define ONE_DIRECTION 1#define MIN_NEEDED_INPUT MIN_NEEDED_FROM#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO#define LOOPFCT FROM_LOOP#define BODY \ { \ uint32_t val = *((const uint32_t *) inptr); \ if (__builtin_expect (val >= 0x10000, 0)) \ { \ UNICODE_TAG_HANDLER (val, 4); \ STANDARD_ERR_HANDLER (4); \ } \ else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \ { \ /* Surrogate characters in UCS-4 input are not valid. \ We must catch this, because the UCS-2 output might be \ interpreted as UTF-16 by other programs. If we let \ surrogates pass through, attackers could make a security \ hole exploit by synthesizing any desired plane 1-16 \ character. */ \ if (! ignore_errors_p ()) \ { \ result = __GCONV_ILLEGAL_INPUT; \ break; \ } \ inptr += 4; \ *irreversible = *irreversible + 1; \ continue; \ } \ else \ { \ *((uint16_t *) outptr) = bswap_16 (val); \ outptr = (unsigned char *)((uint16_t *) outptr + 1); \ inptr += 4; \ } \ }#define LOOP_NEED_FLAGS#include <iconv/loop.c>#include <iconv/skeleton.c>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -