📄 wchar.c
字号:
size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n, const wchar_t **__restrict src, size_t wn){ register char *p; size_t len, t; __uwchar_t wc; const __uwchar_t *swc; int store; char buf[MB_LEN_MAX]; char m; store = 1; /* NOTE: The following is an AWFUL HACK! In order to support %ls in * printf, we need to be able to compute the number of bytes needed * for the mbs conversion, not to exceed the precision specified. * But if dst is NULL, the return value is the length assuming a * sufficiently sized buffer. So, we allow passing of (char *) src * as dst in order to flag that we really want the length, subject * to the restricted buffer size and no partial conversions. * See wcsnrtombs() as well. */ if (!s || (s == ((char *) src))) { if (!s) { n = SIZE_MAX; } s = buf; store = 0; } t = n; swc = (const __uwchar_t *) *src; assert(swc != NULL); while (wn && t) { wc = *swc; *s = wc; len = 1; if (wc >= 0x80) {#ifdef KUHN if (#if UTF_8_MAX_LEN == 3 /* For plane 0, these are the only defined values.*/ /* Note that we don't need to worry about exceeding */ /* 31 bits as that is the most that UTF-8 provides. */ (wc > 0xfffdU)#else /* UTF_8_MAX_LEN == 6 */ (wc > 0x7fffffffUL) || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)#endif || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) ) ) { __set_errno(EILSEQ); return (size_t) -1; }#else /* KUHN */#if UTF_8_MAX_LEN != 3 if (wc > 0x7fffffffUL) { /* Value too large. */ __set_errno(EILSEQ); return (size_t) -1; }#endif#endif /* KUHN */ wc >>= 1; p = s; do { ++p; } while (wc >>= 5); wc = *swc; if ((len = p - s) > t) { /* Not enough space. */ break; } m = 0x80; while( p>s ) { m = (m >> 1) | 0x80; *--p = (wc & 0x3f) | 0x80; wc >>= 6; } *s |= (m << 1); } else if (wc == 0) { /* End of string. */ swc = NULL; break; } ++swc; --wn; t -= len; if (store) { s += len; } } if (store) { *src = (const wchar_t *) swc; } return n - t;}#endif/**********************************************************************/#ifdef L___mbsnrtowcs/* WARNING: We treat len as SIZE_MAX when dst is NULL! */size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src, size_t NMC, size_t len, mbstate_t *__restrict ps) __attribute__ ((__weak__, __alias__("__mbsnrtowcs")));size_t __mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src, size_t NMC, size_t len, mbstate_t *__restrict ps){ static mbstate_t mbstate; /* Rely on bss 0-init. */ wchar_t wcbuf[1]; const char *s; size_t count; int incr; if (!ps) { ps = &mbstate; }#ifdef __CTYPE_HAS_UTF_8_LOCALES if (ENCODING == __ctype_encoding_utf8) { size_t r; return ((r = _wchar_utf8sntowcs(dst, len, src, NMC, ps, 1)) != (size_t) -2) ? r : 0; }#endif incr = 1; /* NOTE: The following is an AWFUL HACK! In order to support %s in * wprintf, we need to be able to compute the number of wchars needed * for the mbs conversion, not to exceed the precision specified. * But if dst is NULL, the return value is the length assuming a * sufficiently sized buffer. So, we allow passing of ((wchar_t *)ps) * as dst in order to flag that we really want the length, subject * to the restricted buffer size and no partial conversions. * See _wchar_utf8sntowcs() as well. */ if (!dst || (dst == ((wchar_t *)ps))) { if (!dst) { len = SIZE_MAX; } dst = wcbuf; incr = 0; } /* Since all the following encodings are single-byte encodings... */ if (len > NMC) { len = NMC; } count = len; s = *src;#ifdef __CTYPE_HAS_8_BIT_LOCALES if (ENCODING == __ctype_encoding_8_bit) { wchar_t wc; while (count) { if ((wc = ((unsigned char)(*s))) >= 0x80) { /* Non-ASCII... */ wc -= 0x80; wc = __UCLIBC_CURLOCALE_DATA.tbl8c2wc[ (__UCLIBC_CURLOCALE_DATA.idx8c2wc[wc >> Cc2wc_IDX_SHIFT] << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))]; if (!wc) { goto BAD; } } if (!(*dst = wc)) { s = NULL; break; } dst += incr; ++s; --count; } if (dst != wcbuf) { *src = s; } return len - count; }#endif#ifdef __UCLIBC_HAS_LOCALE__ assert(ENCODING == __ctype_encoding_7_bit);#endif while (count) { if ((*dst = (unsigned char) *s) == 0) { s = NULL; break; } if (*dst >= 0x80) {#ifdef __CTYPE_HAS_8_BIT_LOCALES BAD:#endif __set_errno(EILSEQ); return (size_t) -1; } ++s; dst += incr; --count; } if (dst != wcbuf) { *src = s; } return len - count;}#endif/**********************************************************************/#ifdef L___wcsnrtombs/* WARNING: We treat len as SIZE_MAX when dst is NULL! *//* Note: We completely ignore ps in all currently supported conversions. * TODO: Check for valid state anyway? */size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src, size_t NWC, size_t len, mbstate_t *__restrict ps) __attribute__ ((__weak__, __alias__("__wcsnrtombs")));size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src, size_t NWC, size_t len, mbstate_t *__restrict ps){ const __uwchar_t *s; size_t count; int incr; char buf[MB_LEN_MAX];#ifdef __CTYPE_HAS_UTF_8_LOCALES if (ENCODING == __ctype_encoding_utf8) { return _wchar_wcsntoutf8s(dst, len, src, NWC); }#endif /* __CTYPE_HAS_UTF_8_LOCALES */ incr = 1; /* NOTE: The following is an AWFUL HACK! In order to support %ls in * printf, we need to be able to compute the number of bytes needed * for the mbs conversion, not to exceed the precision specified. * But if dst is NULL, the return value is the length assuming a * sufficiently sized buffer. So, we allow passing of (char *) src * as dst in order to flag that we really want the length, subject * to the restricted buffer size and no partial conversions. * See _wchar_wcsntoutf8s() as well. */ if (!dst || (dst == ((char *) src))) { if (!dst) { len = SIZE_MAX; } dst = buf; incr = 0; } /* Since all the following encodings are single-byte encodings... */ if (len > NWC) { len = NWC; } count = len; s = (const __uwchar_t *) *src;#ifdef __CTYPE_HAS_8_BIT_LOCALES if (ENCODING == __ctype_encoding_8_bit) { __uwchar_t wc; __uwchar_t u; while (count) { if ((wc = *s) <= 0x7f) { if (!(*dst = (unsigned char) wc)) { s = NULL; break; } } else { u = 0; if (wc <= Cwc2c_DOMAIN_MAX) { u = __UCLIBC_CURLOCALE_DATA.idx8wc2c[wc >> (Cwc2c_TI_SHIFT + Cwc2c_TT_SHIFT)]; u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[(u << Cwc2c_TI_SHIFT) + ((wc >> Cwc2c_TT_SHIFT) & ((1 << Cwc2c_TI_SHIFT)-1))]; u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[Cwc2c_TI_LEN + (u << Cwc2c_TT_SHIFT) + (wc & ((1 << Cwc2c_TT_SHIFT)-1))]; }#define __WCHAR_REPLACEMENT_CHAR '?'#ifdef __WCHAR_REPLACEMENT_CHAR *dst = (unsigned char) ( u ? u : __WCHAR_REPLACEMENT_CHAR );#else /* __WCHAR_REPLACEMENT_CHAR */ if (!u) { goto BAD; } *dst = (unsigned char) u;#endif /* __WCHAR_REPLACEMENT_CHAR */ } ++s; dst += incr; --count; } if (dst != buf) { *src = (const wchar_t *) s; } return len - count; }#endif /* __CTYPE_HAS_8_BIT_LOCALES */#ifdef __UCLIBC_HAS_LOCALE__ assert(ENCODING == __ctype_encoding_7_bit);#endif while (count) { if (*s >= 0x80) {#if defined(__CTYPE_HAS_8_BIT_LOCALES) && !defined(__WCHAR_REPLACEMENT_CHAR) BAD:#endif __set_errno(EILSEQ); return (size_t) -1; } if ((*dst = (unsigned char) *s) == 0) { s = NULL; break; } ++s; dst += incr; --count; } if (dst != buf) { *src = (const wchar_t *) s; } return len - count;}#endif/**********************************************************************/#ifdef L_wcswidth#ifdef __UCLIBC_MJN3_ONLY__#warning REMINDER: If we start doing translit, wcwidth and wcswidth will need updating.#warning TODO: Update wcwidth to match latest by Kuhn.#endif#if defined(__UCLIBC_HAS_LOCALE__) && \( defined(__CTYPE_HAS_8_BIT_LOCALES) || defined(__CTYPE_HAS_UTF_8_LOCALES) )static const unsigned char new_idx[] = { 0, 5, 5, 6, 10, 15, 28, 39, 48, 48, 71, 94, 113, 128, 139, 154, 175, 186, 188, 188, 188, 188, 188, 188, 203, 208, 208, 208, 208, 208, 208, 208, 208, 219, 219, 219, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 222, 224, 224, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 233, 233, 233, 233, 233, 233, 233, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 237, 237, 238, 241, 241, 242, 249, 255, };static const unsigned char new_tbl[] = { 0x00, 0x01, 0x20, 0x7f, 0xa0, 0x00, 0x00, 0x50, 0x60, 0x70, 0x00, 0x83, 0x87, 0x88, 0x8a, 0x00, 0x91, 0xa2, 0xa3, 0xba, 0xbb, 0xbe, 0xbf, 0xc0, 0xc1, 0xc3, 0xc4, 0xc5, 0x00, 0x4b, 0x56, 0x70, 0x71, 0xd6, 0xe5, 0xe7, 0xe9, 0xea, 0xee, 0x00, 0x0f, 0x10, 0x11, 0x12, 0x30, 0x4b, 0xa6, 0xb1, 0x00, 0x01, 0x03, 0x3c, 0x3d, 0x41, 0x49, 0x4d, 0x4e, 0x51, 0x55, 0x62, 0x64, 0x81, 0x82, 0xbc, 0xbd, 0xc1, 0xc5, 0xcd, 0xce, 0xe2, 0xe4, 0x00, 0x02, 0x03, 0x3c, 0x3d, 0x41, 0x43, 0x47, 0x49, 0x4b, 0x4e, 0x70, 0x72, 0x81, 0x83, 0xbc, 0xbd, 0xc1, 0xc6, 0xc7, 0xc9, 0xcd, 0xce, 0x00, 0x01, 0x02, 0x3c, 0x3d, 0x3f, 0x40, 0x41, 0x44, 0x4d, 0x4e, 0x56, 0x57, 0x82, 0x83, 0xc0, 0xc1, 0xcd, 0xce, 0x00, 0x3e, 0x41, 0x46, 0x49, 0x4a, 0x4e, 0x55, 0x57, 0xbf, 0xc0, 0xc6, 0xc7, 0xcc, 0xce, 0x00, 0x41, 0x44, 0x4d, 0x4e, 0xca, 0xcb, 0xd2, 0xd5, 0xd6, 0xd7, 0x00, 0x31, 0x32, 0x34, 0x3b, 0x47, 0x4f, 0xb1, 0xb2, 0xb4, 0xba, 0xbb, 0xbd, 0xc8, 0xce, 0x00, 0x18, 0x1a, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x71, 0x7f, 0x80, 0x85, 0x86, 0x88, 0x90, 0x98, 0x99, 0xbd, 0xc6, 0xc7, 0x00, 0x2d, 0x31, 0x32, 0x33, 0x36, 0x38, 0x39, 0x3a, 0x58, 0x5a, 0x00, 0x60, 0x00, 0x12, 0x15, 0x32, 0x35, 0x52, 0x54, 0x72, 0x74, 0xb7, 0xbe, 0xc6, 0xc7, 0xc9, 0xd4, 0x00, 0x0b, 0x0f, 0xa9, 0xaa, 0x00, 0x0b, 0x10, 0x2a, 0x2f, 0x60, 0x64, 0x6a, 0x70, 0xd0, 0xeb, 0x00, 0x29, 0x2b, 0x00, 0x80, 0x00, 0x2a, 0x30, 0x3f, 0x40, 0x99, 0x9b, 0x00, 0xd0, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x1e, 0x1f, 0x00, 0x00, 0x10, 0x20, 0x24, 0x30, 0x70, 0xff, 0x00, 0x61, 0xe0, 0xe7, 0xf9, 0xfc, };static const signed char new_wtbl[] = { 0, -1, 1, -1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 2, 1, 1, 2, 2, 0, 2, 1, 2, 0, 2, 2, 1, 1, 2, 1, 1, 2, 1, 0, 1, 1, 0, 1, 0, 1, 2, 1, 0, 2, 1, 2, 1, 0, 1, };int wcswidth(const wchar_t *pwcs, size_t n){ int h, l, m, count; wchar_t wc; unsigned char b; if (ENCODING == __ctype_encoding_7_bit) { size_t i; for (i = 0 ; (i < n) && pwcs[i] ; i++) { if (pwcs[i] != ((unsigned char)(pwcs[i]))) { return -1; } } }#ifdef __CTYPE_HAS_8_BIT_LOCALES else if (ENCODING == __ctype_encoding_8_bit) { mbstate_t mbstate; mbstate.__mask = 0; /* Initialize the mbstate. */ if (__wcsnrtombs(NULL, &pwcs, n, SIZE_MAX, &mbstate) == ((size_t) - 1)) { return -1; } }#endif /* __CTYPE_HAS_8_BIT_LOCALES */#if defined(__CTYPE_HAS_UTF_8_LOCALES) && defined(KUHN) /* For stricter handling of allowed unicode values... see comments above. */ else if (ENCODING == __ctype_encoding_utf8) { size_t i; for (i = 0 ; (i < n) && pwcs[i] ; i++) { if ( (((__uwchar_t)((pwcs[i]) - 0xfffeU)) < 2) || (((__uwchar_t)((pwcs[i]) - 0xd800U)) < (0xe000U - 0xd800U)) ) { return -1; } } }#endif /* __CTYPE_HAS_UTF_8_LOCALES */ for (count = 0 ; n && (wc = *pwcs++) ; n--) { if (wc <= 0xff) { /* If we're here, wc != 0. */ if ((wc < 32) || ((wc >= 0x7f) && (wc < 0xa0))) { return -1; } ++count; continue; } if (((unsigned int) wc) <= 0xffff) { b = wc & 0xff; h = (wc >> 8); l = new_idx[h]; h = new_idx[h+1]; while ((m = (l+h) >> 1) != l) { if (b >= new_tbl[m]) { l = m; } else { /* wc < tbl[m] */ h = m; } } count += new_wtbl[l]; /* none should be -1. */ continue; } /* Redo this to minimize average number of compares?*/ if (wc >= 0x1d167) { if (wc <= 0x1d1ad) { if ((wc <= 0x1d169 || (wc >= 0x1d173 && (wc <= 0x1d182 || (wc >= 0x1d185 && (wc <= 0x1d18b || (wc >= 0x1d1aa)))))) ) { continue; } } else if (((wc >= 0xe0020) && (wc <= 0xe007f)) || (wc == 0xe0001)) { continue; } else if ((wc >= 0x20000) && (wc <= 0x2ffff)) { ++count; /* need 2.. add one here */ }#if (WCHAR_MAX > 0x7fffffffL) else if (wc > 0x7fffffffL) { return -1; }#endif /* (WCHAR_MAX > 0x7fffffffL) */ } ++count; } return count;}#else /* __UCLIBC_HAS_LOCALE__ */int wcswidth(const wchar_t *pwcs, size_t n){ int count; wchar_t wc; for (count = 0 ; n && (wc = *pwcs++) ; n--) { if (wc <= 0xff) { /* If we're here, wc != 0. */ if ((wc < 32) || ((wc >= 0x7f) && (wc < 0xa0))) { return -1; } ++count; continue; } else { return -1; } } return count;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -