strconv.cpp
来自「A*算法 A*算法 A*算法 A*算法A*算法A*算法」· C++ 代码 · 共 2,176 行 · 第 1/5 页
CPP
2,176 行
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
// Decodes the NUL-terminated UTF-7 string psz into wide characters.
//
// buf - destination buffer, or NULL to only count the output
// psz - NUL-terminated UTF-7 input
// n   - capacity of buf in wide characters (ignored when buf is NULL)
//
// Returns the number of wide characters produced, not counting the
// terminating 0, which is only stored when there is still room for it.
size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        unsigned char cc = *psz++;
        if (cc != '+')
        {
            // plain ASCII char
            if (buf)
                *buf++ = cc;
            len++;
        }
        else if (*psz == '-')
        {
            // encoded plus sign ("+-" stands for a literal '+')
            if (buf)
                *buf++ = cc;
            len++;
            psz++;
        }
        else
        {
            // BASE64 encoded string: every input character contributes 6 bits
            // to the accumulator d, l is the number of bits in d that have
            // not been consumed yet and lsb tells which half of the current
            // 16 bit unit the next complete byte belongs to
            bool lsb;
            unsigned char c;
            unsigned int d, l;

            // The capacity test must be repeated here: one run can expand to
            // any number of wide characters and the outer loop only checks
            // the remaining space once per run. The run also ends at the
            // first byte for which utf7unb64[] yields 0xff.
            for (lsb = false, d = 0, l = 0;
                 ((!buf) || (len < n)) &&
                     (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
                 psz++)
            {
                d <<= 6;
                d += cc;

                // at most one complete byte becomes available per input char
                for (l += 6; l >= 8; lsb = !lsb)
                {
                    c = (unsigned char)((d >> (l -= 8)) % 256);
                    if (lsb)
                    {
                        // low byte: completes the current wide character
                        if (buf)
                            *buf++ |= c;
                        len ++;
                    }
                    else
                    {
                        // high byte: starts a new wide character, which is
                        // only counted once its low byte has arrived
                        if (buf)
                            *buf = (wchar_t)(c << 8);
                    }
                }
            }

            // the '-' terminating the run is not part of the output
            if (*psz == '-')
                psz++;
        }
    }

    if (buf && (len < n))
        *buf = 0;

    return len;
}
//
// BASE64 encoding table: entry i is the character used for the 6 bit value i
//
static const unsigned char utf7enb64[] =
{
    // 0..25: upper case letters
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',

    // 26..51: lower case letters
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',

    // 52..61: digits
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',

    // 62 and 63
    '+', '/'
};
//
// UTF-7 encoding table, indexed by 7 bit character code
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    // 0x00 - 0x0f
    3, 3, 3, 3, 3, 3, 3, 3,
    3, 2, 2, 3, 3, 2, 3, 3,
    // 0x10 - 0x1f
    3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3,
    // 0x20 - 0x2f
    2, 1, 1, 1, 1, 1, 1, 0,
    0, 0, 1, 3, 0, 0, 0, 3,
    // 0x30 - 0x3f
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 1, 1, 1, 0,
    // 0x40 - 0x4f
    1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    // 0x50 - 0x5f
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 3, 1, 1, 1,
    // 0x60 - 0x6f
    1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    // 0x70 - 0x7f
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 1, 1, 3, 3
};
// Encodes the NUL-terminated wide string psz as UTF-7.
//
// buf - destination buffer, or NULL to only count the output
// psz - NUL-terminated wide character input
// n   - capacity of buf in bytes (ignored when buf is NULL)
//
// Characters whose utf7encode[] class is 0 are copied as is, '+' becomes
// "+-" and everything else is emitted as a "+<BASE64>-" run. Nothing is ever
// stored beyond buf[n - 1]; characters that do not fit are still counted, so
// the result may exceed n when the buffer was too small.
//
// Returns the number of bytes the encoded string needs (terminating 0 not
// included, it is only stored if there is room for it), or (size_t)-1 if,
// in builds without WC_UTF16, a character above 0xffff is encountered.
size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        wchar_t cc = *psz++;

        // compare as unsigned: wchar_t may be a signed type and a negative
        // value must never be used as an index into utf7encode[]
        if (((wxUint32)cc) < 0x80 && utf7encode[cc] < 1)
        {
            // plain ASCII char
            if (buf)
                *buf++ = (char)cc;
            len++;
        }
#ifndef WC_UTF16
        else if (((wxUint32)cc) > 0xffff)
        {
            // no surrogate pair generation (yet?)
            return (size_t)-1;
        }
#endif
        else
        {
            // the loop condition guarantees room for this first byte
            if (buf)
                *buf++ = '+';
            len++;

            if (cc != '+')
            {
                // BASE64 encode string: d accumulates the bits of the 16 bit
                // units (high byte first), l is the number of bits in d
                // which have not been written out yet
                unsigned int lsb, d, l;
                for (d = 0, l = 0;; psz++)
                {
                    for (lsb = 0; lsb < 2; lsb ++)
                    {
                        d <<= 8;
                        d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;

                        for (l += 8; l >= 6; )
                        {
                            l -= 6;
                            // a run can be arbitrarily long, so the capacity
                            // has to be checked for every byte stored
                            if (buf && len < n)
                                *buf++ = utf7enb64[(d >> l) % 64];
                            len++;
                        }
                    }

                    // the run goes on until the end of the string or the
                    // next character which can be written directly
                    cc = *psz;
                    if (!(cc) || (((wxUint32)cc) < 0x80 && utf7encode[cc] < 1))
                        break;
#ifndef WC_UTF16
                    // same restriction as for the first character of a run:
                    // without it the value would be silently cut to 16 bits
                    if (((wxUint32)cc) > 0xffff)
                        return (size_t)-1;
#endif
                }

                if (l != 0)
                {
                    // flush the remaining 2 or 4 bits, padded with zero bits
                    if (buf && len < n)
                        *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
                    len++;
                }
            }

            // '-' terminates the run (and completes "+-" for a plus sign)
            if (buf && len < n)
                *buf++ = '-';
            len++;
        }
    }

    if (buf && (len < n))
        *buf = 0;

    return len;
}
// ----------------------------------------------------------------------------
// UTF-8
// ----------------------------------------------------------------------------

// utf8_max[i] is the largest value which can be encoded in a UTF-8 sequence
// of i + 1 bytes; the final entry stops any search through the table
static wxUint32 utf8_max[]=
{
    0x7f,           // 1 byte
    0x7ff,          // 2 bytes
    0xffff,         // 3 bytes
    0x1fffff,       // 4 bytes
    0x3ffffff,      // 5 bytes
    0x7fffffff,     // 6 bytes
    0xffffffff      // upper bound for any 32 bit value
};

// boundaries of the private use area we use to (temporarily) remap the bytes
// of sequences which are invalid in a UTF-8 encoded string: there is one
// code for each of the 256 possible byte values
const wxUint32 wxUnicodePUA = 0x100000;
const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
// Converts the NUL-terminated UTF-8 string psz to wide characters.
//
// buf - destination buffer, or NULL to only count the output
// psz - NUL-terminated UTF-8 input
// n   - capacity of buf in wide characters (ignored when buf is NULL)
//
// Byte sequences which are not accepted as UTF-8 are treated according to
// m_options: with MAP_INVALID_UTF8_TO_PUA every offending byte is stored as
// wxUnicodePUA + byte, with MAP_INVALID_UTF8_TO_OCTAL it is stored as a
// backslash followed by three octal digits (and a literal backslash in the
// input is doubled), and with neither option the function fails.
//
// Returns the accumulated length (terminating 0 not included, it is only
// stored if len < n at the end) or (size_t)-1 on failure.
size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
size_t len = 0;
while (*psz && ((!buf) || (len < n)))
{
// start of the current sequence, needed to remap its raw bytes if the
// sequence turns out to be invalid
const char *opsz = psz;
bool invalid = false;
unsigned char cc = *psz++, fc = cc;
unsigned cnt;
// count the leading 1 bits of the first byte
for (cnt = 0; fc & 0x80; cnt++)
fc <<= 1;
if (!cnt)
{
// plain ASCII char
if (buf)
*buf++ = cc;
len++;
// escape the escape character for octal escapes
if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
&& cc == '\\' && (!buf || len < n))
{
if (buf)
*buf++ = cc;
len++;
}
}
else
{
// cnt becomes the number of continuation bytes which must follow
cnt--;
if (!cnt)
{
// invalid UTF-8 sequence
// (the first byte has exactly one leading 1 bit, i.e. it has the
// form 10xxxxxx of a continuation byte)
invalid = true;
}
else
{
// utf8_max[ocnt] is compared against the decoded value below to
// reject encodings which are longer than necessary
unsigned ocnt = cnt - 1;
// payload bits of the first byte
wxUint32 res = cc & (0x3f >> cnt);
while (cnt--)
{
cc = *psz;
if ((cc & 0xC0) != 0x80)
{
// invalid UTF-8 sequence
// (this byte is not consumed; a terminating NUL ends up here too)
invalid = true;
break;
}
psz++;
res = (res << 6) | (cc & 0x3f);
}
if (invalid || res <= utf8_max[ocnt])
{
// illegal UTF-8 encoding
invalid = true;
}
else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
res >= wxUnicodePUA && res < wxUnicodePUAEnd)
{
// if one of our PUA characters turns up externally
// it must also be treated as an illegal sequence
// (a bit like you have to escape an escape character)
invalid = true;
}
else
{
#ifdef WC_UTF16
// cast is ok because wchar_t == wxUint16 if WC_UTF16
// NOTE(review): encode_utf16() is defined elsewhere; its result is used
// here as the number of 16 bit units produced, with (size_t)-1 meaning
// failure - confirm against its definition
size_t pa = encode_utf16(res, (wxUint16 *)buf);
if (pa == (size_t)-1)
{
invalid = true;
}
else
{
if (buf)
buf += pa;
len += pa;
}
#else // !WC_UTF16
if (buf)
*buf++ = res;
len++;
#endif // WC_UTF16/!WC_UTF16
}
}
if (invalid)
{
// handle the bytes from opsz up to (not including) psz one by one
if (m_options & MAP_INVALID_UTF8_TO_PUA)
{
while (opsz < psz && (!buf || len < n))
{
#ifdef WC_UTF16
// cast is ok because wchar_t == wxUint16 if WC_UTF16
size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
wxASSERT(pa != (size_t)-1);
if (buf)
buf += pa;
opsz++;
len += pa;
#else
if (buf)
*buf++ = wxUnicodePUA + (unsigned char)*opsz;
opsz++;
len++;
#endif
}
}
else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
{
while (opsz < psz && (!buf || len < n))
{
// the escape is only stored if all four characters fit, but len is
// advanced by 4 in any case
if ( buf && len + 3 < n )
{
// note: this local n hides the parameter n inside these braces
unsigned char n = *opsz;
*buf++ = L'\\';
*buf++ = (wchar_t)( L'0' + n / 0100 );
*buf++ = (wchar_t)( L'0' + (n % 0100) / 010 );
*buf++ = (wchar_t)( L'0' + n % 010 );
}
opsz++;
len += 4;
}
}
else // MAP_INVALID_UTF8_NOT
{
return (size_t)-1;
}
}
}
}
if (buf && (len < n))
*buf = 0;
return len;
}
// Tells whether wch is one of the octal digits L'0' to L'7'.
static inline bool isoctal(wchar_t wch)
{
    if ( wch < L'0' )
        return false;

    return !(wch > L'7');
}
// Converts the NUL-terminated wide string psz to UTF-8.
//
// buf - destination buffer, or NULL to only count the output
// psz - NUL-terminated wide character input
// n   - capacity of buf in bytes (ignored when buf is NULL)
//
// Depending on m_options the remappings applied when decoding are undone:
// with MAP_INVALID_UTF8_TO_PUA a character in [wxUnicodePUA, wxUnicodePUAEnd)
// becomes the single raw byte it stands for, and with
// MAP_INVALID_UTF8_TO_OCTAL a doubled backslash becomes one backslash and a
// backslash followed by three octal digits becomes the corresponding byte.
//
// A multi-byte sequence is only stored if it fits entirely into the space
// left in buf; it is counted in any case, so the result may exceed n when the
// buffer was too small.
//
// Returns the number of bytes needed for the converted string; the
// terminating 0 is not included and only stored if there is room for it.
size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        wxUint32 cc;
#ifdef WC_UTF16
        // cast is ok for WC_UTF16
        size_t pa = decode_utf16((const wxUint16 *)psz, cc);
        psz += (pa == (size_t)-1) ? 1 : pa;
#else
        cc=(*psz++) & 0x7fffffff;
#endif

        if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
                && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
        {
            // restore the raw byte this character was made from
            if (buf)
                *buf++ = (char)(cc - wxUnicodePUA);
            len++;
        }
        else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
                    && cc == L'\\' && psz[0] == L'\\' )
        {
            // escaped backslash: two backslashes stand for a single one
            if (buf)
                *buf++ = (char)cc;
            psz++;
            len++;
        }
        else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
                    cc == L'\\' &&
                        isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
        {
            // octal escape: the && chain stops at the first character which
            // is not an octal digit, so nothing beyond the NUL is examined
            if (buf)
            {
                *buf++ = (char) ((psz[0] - L'0')*0100 +
                                 (psz[1] - L'0')*010 +
                                 (psz[2] - L'0'));
            }

            psz += 3;
            len++;
        }
        else
        {
            // cnt is the number of continuation bytes needed for cc
            unsigned cnt;
            for (cnt = 0; cc > utf8_max[cnt]; cnt++) {}

            if (!cnt)
            {
                // plain ASCII char
                if (buf)
                    *buf++ = (char) cc;
                len++;
            }
            else
            {
                // The loop condition only guarantees room for one byte, so
                // check that all cnt + 1 bytes fit before storing anything.
                // This has to be decided before len is updated, and len has
                // to be updated before the loop below counts cnt down.
                const bool fits = buf && (len + cnt < n);
                len += cnt + 1;

                if (fits)
                {
                    // lead byte: cnt + 1 one bits, a zero bit and then the
                    // most significant bits of cc; 0xff80 >> cnt yields the
                    // same low 8 bits as shifting -128 but does not rely on
                    // how negative values are shifted
                    *buf++ = (char) (((0xff80 >> cnt) & 0xff) |
                                     ((cc >> (cnt * 6)) & (0x3f >> cnt)));

                    // continuation bytes, 6 bits each, high bits first
                    while (cnt--)
                        *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
                }
            }
        }
    }

    if (buf && (len<n))
        *buf = 0;

    return len;
}
// ----------------------------------------------------------------------------
// UTF-16
// ----------------------------------------------------------------------------
// Map the names wxMBConvUTF16straight and wxMBConvUTF16swap onto the classes
// wxMBConvUTF16BE and wxMBConvUTF16LE: if WORDS_BIGENDIAN is defined,
// "straight" is the BE class and "swap" the LE one, otherwise it is the
// other way round.
#ifdef WORDS_BIGENDIAN
#define wxMBConvUTF16straight wxMBConvUTF16BE
#define wxMBConvUTF16swap wxMBConvUTF16LE
#else
#define wxMBConvUTF16swap wxMBConvUTF16BE
#define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
#ifdef WC_UTF16
// copy 16bit MB to 16bit String
size_t wxMBConvUTF16straight::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
size_t len=0;
while (*(wxUint16*)psz && (!buf || len < n))
{
if (buf)
*buf++ = *(wxUint16*)psz;
len++;
psz += sizeof(wxUint16);
}
if (buf && len<n) *buf=0;
return len;
}
// copy 16bit String to 16bit MB
size_t wxMBConvUTF16straight::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
size_t len=0;
while (*psz && (!buf || len < n))
{
if (buf)
{
*(wxUint16*)buf = *psz;
buf += sizeof(wxUint16);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?