// File: utf16
// (source-viewer residue, not part of the header: "Font size:" control label)
// codecvt facet for UTF-16 multibyte code, UCS wide-character code
#pragma once
#ifndef _CVT_UTF16_
#define _CVT_UTF16_
#ifndef RC_INVOKED
#include <locale>
#include <cwchar>
#pragma pack(push,_CRT_PACKING)
#pragma warning(push,3)
#pragma warning(disable: 4127 6326)
namespace stdext {
namespace cvt {
// Shared byte-order markers and mode flags. The block is guarded by
// _LITTLE_FIRST, so it is skipped if these names are already defined
// (presumably by a sibling cvt header -- TODO confirm).
#ifndef _LITTLE_FIRST
// Values kept in the first byte of the conversion state by codecvt_utf16
// to record the byte order in effect; 0 there means "not decided yet".
#define _LITTLE_FIRST 1
#define _BIG_FIRST 2
// NOTE(review): not referenced anywhere in the visible part of this file;
// codecvt_utf16 uses its own _Bytes_per_word enumerator (2) instead.
#define _BYTES_PER_WORD 4
// Bit flags for the _Mode template argument of codecvt_utf16; they are
// tested with bitwise AND, so they may be combined with bitwise OR.
enum codecvt_mode {
// do_in checks the first word for a byte order mark and consumes it
consume_header = 4,
// do_out writes a byte order mark before the first converted element
generate_header = 2,
// default byte order is least-significant byte first (otherwise MS first)
little_endian = 1};
#endif /* _LITTLE_FIRST */
// Conversion state type used by the facet; only its first byte is accessed
// (through a char pointer) by the code in this file.
typedef _CSTD mbstate_t _Statype;
// TEMPLATE CLASS codecvt_utf16
//
// Conversion facet between a sequence of _Elem code values and a UTF-16
// byte stream (two bytes per 16-bit word; values above 0xffff are carried
// as a pair of surrogate words).
//
//   _Elem     element type on the wide side
//   _Maxcode  largest code value accepted; anything larger yields error
//   _Mode     bitwise OR of codecvt_mode flags:
//               consume_header   do_in looks for a leading byte order mark
//               generate_header  do_out writes a leading byte order mark
//               little_endian    default byte order is LS byte first
//
// The first byte of the state object records the byte order in effect:
// 0 while undecided, then _LITTLE_FIRST or _BIG_FIRST once fixed.
template<class _Elem,
    unsigned long _Maxcode = 0x10ffff,
    codecvt_mode _Mode = (codecvt_mode)0>
    class codecvt_utf16
        : public _STD codecvt<_Elem, char, _Statype>
    {   // facet for converting between _Elem and UTF-16 multibyte sequences
    enum {_Bytes_per_word = 2};     // bytes in one UTF-16 code unit

public:
    typedef _STD codecvt<_Elem, char, _Statype> _Mybase;
    typedef typename _Mybase::result result;
    typedef char _Byte;
    typedef _Elem intern_type;
    typedef _Byte extern_type;
    typedef _Statype state_type;

    // Construct the facet; _Refs is forwarded to the codecvt base.
    explicit codecvt_utf16(size_t _Refs = 0)
        : _Mybase(_Refs)
        {   // construct with ref count
        }

    virtual ~codecvt_utf16()
        {   // destroy the object
        }

protected:
    // Convert UTF-16 bytes [_First1, _Last1) to elements [_First2, _Last2).
    // _Mid1 / _Mid2 receive the positions reached in the input / output.
    // Returns:
    //   ok       some input was consumed
    //   partial  nothing was consumed (too few bytes or no output room)
    //   error    a first surrogate word is not followed by a second
    //            surrogate word, or the decoded value exceeds _Maxcode
    virtual result do_in(_Statype& _State,
        const _Byte *_First1, const _Byte *_Last1, const _Byte *& _Mid1,
        _Elem *_First2, _Elem *_Last2, _Elem *& _Mid2) const
        {   // convert bytes [_First1, _Last1) to [_First2, _Last2)
        char *_Pstate = (char *)&_State;
        _Mid1 = _First1;
        _Mid2 = _First2;

        for (; _Bytes_per_word <= _Last1 - _Mid1 && _Mid2 != _Last2; )
            {   // convert a multibyte sequence
            const unsigned char *_Ptr = (const unsigned char *)_Mid1;
            unsigned long _Ch;
            unsigned short _Ch0, _Ch1;

            if (*_Pstate == _LITTLE_FIRST)
                _Ch0 = (unsigned short)(_Ptr[1] << 8 | _Ptr[0]);
            else if (*_Pstate == _BIG_FIRST)
                _Ch0 = (unsigned short)(_Ptr[0] << 8 | _Ptr[1]);
            else
                {   // no header seen yet, try preferred mode
                unsigned char _Default_endian = (_Mode & little_endian) != 0
                    ? _LITTLE_FIRST : _BIG_FIRST;

                if ((_Mode & little_endian) != 0)
                    _Ch0 = (unsigned short)(_Ptr[1] << 8 | _Ptr[0]);
                else
                    _Ch0 = (unsigned short)(_Ptr[0] << 8 | _Ptr[1]);

                if ((_Mode & consume_header) == 0
                    || (_Ch0 != 0xfeff && _Ch0 != 0xfffe))
                    *_Pstate = _Default_endian;     // no header to consume
                else
                    {   // consume header, fixate on endianness, and retry
                    _Mid1 += _Bytes_per_word;

                    // 0xfeff read in the default order confirms that order;
                    // 0xfffe means the stream uses the opposite order.
                    // 3 - x swaps _LITTLE_FIRST (1) and _BIG_FIRST (2).
                    // This must be a comparison: an assignment here would
                    // always pick the default order and ignore a swapped
                    // byte order mark.
                    *_Pstate = (char)(_Ch0 == 0xfeff
                        ? _Default_endian
                        : (unsigned char)(3 - _Default_endian));

                    result _Ans = do_in(_State, _Mid1, _Last1, _Mid1,
                        _First2, _Last2, _Mid2);

                    if (_Ans == _Mybase::partial)
                        {   // not enough bytes, roll back header
                        *_Pstate = 0;
                        _Mid1 = _First1;
                        }
                    return (_Ans);
                    }
                }

            if (_Ch0 < 0xd800 || 0xdc00 <= _Ch0)
                {   // one word, consume bytes
                _Mid1 += _Bytes_per_word;
                _Ch = _Ch0;
                }
            else if (_Last1 - _Mid1 < 2 * _Bytes_per_word)
                break;  // first surrogate word but second not available yet
            else
                {   // get second word
                if (*_Pstate == _LITTLE_FIRST)
                    _Ch1 = (unsigned short)(_Ptr[3] << 8 | _Ptr[2]);
                else
                    _Ch1 = (unsigned short)(_Ptr[2] << 8 | _Ptr[3]);

                if (_Ch1 < 0xdc00 || 0xe000 <= _Ch1)
                    return (_Mybase::error);    // not a second surrogate word

                _Mid1 += 2 * _Bytes_per_word;
                // the + 0x0040 term, once shifted left 10, adds the 0x10000
                // offset of the supplementary range
                _Ch = (unsigned long)(_Ch0 - 0xd800 + 0x0040) << 10
                    | (_Ch1 - 0xdc00);
                }

            if (_Maxcode < _Ch)
                return (_Mybase::error);    // code too large
            *_Mid2++ = (_Elem)_Ch;
            }

        return (_First1 == _Mid1 ? _Mybase::partial : _Mybase::ok);
        }

    // Convert elements [_First1, _Last1) to UTF-16 bytes [_First2, _Last2).
    // _Mid1 / _Mid2 receive the positions reached in the input / output.
    // On the first call for a given state (state byte 0) the byte order is
    // fixed from _Mode and, with generate_header, a byte order mark is put.
    // Returns:
    //   ok       some input was consumed
    //   partial  no input was consumed, or the output has no room for a
    //            surrogate pair (or for the header plus a pair)
    //   error    the value exceeds min(_Maxcode, 0x10ffff) or lies in
    //            [0xd800, 0xdc00)
    virtual result do_out(_Statype& _State,
        const _Elem *_First1, const _Elem *_Last1, const _Elem *& _Mid1,
        _Byte *_First2, _Byte *_Last2, _Byte *& _Mid2) const
        {   // convert [_First1, _Last1) to bytes [_First2, _Last2)
        char *_Pstate = (char *)&_State;
        _Mid1 = _First1;
        _Mid2 = _First2;

        if (*_Pstate == 0)
            {   // determine endianness once, maybe generate header
            *_Pstate = (_Mode & little_endian) != 0
                ? _LITTLE_FIRST : _BIG_FIRST;

            if ((_Mode & generate_header) != 0)
                {   // a header is wanted
                if (_Last2 - _Mid2 < 3 * _Bytes_per_word)
                    {   // not enough room for header plus a two-word code
                    // Leave the state undecided so that a retry with a
                    // larger buffer still emits the header.
                    *_Pstate = 0;
                    return (_Mybase::partial);
                    }
                else if (*_Pstate == _LITTLE_FIRST)
                    {   // put header LS byte first
                    *_Mid2++ = (_Byte)(unsigned char)0xff;
                    *_Mid2++ = (_Byte)(unsigned char)0xfe;
                    }
                else
                    {   // put header MS byte first
                    *_Mid2++ = (_Byte)(unsigned char)0xfe;
                    *_Mid2++ = (_Byte)(unsigned char)0xff;
                    }
                }
            }

        for (; _Mid1 != _Last1 && _Bytes_per_word <= _Last2 - _Mid2; )
            {   // convert and put a wide char
            bool _Extra = false;    // true when a surrogate pair is needed
            unsigned long _Ch = (unsigned long)*_Mid1++;

            if ((_Maxcode < 0x10ffff ? _Maxcode : 0x10ffff) < _Ch)
                return (_Mybase::error);    // value too large
            else if (_Ch <= 0xffff)
                {   // one word, can't be code for first of two
                if (0xd800 <= _Ch && _Ch < 0xdc00)
                    return (_Mybase::error);
                }
            else if (_Last2 - _Mid2 < 2 * _Bytes_per_word)
                {   // not enough room for two-word output, back up
                --_Mid1;
                return (_Mybase::partial);
                }
            else
                _Extra = true;

            if (*_Pstate == _LITTLE_FIRST)
                {   // least-significant byte of each word goes first
                if (!_Extra)
                    {   // put a single word LS byte first
                    *_Mid2++ = (_Byte)_Ch;
                    *_Mid2++ = (_Byte)(_Ch >> 8);
                    }
                else
                    {   // put a pair of words LS byte first
                    unsigned short _Ch0 = (unsigned short)(0xd800
                        | ((unsigned short)(_Ch >> 10) - 0x0040));
                    *_Mid2++ = (_Byte)_Ch0;
                    *_Mid2++ = (_Byte)(_Ch0 >> 8);

                    _Ch0 = (unsigned short)(0xdc00
                        | ((unsigned short)_Ch & 0x03ff));
                    *_Mid2++ = (_Byte)_Ch0;
                    *_Mid2++ = (_Byte)(_Ch0 >> 8);
                    }
                }
            else
                {   // most-significant byte of each word goes first
                if (!_Extra)
                    {   // put a single word MS byte first
                    *_Mid2++ = (_Byte)(_Ch >> 8);
                    *_Mid2++ = (_Byte)_Ch;
                    }
                else
                    {   // put a pair of words MS byte first
                    unsigned short _Ch0 = (unsigned short)(0xd800
                        | ((unsigned short)(_Ch >> 10) - 0x0040));
                    *_Mid2++ = (_Byte)(_Ch0 >> 8);
                    *_Mid2++ = (_Byte)_Ch0;

                    _Ch0 = (unsigned short)(0xdc00
                        | ((unsigned short)_Ch & 0x03ff));
                    *_Mid2++ = (_Byte)(_Ch0 >> 8);
                    *_Mid2++ = (_Byte)_Ch0;
                    }
                }
            }

        return (_First1 == _Mid1 ? _Mybase::partial : _Mybase::ok);
        }

    // Writes nothing: sets _Mid2 to _First2 and reports ok.
    virtual result do_unshift(_Statype&,
        _Byte *_First2, _Byte *, _Byte *& _Mid2) const
        {   // generate bytes to return to default shift state
        _Mid2 = _First2;
        return (_Mybase::ok);
        }

    // Count the elements obtained by converting bytes [_First1, _Last1) one
    // element at a time with do_in, stopping after _Count elements, at the
    // end of input, or at the first error/partial result. Works on a copy
    // of _State, so the caller's state is not modified.
    virtual int do_length(const _Statype& _State, const _Byte *_First1,
        const _Byte *_Last1, size_t _Count) const _THROW0()
        {   // return min(_Count, converted length of bytes [_First1, _Last1))
        size_t _Wchars = 0;
        _Statype _Mystate = _State;

        for (; _Wchars < _Count && _First1 != _Last1; )
            {   // convert another wide char
            const _Byte *_Mid1;
            _Elem *_Mid2;
            _Elem _Ch;

            switch (do_in(_Mystate, _First1, _Last1, _Mid1,
                &_Ch, &_Ch + 1, _Mid2))
                {   // test result of single wide-char conversion
            case _Mybase::noconv:
                // count every remaining byte as one element
                return ((int)(_Wchars + (_Last1 - _First1)));

            case _Mybase::ok:
                if (_Mid2 == &_Ch + 1)
                    ++_Wchars;  // replacement do_in might not convert one
                _First1 = _Mid1;
                break;

            default:
                return ((int)_Wchars);  // error or partial
                }
            }

        return ((int)_Wchars);
        }

    // Conversions through this facet are never the identity.
    virtual bool do_always_noconv() const _THROW0()
        {   // return true if conversions never change input
        return (false);
        }

    // Returns 3 words (6 bytes) when either header flag is set in _Mode,
    // otherwise 6 words (12 bytes).
    // NOTE(review): the larger value for the header-less case looks
    // inverted (a header can only add bytes) -- confirm before relying on it.
    virtual int do_max_length() const _THROW0()
        {   // return maximum length required for a conversion
        return ((_Mode & (consume_header | generate_header)) != 0
            ? 3 * _Bytes_per_word : 6 * _Bytes_per_word);
        }

    // Returns -1 when either header flag is set in _Mode, otherwise 0.
    virtual int do_encoding() const _THROW0()
        {   // return length of code sequence (from codecvt)
        return ((_Mode & (consume_header | generate_header)) != 0
            ? -1 : 0);  // -1 => state dependent, 0 => varying length
        }
    };
} // namespace cvt
} // namespace stdext
// Optional self-test hook: only active when _TEST_IT is defined.
// The macros below are defined before <cvt/xtest> is included; presumably
// that header reads them as its test parameters -- TODO confirm against
// <cvt/xtest>, which is not visible here.
#ifdef _TEST_IT
#define NCHARS 0x10000
#define MYWC_MAX 0x10ffff
#define MYFILE "utf16"
// facet type under test; Mywchar is not declared in this file
#define MYNAME stdext::cvt::codecvt_utf16<Mywchar>
#include <cvt/xtest>
#endif /* _TEST_IT */
#pragma warning(pop)
#pragma pack(pop)
#endif /* RC_INVOKED */
#endif /* _CVT_UTF16_ */
/*
* Copyright (c) 1992-2009 by P.J. Plauger. ALL RIGHTS RESERVED.
* Consult your license regarding permissions and restrictions.
V5.20:0009 */
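/*
 * Source-viewer residue (not part of the header): keyboard shortcut help
 *   Copy code          Ctrl + C
 *   Search code        Ctrl + F
 *   Full-screen mode   F11
 *   Switch theme       Ctrl + Shift + D
 *   Show shortcuts     ?
 *   Increase font size Ctrl + =
 *   Decrease font size Ctrl + -
 */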