codecvt

来自「C语言库函数的原型,有用的拿去」· 代码 · 共 753 行 · 第 1/2 页
TXT
753 行
// codecvt standard header
#pragma once
#ifndef _CODECVT_
#define _CODECVT_
#ifndef RC_INVOKED
#include <locale>
#include <cwchar>

 #pragma pack(push,_CRT_PACKING)
 #pragma warning(push,3)

 #pragma warning(disable: 6326)

_STD_BEGIN
// Byte-order tags that codecvt_utf16 stores in the first byte of the
// conversion state once the byte order of a stream has been decided
// (a value of 0 in that byte means "not decided yet").
#define _LITTLE_FIRST	1
#define _BIG_FIRST		2
// NOTE(review): not referenced in the visible part of this header --
// presumably the word size for a facet defined further down; confirm.
#define _BYTES_PER_WORD	4

// Bit flags passed as the _Mymode template argument of the facets in this
// header; individual flags may be OR'ed together.
enum codecvt_mode
	{	// listed in ascending bit order
	little_endian = 1,		// prefer least-significant-byte-first UTF-16
	generate_header = 2,	// write a byte-order mark at the start of output
	consume_header = 4		// recognize and drop a leading byte-order mark
	};

// Conversion state type shared by the facets below; they reinterpret its
// first byte as a small flag (see the _Pstate pointers in do_in/do_out).
typedef _CSTD mbstate_t _Statype;

		// TEMPLATE CLASS codecvt_utf8
// Facet converting between UTF-8 byte sequences (external side) and _Elem
// code values (internal side).
//   _Elem   - internal character type; one element holds one code value
//   _Mymax  - largest code value accepted in either direction
//   _Mymode - OR of codecvt_mode flags; this class examines only
//             consume_header and generate_header
// The first byte of the conversion state records whether the start of the
// stream has been handled yet (0 == not yet, 1 == done).
template<class _Elem,
	unsigned long _Mymax = 0x10ffff,
	codecvt_mode _Mymode = (codecvt_mode)0>
	class codecvt_utf8
	: public _STD codecvt<_Elem, char, _Statype>
	{	// facet for converting between _Elem and UTF-8 byte sequences
public:
	typedef _STD codecvt<_Elem, char, _Statype> _Mybase;
	typedef typename _Mybase::result result;
	typedef char _Byte;
	typedef _Elem intern_type;
	typedef _Byte extern_type;
	typedef _Statype state_type;

	// Construct the facet, forwarding the reference count to the base.
	explicit codecvt_utf8(size_t _Refs = 0)
		: _Mybase(_Refs)
		{	// construct with ref count
		}

	virtual ~codecvt_utf8()
		{	// destroy the object
		}

protected:
	// Decode UTF-8 bytes [_First1, _Last1) into elements [_First2, _Last2).
	// _Mid1/_Mid2 are set to the positions reached in input/output.
	// Returns error for a stray continuation byte, a missing continuation
	// byte or a value above _Mymax; partial when no input was consumed;
	// ok otherwise.
	virtual result do_in(_Statype& _State,
		const _Byte *_First1, const _Byte *_Last1, const _Byte *& _Mid1,
		_Elem *_First2, _Elem *_Last2, _Elem *& _Mid2) const
		{	// convert bytes [_First1, _Last1) to [_First2, _Last2)
		char *_Pstate = (char *)&_State;
		_Mid1 = _First1;
		_Mid2 = _First2;

		for (; _Mid1 != _Last1 && _Mid2 != _Last2; )
			{	// convert a multibyte sequence
			unsigned char _By = (unsigned char)*_Mid1;
			unsigned long _Ch;
			int _Nextra;	// number of continuation bytes still expected

			if (_By < 0x80)
				_Ch = _By, _Nextra = 0;
			else if (_By < 0xc0)
				{	// 0x80-0xbf is a continuation byte, not a first byte
				++_Mid1;
				return (_Mybase::error);
				}
			else if (_By < 0xe0)
				_Ch = _By & 0x1f, _Nextra = 1;
			else if (_By < 0xf0)
				_Ch = _By & 0x0f, _Nextra = 2;
			else if (_By < 0xf8)
				_Ch = _By & 0x07, _Nextra = 3;
			else
				_Ch = _By & 0x03, _Nextra = _By < 0xfc ? 4 : 5;

			if (_Nextra == 0)
				++_Mid1;
			else if (_Last1 - _Mid1 < _Nextra + 1)
				break;	// not enough input, leave the sequence unconsumed
			else
				for (++_Mid1; 0 < _Nextra; --_Nextra, ++_Mid1)
					if ((_By = (unsigned char)*_Mid1) < 0x80 || 0xc0 <= _By)
						return (_Mybase::error);	// not continuation byte
					else
						_Ch = (_Ch << 6) | (_By & 0x3f);

			if (*_Pstate == 0)
				{	// first time, maybe look for and consume header
				*_Pstate = 1;

				if ((_Mymode & consume_header) != 0 && _Ch == 0xfeff)
					{	// drop header and retry on the bytes that follow it
					result _Ans = do_in(_State, _Mid1, _Last1, _Mid1,
						_First2, _Last2, _Mid2);

					if (_Ans == _Mybase::partial)
						{	// roll back header determination
						*_Pstate = 0;
						_Mid1 = _First1;
						}
					return (_Ans);
					}
				}

			if (_Mymax < _Ch)
				return (_Mybase::error);	// code too large
			*_Mid2++ = (_Elem)_Ch;
			}

		return (_First1 == _Mid1 ? _Mybase::partial : _Mybase::ok);
		}

	// Encode elements [_First1, _Last1) as UTF-8 bytes in [_First2, _Last2).
	// _Mid1/_Mid2 are set to the positions reached in input/output.
	// Returns error for a value above _Mymax; partial when no element was
	// consumed (including when the header plus first element do not fit);
	// ok otherwise.
	virtual result do_out(_Statype& _State,
		const _Elem *_First1, const _Elem *_Last1, const _Elem *& _Mid1,
		_Byte *_First2, _Byte *_Last2, _Byte *& _Mid2) const
		{	// convert [_First1, _Last1) to bytes [_First2, _Last2)
		char *_Pstate = (char *)&_State;
		_Mid1 = _First1;
		_Mid2 = _First2;

		for (; _Mid1 != _Last1 && _Mid2 != _Last2; )
			{	// convert and put a wide char
			_Byte _By;		// lead byte of the encoded sequence
			int _Nextra;	// number of continuation bytes to emit
			unsigned long _Ch = (unsigned long)*_Mid1;

			if (_Mymax < _Ch)
				return (_Mybase::error);

			if (_Ch < 0x0080)
				_By = (_Byte)_Ch, _Nextra = 0;
			else if (_Ch < 0x0800)
				_By = (_Byte)(0xc0 | _Ch >> 6), _Nextra = 1;
			else if (_Ch < 0x00010000)
				_By = (_Byte)(0xe0 | _Ch >> 12), _Nextra = 2;
			else if (_Ch < 0x00200000)
				_By = (_Byte)(0xf0 | _Ch >> 18), _Nextra = 3;
			else if (_Ch < 0x04000000)
				_By = (_Byte)(0xf8 | _Ch >> 24), _Nextra = 4;
			else
				_By = (_Byte)(0xfc | _Ch >> 30 & 0x03), _Nextra = 5;

			if (*_Pstate == 0)
				{	// first time, maybe generate header
				if ((_Mymode & generate_header) == 0)
					;
				else if (_Last2 - _Mid2 < 3 + 1 + _Nextra)
					return (_Mybase::partial);	// not enough room for both
				else
					{	// prepend header
					*_Mid2++ = (_Byte)(unsigned char)0xef;
					*_Mid2++ = (_Byte)(unsigned char)0xbb;
					*_Mid2++ = (_Byte)(unsigned char)0xbf;
					}

				// Mark the start as handled only once the header decision
				// is final; the partial return above must leave the state
				// at 0 so that a retry with more room still emits the header.
				*_Pstate = 1;
				}

			if (_Last2 - _Mid2 < 1 + _Nextra)
				break;	// not enough room for output

			++_Mid1;
			for (*_Mid2++ = _By; 0 < _Nextra; )
				*_Mid2++ = (_Byte)(_Ch >> 6 * --_Nextra & 0x3f | 0x80);
			}
		return (_First1 == _Mid1 ? _Mybase::partial : _Mybase::ok);
		}

	// Nothing needs to be written to end a sequence: sets _Mid2 to _First2
	// and reports ok.
	virtual result do_unshift(_Statype&,
		_Byte *_First2, _Byte *, _Byte *& _Mid2) const
		{	// generate bytes to return to default shift state
		_Mid2 = _First2;
		return (_Mybase::ok);
		}

	// Count how many elements, up to _Count, do_in produces from
	// [_First1, _Last1). Works on a copy of _State, converting one element
	// at a time, and stops at the first error or partial result.
	virtual int do_length(const _Statype& _State, const _Byte *_First1,
		const _Byte *_Last1, size_t _Count) const _THROW0()
		{	// return min(_Count, converted length of bytes [_First1, _Last1))
		size_t _Wchars = 0;
		_Statype _Mystate = _State;

		for (; _Wchars < _Count && _First1 != _Last1; )
			{	// convert another wide character
			const _Byte *_Mid1;
			_Elem *_Mid2;
			_Elem _Ch;

			switch (do_in(_Mystate, _First1, _Last1, _Mid1,
				&_Ch, &_Ch + 1, _Mid2))
				{	// test result of single wide-char conversion
			case _Mybase::noconv:
				return ((int)(_Wchars + (_Last1 - _First1)));

			case  _Mybase::ok:
				if (_Mid2 == &_Ch + 1)
					++_Wchars;	// replacement do_in might not convert one
				_First1 = _Mid1;
				break;

			default:
				return ((int)_Wchars);	// error or partial
				}
			}

		return ((int)_Wchars);
		}

	virtual bool do_always_noconv() const _THROW0()
		{	// return true if conversions never change input
		return (false);
		}

	// Longest byte sequence for one element: up to 6 bytes, plus the
	// 3-byte header when either header flag is set.
	virtual int do_max_length() const _THROW0()
		{	// return maximum length required for a conversion
		return ((_Mymode & (consume_header | generate_header)) != 0
			? 9 : 6);
		}

	virtual int do_encoding() const _THROW0()
		{	// return length of code sequence (from codecvt)
		return ((_Mymode & (consume_header | generate_header)) != 0
			? -1 : 0);	// -1 => state dependent, 0 => varying length
		}
	};

		// TEMPLATE CLASS codecvt_utf16
template<class _Elem,
	unsigned long _Mymax = 0x10ffff,
	codecvt_mode _Mymode = (codecvt_mode)0>
	class codecvt_utf16
	: public _STD codecvt<_Elem, char, _Statype>
	{	// facet for converting between _Elem and UTF-16 multibyte sequences
	enum {_Bytes_per_word = 2};
public:
 	typedef _STD codecvt<_Elem, char, _Statype> _Mybase;
	typedef typename _Mybase::result result;
	typedef char _Byte;
	typedef _Elem intern_type;
	typedef _Byte extern_type;
	typedef _Statype state_type;

	// Construct the facet, forwarding the reference count _Refs to the
	// codecvt base class.
	explicit codecvt_utf16(size_t _Refs = 0)
		: _Mybase(_Refs)
		{	// construct with ref count
		}

	// Virtual destructor with an empty body.
	virtual ~codecvt_utf16()
		{	// destroy the object
		}

protected:
	// Decode UTF-16 bytes [_First1, _Last1) into elements [_First2, _Last2).
	// The first byte of _State holds the byte order in use (_LITTLE_FIRST,
	// _BIG_FIRST, or 0 while undecided). While undecided, the first word is
	// read in the order preferred by _Mymode; with consume_header set, a
	// byte-order mark there is dropped and fixes the order for the stream.
	// _Mid1/_Mid2 are set to the positions reached in input/output.
	// Returns error for a first surrogate not followed by a second one or a
	// value above _Mymax; partial when no input was consumed; ok otherwise.
	virtual result do_in(_Statype& _State,
		const _Byte *_First1, const _Byte *_Last1, const _Byte *& _Mid1,
		_Elem *_First2, _Elem *_Last2, _Elem *& _Mid2) const
		{	// convert bytes [_First1, _Last1) to [_First2, _Last2)
		char *_Pstate = (char *)&_State;
		_Mid1 = _First1;
		_Mid2 = _First2;

		for (; _Bytes_per_word <= _Last1 - _Mid1 && _Mid2 != _Last2; )
			{	// convert a multibyte sequence
			const unsigned char *_Ptr = (const unsigned char *)_Mid1;
			unsigned long _Ch;
			unsigned short _Ch0, _Ch1;

			if (*_Pstate == _LITTLE_FIRST)
				_Ch0 = (unsigned short)(_Ptr[1] << 8 | _Ptr[0]);
			else if (*_Pstate == _BIG_FIRST)
				_Ch0 = (unsigned short)(_Ptr[0] << 8 | _Ptr[1]);
			else
				{	// no header seen yet, try preferred mode
				unsigned char _Default_endian = (_Mymode & little_endian) != 0
					? _LITTLE_FIRST : _BIG_FIRST;

				if ((_Mymode & little_endian) != 0)
					_Ch0 = (unsigned short)(_Ptr[1] << 8 | _Ptr[0]);
				else
					_Ch0 = (unsigned short)(_Ptr[0] << 8 | _Ptr[1]);
				if ((_Mymode & consume_header) == 0
					|| (_Ch0 != 0xfeff && _Ch0 != 0xfffe))
					*_Pstate = _Default_endian;
				else
					{	// consume header, fixate on endianness, and retry
					_Mid1 += _Bytes_per_word;
					// 0xfeff means the mark was read in the preferred
					// order; 0xfffe means the bytes are swapped, so pick
					// the other order (3 - x maps 1 <-> 2). This must be a
					// comparison: an assignment here is always true and
					// would never select the swapped order.
					*_Pstate = (char)(_Ch0 == 0xfeff
						? _Default_endian
						: (unsigned char)(3 - _Default_endian));
					result _Ans = do_in(_State, _Mid1, _Last1, _Mid1,
						_First2, _Last2, _Mid2);

					if (_Ans == _Mybase::partial)
						{	// not enough bytes, roll back header
						*_Pstate = 0;
						_Mid1 = _First1;
						}
					return (_Ans);
					}
				}

			if (_Ch0 < 0xd800 || 0xdc00 <= _Ch0)
				{	// one word, consume bytes
				_Mid1 += _Bytes_per_word;
				_Ch = _Ch0;
				}
			else if (_Last1 - _Mid1 < 2 * _Bytes_per_word)
				break;	// second word of the pair not available yet
			else
				{	// get second word
				if (*_Pstate == _LITTLE_FIRST)
					_Ch1 = (unsigned short)(_Ptr[3] << 8 | _Ptr[2]);
				else
					_Ch1 = (unsigned short)(_Ptr[2] << 8 | _Ptr[3]);

				if (_Ch1 < 0xdc00 || 0xe000 <= _Ch1)
					return (_Mybase::error);	// not a second surrogate

				_Mid1 += 2 * _Bytes_per_word;
				// adding 0x0040 before the shift contributes the 0x10000
				// offset of the supplementary planes
				_Ch = (unsigned long)(_Ch0 - 0xd800 + 0x0040) << 10
					| (_Ch1 - 0xdc00);
				}

			if (_Mymax < _Ch)
				return (_Mybase::error);	// code too large
			*_Mid2++ = (_Elem)_Ch;
			}

		return (_First1 == _Mid1 ? _Mybase::partial : _Mybase::ok);
		}

	virtual result do_out(_Statype& _State,
		const _Elem *_First1, const _Elem *_Last1, const _Elem *& _Mid1,
		_Byte *_First2, _Byte *_Last2, _Byte *& _Mid2) const
		{	// convert [_First1, _Last1) to bytes [_First2, _Last)
		char *_Pstate = (char *)&_State;
		_Mid1 = _First1;
		_Mid2 = _First2;

		if (*_Pstate == 0)
			{	// determine endianness once, maybe generate header
			*_Pstate = (_Mymode & little_endian) != 0
				? _LITTLE_FIRST : _BIG_FIRST;
			if ((_Mymode & generate_header) == 0)
				;
			else if (_Last2 - _Mid2 < 3 * _Bytes_per_word)
				return (_Mybase::partial);	// not enough room for all
			else if (*_Pstate == _LITTLE_FIRST)
				{	// put header LS byte first
				*_Mid2++ = (_Byte)(unsigned char)0xff;
				*_Mid2++ = (_Byte)(unsigned char)0xfe;
				}
			else
				{	// put header MS byte first
				*_Mid2++ = (_Byte)(unsigned char)0xfe;
				*_Mid2++ = (_Byte)(unsigned char)0xff;
				}
			}

		for (; _Mid1 != _Last1 && _Bytes_per_word <= _Last2 - _Mid2; )
			{	// convert and put a wide char
			bool _Extra = false;
			unsigned long _Ch = (unsigned long)*_Mid1++;

			if ((_Mymax < 0x10ffff ? _Mymax : 0x10ffff) < _Ch)
				return (_Mybase::error);	// value too large
			else if (_Ch <= 0xffff)
				{	// one word, can't be code for first of two
				if (0xd800 <= _Ch && _Ch < 0xdc00)
					return (_Mybase::error);
				}
			else if (_Last2 - _Mid2 < 2 * _Bytes_per_word)
				{	// not enough room for two-word output, back up
				--_Mid1;
				return (_Mybase::partial);
codecvt - 源码说明

本页面展示了「C语言库函数的原型,有用的拿去」中的 codecvt 源码文件，采用编程语言编写，共 753 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与C语言相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?