📄 xjis
字号:
// codecvt facet for JIS multibyte code, UCS-2 wide-character code
#pragma once
#ifndef _CVT_XJIS_
#define _CVT_XJIS_
#ifndef RC_INVOKED
#include <locale>
#include <cwchar>
#pragma pack(push,_CRT_PACKING)
#pragma warning(push,3)
#ifndef _CVT_JIS_
#error do not include directly
#endif /* _CVT_JIS_ */
namespace stdext {
namespace cvt {
// conversion-state type passed to the facet's virtual functions
typedef _CSTD mbstate_t _Statype;
// ASCII ESC (0x1b): first byte of every shift escape sequence tested for or
// emitted by the facet in this header
#define _ESC_CODE 0x1b
// TEMPLATE CLASS codecvt_jis
template<class _Elem,
unsigned long _Maxcode = 0xffff>
class codecvt_jis
: public _STD codecvt<_Elem, char, _Statype>
{ // facet for converting between UCS-2 _Elem and JIS-X0208 bytes
typedef _tab_jis<int> _Table;
bool _Jis_to_ucs(unsigned short& _Ch) const
{   // translate _Ch in place from a JIS-X0208 code to UCS-2
    // returns true if a mapping exists, false otherwise
    if (_Ch < _Table::_Nlow)
        return (true);  // low codes keep the same wide value

    if (_Ch < 0x100)
    {   // one-byte code: direct lookup, a zero entry means "no mapping"
        // (note: _Ch is overwritten with 0 in that case)
        _Ch = _Table::_Btw[_Ch - _Table::_Nlow];
        return (_Ch != 0);
    }

    // two-byte code: binary search of _Dbvalid
    // (assumes _Dbvalid is sorted ascending and parallel to _Dbtw)
    unsigned long _Begin = 0;
    unsigned long _End = sizeof (_Table::_Dbvalid)
        / sizeof (_Table::_Dbvalid[0]);
    while (_Begin < _End)
    {   // narrow the half-open interval [_Begin, _End)
        const unsigned long _Probe = (_End + _Begin) / 2;
        if (_Ch == _Table::_Dbvalid[_Probe])
        {   // hit: replace the code with its wide equivalent
            _Ch = _Table::_Dbtw[_Probe];
            return (true);
        }
        if (_Ch < _Table::_Dbvalid[_Probe])
            _End = _Probe;
        else
            _Begin = _Probe + 1;
    }
    return (false); // not a valid two-byte code
}
bool _Ucs_to_jis(unsigned short& _Ch) const
{   // translate _Ch in place from UCS-2 to a JIS-X0208 code
    // returns true if a mapping exists, false otherwise
    const bool _Identity = _Ch < _Table::_Nlow
        || (_Ch < 0x100
            && _Table::_Btw[_Ch - _Table::_Nlow] == _Ch);
    if (_Identity)
        return (true);  // wide value and byte value coincide

    // everything else: binary search of _Wvalid
    // (assumes _Wvalid is sorted ascending and parallel to _Wtb)
    unsigned long _Begin = 0;
    unsigned long _End = sizeof (_Table::_Wvalid)
        / sizeof (_Table::_Wvalid[0]);
    while (_Begin < _End)
    {   // narrow the half-open interval [_Begin, _End)
        const unsigned long _Probe = (_End + _Begin) / 2;
        if (_Ch == _Table::_Wvalid[_Probe])
        {   // hit: replace the wide value with its JIS code
            _Ch = _Table::_Wtb[_Probe];
            return (true);
        }
        if (_Ch < _Table::_Wvalid[_Probe])
            _End = _Probe;
        else
            _Begin = _Probe + 1;
    }
    return (false); // no JIS code for this wide value
}
public:
typedef _STD codecvt<_Elem, char, _Statype> _Mybase;
typedef typename _Mybase::result result;
typedef char _Byte;
typedef _Elem intern_type;
typedef _Byte extern_type;
typedef _Statype state_type;
// construct the facet, passing the initial reference count to the base
explicit codecvt_jis(size_t _Refs = 0) : _Mybase(_Refs) {}
// nothing to release; virtual so destruction through the base is well defined
virtual ~codecvt_jis() {}
protected:
virtual result do_in(_Statype& _State,
    const _Byte *_First1, const _Byte *_Last1, const _Byte *& _Mid1,
    _Elem *_First2, _Elem *_Last2, _Elem *& _Mid2) const
{   // convert bytes [_First1, _Last1) to wide chars in [_First2, _Last2)
    //
    // The first byte of _State is the shift state: 0 means 1-byte codes,
    // nonzero means 2-byte codes. ESC ( B and ESC ( J select 1-byte mode;
    // ESC $ B and ESC $ @ select 2-byte mode.
    //
    // On return _Mid1 / _Mid2 are one past the last input byte consumed /
    // last wide char stored. _Mid1 never stops in the middle of a 2-byte
    // code: on error it addresses the first byte of the offending code.
    //
    // Returns error for a bad escape sequence, a byte outside 0x21..0x7e
    // in 2-byte mode, an unmapped code, or a result above _Maxcode;
    // ok if at least one wide char was stored; partial otherwise.
    const unsigned long _Maxc = _Maxcode;   // to quiet warnings
    char *_Pstate = (char *)&_State;
    result _Ans = _Mybase::partial;

    _Mid1 = _First1;
    _Mid2 = _First2;
    while (_Mid1 != _Last1 && _Mid2 != _Last2)
    {   // convert a multibyte sequence
        const unsigned char _By = (unsigned char)*_Mid1;

        if (_By == _ESC_CODE)
        {   // handle a shift escape sequence
            if (_Last1 - _Mid1 < 4)
                break;  // need the escape plus one byte of lookahead

            if (_Mid1[1] == '('
                && (_Mid1[2] == 'B' || _Mid1[2] == 'J')
                && _Mid1[3] != _ESC_CODE)
            {   // shift to 1-byte mode
                _Mid1 += 3;
                *_Pstate = 0;
                continue;
            }

            if (_Mid1[1] == '$'
                && (_Mid1[2] == 'B' || _Mid1[2] == '@'))
            {   // shift to 2-byte mode
                if (_Last1 - _Mid1 < 5)
                    break;  // not enough bytes for escape plus 2-byte code
                _Mid1 += 3;
                *_Pstate = 1;
                continue;
            }

            return (_Mybase::error);    // bad escape sequence
        }

        unsigned short _Ch = _By;
        int _Nbytes = 1;    // input bytes making up this code

        if (*_Pstate != 0)
        {   // 2-byte mode: peek at the second byte without consuming it
            if (_By < 0x21 || 0x7e < _By)
                return (_Mybase::error);    // bad first byte
            if (_Last1 - _Mid1 < 2)
                break;  // second byte not available yet

            const unsigned char _By2 = (unsigned char)_Mid1[1];
            if (_By2 < 0x21 || 0x7e < _By2)
                return (_Mybase::error);    // bad second byte

            _Ch = (unsigned short)(_Ch << 8 | _By2);
            _Nbytes = 2;
        }

        if (!_Jis_to_ucs(_Ch) || _Maxc < _Ch)
            return (_Mybase::error);    // unmapped code or code too large

        // commit only after the whole code converted successfully
        _Mid1 += _Nbytes;
        *_Mid2++ = (_Elem)_Ch;
        _Ans = _Mybase::ok;
    }
    return (_Ans);
}
virtual result do_out(_Statype& _State,
    const _Elem *_First1, const _Elem *_Last1, const _Elem *& _Mid1,
    _Byte *_First2, _Byte *_Last2, _Byte *& _Mid2) const
{   // convert wide chars [_First1, _Last1) to bytes in [_First2, _Last2)
    //
    // The first byte of _State is the shift state: 0 means 1-byte codes,
    // nonzero means 2-byte codes. A shift escape (ESC ( B or ESC $ B) is
    // written whenever the next code needs the other mode.
    //
    // On return _Mid1 / _Mid2 are one past the last wide char consumed /
    // last byte stored. Nothing is written, and the state is untouched,
    // for a wide char that is rejected or does not fit.
    //
    // Returns error for a wide char above _Maxcode or 0xffff, one with no
    // JIS mapping, one that maps to a bare ESC, or a 2-byte code with a
    // byte outside 0x21..0x7e; partial if no wide char was consumed;
    // ok otherwise.
    const unsigned long _Maxc = _Maxcode;   // to quiet warnings
    char *_Pstate = (char *)&_State;

    _Mid1 = _First1;
    _Mid2 = _First2;
    while (_Mid1 != _Last1 && _Mid2 != _Last2)
    {   // convert and put a wide char
        const unsigned long _Uch = (unsigned long)*_Mid1;

        // range-check the full value BEFORE narrowing, so a wide _Elem
        // above 0xffff is rejected instead of being silently truncated
        if (_Maxc < _Uch || 0xffff < _Uch)
            return (_Mybase::error);

        unsigned short _Ch = (unsigned short)_Uch;
        if (!_Ucs_to_jis(_Ch))
            return (_Mybase::error);    // no JIS mapping

        if (_Ch <= 0xff)
        {   // put a 1-byte code
            if (_Ch == _ESC_CODE)
                return (_Mybase::error);    // can't output bald ESC

            if (*_Pstate != 0)
            {   // in 2-byte state, need escape (3) plus the code (1)
                if (_Last2 - _Mid2 < 4)
                    break;
                *_Mid2++ = _ESC_CODE;
                *_Mid2++ = '(';
                *_Mid2++ = 'B';
                *_Pstate = 0;
            }
            *_Mid2++ = (_Byte)_Ch;
        }
        else
        {   // put a 2-byte code
            const bool _Need_shift = *_Pstate == 0;
            if (_Last2 - _Mid2 < (_Need_shift ? 5 : 2))
                break;  // no room for [escape plus] 2-byte code

            // validate both bytes before emitting anything, so an error
            // cannot leave a lone lead byte in the output
            const unsigned char _Hi = (unsigned char)(_Ch >> 8);
            const unsigned char _Lo = (unsigned char)_Ch;
            if (_Hi < 0x21 || 0x7e < _Hi)
                return (_Mybase::error);    // bad first byte
            if (_Lo < 0x21 || 0x7e < _Lo)
                return (_Mybase::error);    // bad second byte

            if (_Need_shift)
            {   // put shift to 2-byte state
                *_Mid2++ = _ESC_CODE;
                *_Mid2++ = '$';
                *_Mid2++ = 'B';
                *_Pstate = 1;
            }
            *_Mid2++ = (_Byte)_Hi;
            *_Mid2++ = (_Byte)_Lo;
        }
        ++_Mid1;
    }
    return (_First1 == _Mid1 ? _Mybase::partial : _Mybase::ok);
}
virtual result do_unshift(_Statype& _State,
    _Byte *_First2, _Byte *_Last2, _Byte *& _Mid2) const
{   // write into [_First2, _Last2) the bytes that return to 1-byte mode
    //
    // The first byte of _State tracks progress through ESC ( B:
    //   1 -> ESC still to write, 2 -> '(' still to write,
    //   3 -> 'B' still to write, 0 -> already in the initial state.
    //
    // As many of the remaining bytes as fit are written in one call.
    // _Mid2 is left one past the last byte stored.
    //
    // Returns ok once the state is 0; partial if the output range filled
    // up first (call again with more room to resume); error if the state
    // holds an unrecognized value.
    char *_Pstate = (char *)&_State;
    _Mid2 = _First2;

    for (; ; )
    {   // emit one byte of the escape sequence per iteration
        _Byte _Next = 0;
        char _Newstate = 0;

        switch (*_Pstate)
        {
        case 0: // homed, nothing more to write
            return (_Mybase::ok);
        case 1: // first of three bytes
            _Next = _ESC_CODE;
            _Newstate = 2;
            break;
        case 2: // second of three bytes
            _Next = '(';
            _Newstate = 3;
            break;
        case 3: // third of three bytes
            _Next = 'B';
            _Newstate = 0;
            break;
        default:    // not a state this facet produces
            return (_Mybase::error);
        }

        if (_Mid2 == _Last2)
            return (_Mybase::partial);  // out of room, state kept for resume

        *_Mid2++ = _Next;
        *_Pstate = _Newstate;
    }
}
virtual int do_length(const _Statype& _State, const _Byte *_First1,
    const _Byte *_Last1, size_t _Count) const _THROW0()
{   // count the wide chars, at most _Count, that [_First1, _Last1) yields
    // conversion runs on a private copy of _State, one wide char at a time
    // NOTE(review): the result counts wide chars, not input bytes consumed
    _Statype _Scratch = _State;
    size_t _Done = 0;

    while (_Done < _Count && _First1 != _Last1)
    {   // try to convert one more wide character
        const _Byte *_Next1;
        _Elem *_Next2;
        _Elem _Wide;
        const result _Res = do_in(_Scratch, _First1, _Last1, _Next1,
            &_Wide, &_Wide + 1, _Next2);

        if (_Res == _Mybase::noconv)
            return ((int)(_Done + (_Last1 - _First1)));
        if (_Res != _Mybase::ok)
            return ((int)_Done);    // error or partial

        if (_Next2 == &_Wide + 1)
            ++_Done;    // replacement do_in might not convert one
        _First1 = _Next1;
    }
    return ((int)_Done);
}
virtual bool do_always_noconv() const _THROW0()
{   // this facet does not promise that conversions leave input unchanged
    const bool _Never_converts = false;
    return (_Never_converts);
}
virtual int do_max_length() const _THROW0()
{   // report the most bytes one conversion can need
    const int _Max_bytes = 5;
    return (_Max_bytes);
}
virtual int do_encoding() const _THROW0()
{   // describe the external encoding (from codecvt)
    // The byte sequence depends on the shift state kept in the
    // conversion state, and the standard reserves -1 for such
    // state-dependent encodings (0 means variable width but stateless).
    return (-1);
}
};
} // namespace cvt
} // namespace stdext
#ifdef _TEST_IT
#define NCHARS 0x10000
#define MYWC_MAX 0xffff
#define MYFILE "xjis"
#define MYNAME stdext::cvt::codecvt_jis<Mywchar>
#include <cvt/xtest>
#endif /* _TEST_IT */
#pragma warning(pop)
#pragma pack(pop)
#endif /* RC_INVOKED */
#endif /* _CVT_XJIS_ */
/*
* Copyright (c) 1992-2009 by P.J. Plauger. ALL RIGHTS RESERVED.
* Consult your license regarding permissions and restrictions.
V5.20:0009 */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -