strconv.cpp
来自「A*算法 A*算法 A*算法 A*算法A*算法A*算法」· C++ 代码 · 共 2,176 行 · 第 1/5 页
CPP
2,176 行
(errno != E2BIG || bufLeft != 0))
#else
#define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
#endif
// Casts the address of an input pointer to `ICONV_CONST char **` so it can be
// passed as iconv()'s input-buffer argument. ICONV_CONST is defined outside
// this section -- presumably by the build configuration, to account for iconv
// implementations that differ in the constness of that parameter (TODO confirm).
#define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
// ----------------------------------------------------------------------------
// wxMBConv_iconv: encapsulates an iconv character set
// ----------------------------------------------------------------------------
// Multibyte <-> wide character converter implemented on top of iconv.
//
// Each object holds two iconv descriptors, one per conversion direction,
// which are opened by the constructor and closed by the destructor. Because
// the destructor closes the raw handles, copying an object would lead to the
// same handles being closed twice; copying is therefore disabled.
class wxMBConv_iconv : public wxMBConv
{
public:
    // opens the descriptors for the charset called "name"; check IsOk()
    // afterwards to find out whether this succeeded
    wxMBConv_iconv(const wxChar *name);
    virtual ~wxMBConv_iconv();

    // multibyte -> wide char and wide char -> multibyte conversions; both
    // return (size_t)-1 when iconv reports a failure
    virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
    virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;

    // true only if the descriptors for both directions were opened
    bool IsOk() const
        { return (m2w != (iconv_t)-1) && (w2m != (iconv_t)-1); }

protected:
    // the iconv handlers used to translate from multibyte to wide char and in
    // the other direction; (iconv_t)-1 marks a handle that is not open
    iconv_t m2w,
            w2m;

#if wxUSE_THREADS
    // guards access to m2w and w2m objects
    wxMutex m_iconvMutex;
#endif

private:
    // the name (for iconv_open()) of a wide char charset -- if none is
    // available on this machine, it will remain NULL
    static const char *ms_wcCharsetName;

    // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
    // different endian-ness than the native one
    static bool ms_wcNeedsSwap;

    // copying is not supported: the object owns its iconv handles and a copy
    // would close them a second time (declared but intentionally not
    // implemented)
    wxMBConv_iconv(const wxMBConv_iconv&);
    wxMBConv_iconv& operator=(const wxMBConv_iconv&);
};
// Factory exposing wxMBConv_iconv (which is private to this file) to the unit
// tests: returns a new heap-allocated converter for the charset "name", or a
// null pointer if the converter could not be set up. The caller owns the
// returned object.
WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
{
    wxMBConv_iconv* const conv = new wxMBConv_iconv( name );

    if ( conv->IsOk() )
        return conv;

    // unusable converter: don't hand it out
    delete conv;
    return 0;
}
// Definitions of the class-wide state shared by all wxMBConv_iconv objects.
// The charset name starts out as NULL, which the constructor takes to mean
// "not determined yet"; the swap flag is only meaningful once a name is set.
const char *wxMBConv_iconv::ms_wcCharsetName = NULL;
bool wxMBConv_iconv::ms_wcNeedsSwap = false;
// Opens the iconv descriptors converting between the charset "name" and
// wchar_t.
//
// The first time this runs (i.e. while ms_wcCharsetName is still NULL) it also
// determines which iconv charset name represents wchar_t, trying in turn
// WC_NAME_BEST, WC_NAME and "WCHAR_T", and, for the latter two, whether the
// result must be byte-swapped. The outcome is cached in the static members;
// if detection fails they are reset so that a later construction retries.
//
// On return m2w and/or w2m are (iconv_t)-1 if the corresponding descriptor
// could not be opened; use IsOk() to check.
wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
{
    // iconv operates with chars, not wxChars, but luckily it uses only ASCII
    // names for the charsets
    const wxCharBuffer cname(wxString(name).ToAscii());

    // check for charset that represents wchar_t:
    if (ms_wcCharsetName == NULL)
    {
        ms_wcNeedsSwap = false;

        // try charset with explicit bytesex info (e.g. "UCS-4LE"):
        ms_wcCharsetName = WC_NAME_BEST;
        m2w = iconv_open(ms_wcCharsetName, cname);

        if (m2w == (iconv_t)-1)
        {
            // try charset w/o bytesex info (e.g. "UCS4")
            // and check for bytesex ourselves:
            ms_wcCharsetName = WC_NAME;
            m2w = iconv_open(ms_wcCharsetName, cname);

            // last bet, try if it knows WCHAR_T pseudo-charset
            if (m2w == (iconv_t)-1)
            {
                ms_wcCharsetName = "WCHAR_T";
                m2w = iconv_open(ms_wcCharsetName, cname);
            }

            if (m2w != (iconv_t)-1)
            {
                // convert the two bytes "A\0" and look at the first wide
                // character produced: if it differs from (wchar_t)'A' the
                // charset's byte order is not the native one
                char    buf[2], *bufPtr;
                wchar_t wbuf[2], *wbufPtr;
                size_t  insz, outsz;
                size_t  res;

                buf[0] = 'A';
                buf[1] = 0;
                wbuf[0] = 0;
                insz = 2;
                outsz = SIZEOF_WCHAR_T * 2;
                wbufPtr = wbuf;
                bufPtr = buf;

                res = iconv(m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
                            (char**)&wbufPtr, &outsz);

                if (ICONV_FAILED(res, insz))
                {
                    ms_wcCharsetName = NULL;
                    wxLogLastError(wxT("iconv"));
                    wxLogError(_("Conversion to charset '%s' doesn't work."), name);
                }
                else
                {
                    ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
                }
            }
            else
            {
                ms_wcCharsetName = NULL;

                // VS: we must not output an error here, since wxWidgets will safely
                //     fall back to using wxEncodingConverter.
                wxLogTrace(TRACE_STRCONV, wxT("Impossible to convert to/from charset '%s' with iconv, falling back to wxEncodingConverter."), name);
            }
        }

        // ms_wcCharsetName is a narrow string while the format is a wxChar
        // one, so it must be converted before being passed for "%s" (passing
        // the char pointer directly is wrong in Unicode builds)
        wxLogTrace(TRACE_STRCONV,
                   wxT("wchar_t charset is '%s', needs swap: %i"),
                   wxString::FromAscii(ms_wcCharsetName ? ms_wcCharsetName
                                                        : "<none>").c_str(),
                   ms_wcNeedsSwap);
    }
    else // we already have ms_wcCharsetName
    {
        m2w = iconv_open(ms_wcCharsetName, cname);
    }

    // NB: don't ever pass NULL to iconv_open(), it may crash!
    if ( ms_wcCharsetName )
    {
        w2m = iconv_open( cname, ms_wcCharsetName);
    }
    else
    {
        w2m = (iconv_t)-1;
    }
}
// Closes whichever of the two iconv descriptors were successfully opened.
wxMBConv_iconv::~wxMBConv_iconv()
{
    // value used to mark a descriptor which is not open
    const iconv_t notOpen = (iconv_t)-1;

    if ( m2w != notOpen )
    {
        iconv_close(m2w);
    }

    if ( w2m != notOpen )
    {
        iconv_close(w2m);
    }
}
// Converts the NUL-terminated multibyte string psz to wide characters.
//
// If buf is not NULL, at most n wide characters are written to it and a
// terminating 0 is appended when there is room left for it. If buf is NULL
// nothing is stored and the string is only converted, piece by piece, into a
// small scratch buffer in order to count the characters.
//
// Returns the number of wide characters produced (not counting the
// terminating 0) or (size_t)-1 if iconv failed.
size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
#if wxUSE_THREADS
    // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
    //     Unfortunately there are a couple of global wxCSConv objects such as
    //     wxConvLocal that are used all over wx code, so the handle has to be
    //     protected against simultaneous use by several threads. Otherwise
    //     only a few wx classes would be safe to use from non-main threads
    //     as MB<->WC conversion would fail "randomly".
    wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
#endif

    // iconv() advances the pointers and decreases the byte counts it is
    // given, so it works on these copies rather than on the parameters
    const char *src = psz;
    size_t srcBytesLeft = strlen(psz);

    size_t converted,   // number of wide characters produced so far
           rc;          // result of the last iconv() call

    if ( !buf )
    {
        // no destination buffer: convert in chunks into a scratch buffer
        // just to find out how many characters the result needs
        wchar_t scratch[8];

        converted = 0;
        for ( ;; )
        {
            wchar_t *dst = scratch;
            size_t dstBytesLeft = 8*SIZEOF_WCHAR_T;

            rc = iconv(m2w,
                       ICONV_CHAR_CAST(&src), &srcBytesLeft,
                       (char**)&dst, &dstBytesLeft);

            converted += 8 - (dstBytesLeft/SIZEOF_WCHAR_T);

            // carry on only while the scratch buffer was simply too small
            if ( rc != (size_t)-1 || errno != E2BIG )
                break;
        }
    }
    else
    {
        // convert straight into the caller's buffer
        wchar_t *dst = buf;
        size_t dstBytesLeft = n * SIZEOF_WCHAR_T;

        rc = iconv(m2w,
                   ICONV_CHAR_CAST(&src), &srcBytesLeft,
                   (char**)&dst, &dstBytesLeft);

        converted = n - (dstBytesLeft / SIZEOF_WCHAR_T);

        if (ms_wcNeedsSwap)
        {
            // convert to native endianness; note that this starts from buf,
            // not from dst which iconv() has moved past the output
            WC_BSWAP(buf, converted)
        }

        // iconv() was only given strlen(psz) bytes and so never saw the
        // trailing zero: add it here if the buffer still has room for it
        if (converted < n)
            buf[converted] = 0;
    }

    if (ICONV_FAILED(rc, srcBytesLeft))
    {
        //VS: it is ok if iconv fails, hence trace only
        wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
        return (size_t)-1;
    }

    return converted;
}
// Converts the NUL-terminated wide string psz to the multibyte charset.
//
// If buf is not NULL, at most n bytes are written to it and a terminating 0
// is appended when there is room left for it. If buf is NULL nothing is
// stored and the string is only converted, piece by piece, into a small
// scratch buffer in order to count the bytes.
//
// Returns the number of bytes produced (not counting the terminating 0) or
// (size_t)-1 if iconv failed or the temporary buffer needed for byte swapping
// could not be allocated.
size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
{
#if wxUSE_THREADS
    // NB: explained in MB2WC
    wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
#endif

    // keep the length in characters and the length in bytes apart: iconv()
    // wants bytes but the copy and the byte swapping below work on characters
    const size_t inlen = wxWcslen(psz);
    size_t inbuf = inlen * SIZEOF_WCHAR_T;
    size_t outbuf = n;
    size_t res, cres;

    wchar_t *tmpbuf = 0;

    if (ms_wcNeedsSwap)
    {
        // need to copy to temp buffer to switch endianness
        // (doing WC_BSWAP twice on the original buffer won't help, as it
        //  could be in read-only memory, or be accessed in some other thread)
        //
        // the copy covers the inlen characters plus the terminating 0 and
        // not a byte more, so that nothing beyond the end of the source
        // string is ever read
        const size_t tmpBytes = (inlen + 1) * SIZEOF_WCHAR_T;
        tmpbuf = (wchar_t *)malloc(tmpBytes);
        if ( !tmpbuf )
            return (size_t)-1;

        memcpy(tmpbuf, psz, tmpBytes);
        WC_BSWAP(tmpbuf, inlen)
        psz = tmpbuf;
    }

    if (buf)
    {
        // have destination buffer, convert there
        cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );

        res = n-outbuf;

        // NB: iconv was given only wcslen(psz) characters on input, and so
        //     it couldn't convert the trailing zero. Let's do it ourselves
        //     if there's some room left for it in the output buffer.
        //     iconv() has advanced buf past the bytes it wrote, so buf[0] is
        //     the first free position and not the start of the output.
        if (res < n)
            buf[0] = 0;
    }
    else
    {
        // no destination buffer... convert using temp buffer
        // to calculate destination buffer requirement
        char tbuf[16];
        res = 0;
        do {
            buf = tbuf; outbuf = 16;

            cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );

            res += 16 - outbuf;
        } while ((cres==(size_t)-1) && (errno==E2BIG));
    }

    // test for (and report) the failure before releasing the temporary
    // buffer so that free() can't disturb the error code being examined
    const bool failed = ICONV_FAILED(cres, inbuf);
    if ( failed )
    {
        //VS: it is ok if iconv fails, hence trace only
        wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
    }

    // tmpbuf is still null if no swapping was needed and free() ignores that
    free(tmpbuf);

    return failed ? (size_t)-1 : res;
}
#endif // HAVE_ICONV
// ============================================================================
// Win32 conversion classes
// ============================================================================
#ifdef wxHAVE_WIN32_MB2WC
// from utils.cpp
//
// These functions are declared here directly and are only available when
// wxUSE_FONTMAP is enabled. The wxMBConv_win32 constructors below store their
// results as the code page to use: the first one takes a charset name, the
// second one a wxFontEncoding value.
#if wxUSE_FONTMAP
extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
#endif
class wxMBConv_win32 : public wxMBConv
{
public:
wxMBConv_win32()
{
m_CodePage = CP_ACP;
}
#if wxUSE_FONTMAP
wxMBConv_win32(const wxChar* name)
{
m_CodePage = wxCharsetToCodepage(name);
}
// constructor from a font encoding: the code page to use is whatever
// wxEncodingToCodepage() returns for this encoding
wxMBConv_win32(wxFontEncoding encoding)
{
    this->m_CodePage = wxEncodingToCodepage(encoding);
}
#endif
// Converts the NUL-terminated multibyte string psz to wide characters using
// ::MultiByteToWideChar() with this object's code page.
//
// If buf is NULL only the required length is computed, otherwise the result
// is stored in buf whose size is n. Returns the length of the converted
// string, not counting the terminating NUL, or (size_t)-1 if the API call
// returned 0.
size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    // MB_ERR_INVALID_CHARS is needed because without it the behaviour would
    // not be compatible with the Unix (iconv-based) version and would break
    // the library itself: e.g. wxTextInputStream::NextChar() relies on
    // reading an incomplete multibyte character resulting in an error.
    //
    // CP_UTF7 is the exception: combining it with MB_ERR_INVALID_CHARS
    // results in an error (tested under Windows Server 2003), apparently on
    // purpose, i.e. the function accepts any input in this case. Returning
    // an error for ill-formed input would be preferable but our own
    // wxMBConvUTF7 doesn't detect errors either (e.g. a lone "+" which is
    // explicitly ill-formed according to RFC 2152), so there isn't even a
    // fallback to use here.
    const int flags = m_CodePage == CP_UTF7 ? 0 : MB_ERR_INVALID_CHARS;

    // passing 0 as the output size asks only for the required length
    const size_t outSize = buf ? n : 0;

    const size_t len = ::MultiByteToWideChar
                         (
                            m_CodePage,     // code page
                            flags,          // flags: fail on error
                            psz,            // input string
                            -1,             // its length (NUL-terminated)
                            buf,            // output string
                            outSize         // size of output buffer
                         );

    if ( len == 0 )
        return (size_t)-1;

    // the value returned is the count of written chars for buf != NULL and
    // the size of the needed buffer for buf == NULL, so in either case the
    // length of the string (which never includes the terminating NUL) is one
    // less than it
    return len - 1;
}
size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
{
/*
we have a problem here: by default, WideCharToMultiByte() may
replace characters unrepresentable in the target code page with bad
quality approximations such as turning "1/2" symbol (U+00BD) into
"1" for the code pages which don't have it and we, obviously, want
to avoid this at any price
the trouble is that this function does it _silently_, i.e. it won't
even tell us whether it did or not... Win98/2000 and higher provide
WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
we have to resort to a round trip, i.e. check that converting back
results in the same string -- this is, of course, expensive but
otherwise we simply can't be sure to not garble the data.
*/
// determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
// it doesn't work with CJK encodings (which we test for rather roughly
// here...) nor with UTF-7/8 nor, of course, with Windows versions not
// supporting it
BOOL usedDef wxDUMMY_INITIALIZE(false);
BOOL *pUsedDef;
int flags;
if ( CanUseNoBestFit() && m_CodePage < 50000 )
{
// it's our lucky day
flags = WC_NO_BEST_FIT_CHARS;
pUsedDef = &usedDef;
}
else // old system or unsupported encoding
{
flags = 0;
pUsedDef = NULL;
}
const size_t len = ::WideCharToMultiByte
(
m_CodePage, // code page
flags, // either none or no best fit
pwz, // input string
-1, // it is (wide) NUL-terminated
buf, // output buffer
buf ? n : 0, // and its size
NULL, // default "replacement" char
pUsedDef // [out] was it used?
);
if ( !len )
{
// function totally failed
return (size_t)-1;
}
// if we were really converting, check if we succeeded
if ( buf )
{
if ( flags )
{
// check if the conversion failed, i.e. if any replacements
// were done
if ( usedDef )
return (size_t)-1;
}
else // we must resort to double tripping...
{
wxWCharBuffer wcBuf(n);
if ( MB2WC(wcBuf.data(), buf, n) == (size_t)-1 ||
wcscmp(wcBuf, pwz) != 0 )
{
// we didn't obtain the same thing we started from, hence
// the conversion was lossy and we consider that it failed
return (size_t)-1;
}
}
}
// see the comment above for the reason of "len - 1"
return len - 1;
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?