📄 stringconversion.cpp

📁 电驴的源代码
💻 CPP
字号:
//this file is part of eMule
//Copyright (C)2004 Merkur ( devs@emule-project.net / http://www.emule-project.net )
//
//This program is free software; you can redistribute it and/or
//modify it under the terms of the GNU General Public License
//as published by the Free Software Foundation; either
//version 2 of the License, or (at your option) any later version.
//
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//GNU General Public License for more details.
//
//You should have received a copy of the GNU General Public License
//along with this program; if not, write to the Free Software
//Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#include "stdafx.h"
#include "StringConversion.h"
#include <atlenc.h>

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif


int utf8towc(LPCSTR pcUtf8, UINT uUtf8Size, LPWSTR pwc, UINT uWideCharSize)
{
	LPWSTR pwc0 = pwc;

    while (uUtf8Size && uWideCharSize)
    {
        BYTE ucChar = *pcUtf8++;
        if (ucChar < 0x80)
		{
            uUtf8Size--;
            uWideCharSize--;
            *(pwc++) = ucChar;
        }
		else if ((ucChar & 0xC0) != 0xC0)
		{
            return -1; // Invalid UTF8 string..
        }
        else
        {
            BYTE ucMask = 0xE0;
            UINT uExpectedBytes = 1;
            while ((ucChar & ucMask) == ucMask)
			{
                ucMask |= ucMask >> 1;
                if (++uExpectedBytes > 3)
                    return -1; // Invalid UTF8 string..
            }

            if (uUtf8Size <= uExpectedBytes)
                return -1; // Invalid UTF8 string..

            UINT uProcessedBytes = 1 + uExpectedBytes;
            UINT uWideChar = (UINT)(ucChar & ~ucMask);
            if (uExpectedBytes == 1)
			{
                if ((uWideChar & 0x1E) == 0)
                    return -1; // Invalid UTF8 string..
            }
            else
			{
                if (uWideChar == 0 && ((BYTE)*pcUtf8 & 0x3F & (ucMask << 1)) == 0)
                    return -1; // Invalid UTF8 string..

                if (uExpectedBytes == 2)
				{
                    //if (uWideChar == 0x0D && ((BYTE)*pcUtf8 & 0x20))
                    //    return -1;
                }
                else if (uExpectedBytes == 3)
				{
                    if (uWideChar > 4)
                        return -1; // Invalid UTF8 string..
                    if (uWideChar == 4 && ((BYTE)*pcUtf8 & 0x30))
                        return -1; // Invalid UTF8 string..
                }
            }

            if (uWideCharSize < (UINT)(uExpectedBytes > 2) + 1)
                break; // buffer full

            while (uExpectedBytes--)
            {
                if (((ucChar = (BYTE)*(pcUtf8++)) & 0xC0) != 0x80)
                    return -1; // Invalid UTF8 string..
                uWideChar <<= 6;
                uWideChar |= (ucChar & 0x3F);
            }
            uUtf8Size -= uProcessedBytes;

            if (uWideChar < 0x10000)
            {
                uWideCharSize--;
                *(pwc++) = uWideChar;
            }
            else 
            {
                uWideCharSize -= 2;
                uWideChar -= 0x10000;
                *(pwc++) = 0xD800 | (uWideChar >> 10);
                *(pwc++) = 0xDC00 | (uWideChar & 0x03FF);
            }
        }
    }

    return pwc - pwc0;
}

int ByteStreamToWideChar(LPCSTR pcUtf8, UINT uUtf8Size, LPWSTR pwc, UINT uWideCharSize)
{
	int iWideChars = utf8towc(pcUtf8, uUtf8Size, pwc, uWideCharSize);
	if (iWideChars < 0)
	{
		LPWSTR pwc0 = pwc;
		while (uUtf8Size && uWideCharSize)
		{
			if ((*pwc++ = (BYTE)*pcUtf8++) == L'\0')
				break;
			uUtf8Size--;
			uWideCharSize--;
		}
		iWideChars = pwc - pwc0;
	}
	return iWideChars;
}

//void CreateBOMUTF8String(const CStringW& rwstr, CStringA& rstrUTF8)
//{
//	int iChars = AtlUnicodeToUTF8(rwstr, rwstr.GetLength(), NULL, 0);
//	int iRawChars = 3 + iChars;
//	LPSTR pszUTF8 = rstrUTF8.GetBuffer(iRawChars);
//	*pszUTF8++ = 0xEFU;
//	*pszUTF8++ = 0xBBU;
//	*pszUTF8++ = 0xBFU;
//	AtlUnicodeToUTF8(rwstr, rwstr.GetLength(), pszUTF8, iRawChars);
//	rstrUTF8.ReleaseBuffer(iRawChars);
//}

CStringA wc2utf8(const CStringW& rwstr)
{
	CStringA strUTF8;
	int iChars = AtlUnicodeToUTF8(rwstr, rwstr.GetLength(), NULL, 0);
	if (iChars > 0)
	{
		LPSTR pszUTF8 = strUTF8.GetBuffer(iChars);
		AtlUnicodeToUTF8(rwstr, rwstr.GetLength(), pszUTF8, iChars);
		strUTF8.ReleaseBuffer(iChars);
	}
	return strUTF8;
}

CString OptUtf8ToStr(const CStringA& rastr)
{
	CStringW wstr;
	int iMaxWideStrLen = rastr.GetLength();
	LPWSTR pwsz = wstr.GetBuffer(iMaxWideStrLen);
	int iWideChars = utf8towc(rastr, rastr.GetLength(), pwsz, iMaxWideStrLen);
	if (iWideChars <= 0)
	{
		// invalid UTF8 string...
		wstr.ReleaseBuffer(0);
		wstr = rastr;				// convert with local codepage
	}
	else
		wstr.ReleaseBuffer(iWideChars);
#ifdef _UNICODE
	return wstr;					// just return the string
#else
	return CString(wstr);			// convert to local codepage
#endif
}

CString OptUtf8ToStr(const CStringW& rwstr)
{
	CStringA astr;
	for (int i = 0; i < rwstr.GetLength(); i++)
	{
		if (rwstr[i] > 0xFF)
		{
			// this is no UTF8 string (it's already an Unicode string)...
#ifdef _UNICODE
			return rwstr;			// just return the string
#else
			return CString(rwstr);	// convert to local codepage
#endif
		}
		astr += (BYTE)rwstr[i];
	}
	return OptUtf8ToStr(astr);
}

CStringA StrToUtf8(const CString& rstr)
{
#ifdef _UNICODE
	return wc2utf8(rstr);
#else
	CStringW wstr(rstr);
	return wc2utf8(wstr);
#endif
}

bool IsValidEd2kString(LPCTSTR psz)
{
	while (*psz != _T('\0'))
	{
		// NOTE: The '?' is the character which is returned by windows if user entered an Unicode string into
		// an edit control (although application runs in ANSI mode).
		// The '?' is also invalid for search expressions and filenames.
		if (*psz == _T('?'))
			return false;
		psz++;
	}
	return true;
}

bool IsValidEd2kStringA(LPCSTR psz)
{
	while (*psz != '\0')
	{
		// NOTE: The '?' is the character which is returned by windows if user entered an Unicode string into
		// an edit control (although application runs in ANSI mode).
		// The '?' is also invalid for search expressions and filenames.
		if (*psz == '?')
			return false;
		psz++;
	}
	return true;
}

CString EncodeUrlUtf8(const CString& rstr)
{
	CString url;
	CStringA utf8(StrToUtf8(rstr));
	for (int i = 0; i < utf8.GetLength(); i++)
	{
		if ((BYTE)utf8[i] >= 0x7F)
			url.AppendFormat(_T("%%%02X"), (BYTE)utf8[i]);
		else
			url += utf8[i];
	}
	return url;
}
💿 文件大小 4150 K
👤 上传用户 yubo0808140725
📂 所属分类 Internet/网络编程
🏷️ 相关标签

#源代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -