strutil_encoding.cpp

来自「一款密码保险箱源码」· C++ 代码 · 共 348 行

CPP
348
字号
/*
  KeePass Password Safe - The Open-Source Password Manager
  Copyright (C) 2003-2007 Dominik Reichl <dominik.reichl@t-online.de>

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*/

#include "StdAfx.h"
#include "../Util/MemUtil.h"
#include "../Util/StrUtil.h"

// Converts a zero-terminated wide-character string to the system ANSI code
// page (CP_ACP).
//
// lptString: source string; must not be NULL.
//
// Returns a buffer allocated with new[] that the caller has to release with
// delete[], or NULL if lptString is NULL. If the conversion fails, the
// returned buffer contains an empty string.
char *_StringToAnsi(const WCHAR *lptString)
{
	if(lptString == NULL) { ASSERT(FALSE); return NULL; }

	// Query the exact number of bytes required, terminator included. The
	// number of wide characters is not a usable bound here: on multi-byte
	// code pages one wide character may convert to more than one byte.
	const int nBytes = WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK, lptString,
		-1, NULL, 0, NULL, NULL);
	ASSERT(nBytes > 0); // Even an empty string needs one byte for the terminator

	char *p = new char[((nBytes > 0) ? nBytes : 0) + 2];
	p[0] = 0; p[1] = 0;

	if(nBytes > 0)
	{
		// The return value is the number of bytes written, or 0 on failure;
		// ERROR_INSUFFICIENT_BUFFER is only reported through GetLastError().
		const int nWritten = WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK,
			lptString, -1, p, nBytes, NULL, NULL);
		ASSERT(nWritten != 0);

		// Never hand out a partially written, unterminated buffer
		if(nWritten == 0) { p[0] = 0; p[1] = 0; }
	}

	return p;
}

// Converts a zero-terminated ANSI (CP_ACP) string to wide characters.
//
// pszString: source string; must not be NULL.
//
// Returns a buffer allocated with new[] that the caller has to release with
// delete[], or NULL if pszString is NULL. If the conversion fails, the
// returned buffer contains an empty string.
WCHAR *_StringToUnicode(const char *pszString)
{
	if(pszString == NULL) { ASSERT(FALSE); return NULL; }

	// Calling the API with an output size of 0 returns the required number of
	// wide characters, terminator included.
	const int nChars = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, pszString,
		-1, NULL, 0);
	ASSERT(nChars > 0); // Even an empty string needs one WCHAR for the terminator

	WCHAR *p = new WCHAR[((nChars > 0) ? nChars : 0) + 2];
	p[0] = 0; p[1] = 0;

	if(nChars > 0)
	{
		// The return value is the number of characters written, or 0 on
		// failure. It must not be compared with ERROR_INSUFFICIENT_BUFFER
		// (122): that code is only available through GetLastError(), and a
		// successful conversion of 122 characters would look like an error.
		const int nWritten = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
			pszString, -1, p, nChars);
		ASSERT(nWritten != 0);

		// Never hand out a partially written, unterminated buffer
		if(nWritten == 0) { p[0] = 0; p[1] = 0; }
	}

	return p;
}

// Encodes a zero-terminated TCHAR string as UTF-8.
//
// In ANSI builds the input is first converted from CP_ACP to wide characters
// in a temporary buffer; that buffer is erased and freed before returning,
// on every exit path.
//
// Each 16-bit code unit is encoded on its own (1 to 3 bytes). Surrogate pairs
// are therefore NOT combined into 4-byte sequences; this matches what
// _UTF8ToString in this file is able to decode.
//
// pszSourceString: source string; must not be NULL.
//
// Returns a buffer allocated with new[] (release with delete[]) holding the
// UTF-8 bytes followed by two zero bytes, or NULL if pszSourceString is NULL
// or the output buffer could not be allocated.
C_FN_SHARE UTF8_BYTE *_StringToUTF8(const TCHAR *pszSourceString)
{
	ASSERT(pszSourceString != NULL); if(pszSourceString == NULL) return NULL;

	const WCHAR *pUni = NULL;
	DWORD dwLength = 0; // Number of WCHARs available at pUni, terminator included

#ifdef _UNICODE
	dwLength = lstrlen(pszSourceString) + 1;
	pUni = pszSourceString;
#else
	// This returns the new length plus the zero terminator - i.e. the whole buffer
	dwLength = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, pszSourceString, -1, NULL, 0);
	const DWORD dwUniBufferLength = dwLength + 2;
	WCHAR *pUniBuffer = new WCHAR[dwUniBufferLength];
	pUniBuffer[0] = 0; pUniBuffer[1] = 0;
	pUni = pUniBuffer;
	MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, pszSourceString, -1, pUniBuffer, dwLength);
#endif

	// First pass: count the characters in front of the terminator and the
	// UTF-8 bytes they need
	DWORD dwChars = 0;
	DWORD dwBytesNeeded = 0;
	for(; dwChars < dwLength; dwChars++)
	{
		const WCHAR ut = pUni[dwChars];

		if(ut == 0) break;

		if(ut < 0x80) dwBytesNeeded++;
		else if(ut < 0x0800) dwBytesNeeded += 2;
		else dwBytesNeeded += 3;
	}

	BYTE *p = new BYTE[dwBytesNeeded + 2];
	ASSERT(p != NULL);

	if(p != NULL)
	{
		// Second pass: encode
		DWORD j = 0;
		for(DWORD i = 0; i < dwChars; i++)
		{
			const WCHAR ut = pUni[i];

			if(ut < 0x80) // 7-bit character, store as it is
			{
				p[j] = (BYTE)ut; j++;
			}
			else if(ut < 0x0800) // Two bytes are enough
			{
				p[j] = (BYTE)(0xC0 | (ut >> 6)); j++;
				p[j] = (BYTE)(0x80 | (ut & 0x3F)); j++;
			}
			else // A 16-bit code unit never needs more than 3 bytes
			{
				p[j] = (BYTE)(0xE0 | (ut >> 12)); j++;
				p[j] = (BYTE)(0x80 | ((ut >> 6) & 0x3F)); j++;
				p[j] = (BYTE)(0x80 | (ut & 0x3F)); j++;
			}
		}
		ASSERT(j == dwBytesNeeded);

		// Terminate the string; both spare bytes of the buffer are zeroed
		p[j] = 0; p[j + 1] = 0;
	}

#ifndef _UNICODE
	// The temporary copy may hold sensitive text: wipe it before freeing it,
	// also when the output allocation above failed
	mem_erase((unsigned char *)pUniBuffer, dwUniBufferLength * sizeof(WCHAR));
	SAFE_DELETE_ARRAY(pUniBuffer);
#endif

	return p;
}

// Counts the characters in a zero-terminated UTF-8 string.
//
// Every byte that is not a continuation byte (bit pattern 10xxxxxx) starts a
// character, so it is enough to count those bytes. No validation is done.
//
// pUTF8String: string to examine; must not be NULL.
//
// Returns the number of characters, or 0 if pUTF8String is NULL.
C_FN_SHARE DWORD _UTF8NumChars(const UTF8_BYTE *pUTF8String)
{
	ASSERT(pUTF8String != NULL);
	if(pUTF8String == NULL) return 0;

	DWORD dwCount = 0;

	for(DWORD dwPos = 0; ; dwPos++)
	{
		const BYTE btCur = pUTF8String[dwPos];
		if(btCur == 0) break;

		// Single-byte characters and lead bytes are counted, continuation
		// bytes are skipped
		if((btCur & 0xC0) != 0x80) dwCount++;
	}

	return dwCount;
}

// Returns the number of bytes needed to represent the string in UTF-8, not
// counting the terminating zero.
//
// In ANSI builds each byte of the input is taken as one character code in the
// range 0..255; no code page conversion is applied.
//
// pszString: string to measure; must not be NULL (0 is returned for NULL).
C_FN_SHARE DWORD _UTF8BytesNeeded(const TCHAR *pszString)
{
	// Don't use this function any more. _StringToUTF8 runs a pre-conversion
	// step, which makes a simple length calculation like the one below
	// unreliable. If you really need this function, comment out the
	// following ASSERT, but be careful!
	ASSERT(FALSE);

	ASSERT(pszString != NULL);
	if(pszString == NULL) return 0;

	DWORD dwTotal = 0;

	for(DWORD dwPos = 0; ; dwPos++)
	{
#ifdef _UNICODE
		const USHORT usChar = (USHORT)pszString[dwPos];
#else
		// Go through BYTE first so that characters >= 0x80 are not sign-extended
		const USHORT usChar = (USHORT)((BYTE)pszString[dwPos]);
#endif

		if(usChar == 0) break;

		if(usChar >= 0x0800) dwTotal += 3;
		else if(usChar >= 0x0080) dwTotal += 2;
		else dwTotal++;
	}

	return dwTotal;
}

// Decodes a zero-terminated UTF-8 string into a TCHAR string (counterpart of
// _StringToUTF8).
//
// Only 1- to 3-byte sequences are supported. If the input contains any byte
// >= 0xF0, NULL is returned. If a malformed sequence is found while decoding
// (missing continuation byte, or a continuation byte where a lead byte is
// expected), decoding stops there and the text decoded so far is returned;
// debug builds additionally assert.
//
// In ANSI builds the decoded wide string is converted to CP_ACP, and the
// temporary wide buffer is erased before it is freed.
//
// pUTF8String: UTF-8 input; must not be NULL.
//
// Returns a zero-terminated buffer allocated with new[] (release with
// delete[]), or NULL as described above.
C_FN_SHARE TCHAR *_UTF8ToString(const UTF8_BYTE *pUTF8String)
{
	ASSERT(pUTF8String != NULL); if(pUTF8String == NULL) return NULL;

	// Size the output by the number of input bytes. This is an upper bound
	// for the number of decoded characters, as every character consumes at
	// least one byte.
	DWORD dwNumChars = 0;
	for(;;)
	{
		const BYTE bt = (BYTE)pUTF8String[dwNumChars];
		if(bt == 0) break;
		if(bt >= 0xF0) return NULL; // Invalid or unsupported (4-byte) UTF-8
		dwNumChars++;
	}

	const DWORD dwPBufLength = dwNumChars + 2;
	WCHAR *p = new WCHAR[dwPBufLength];
	ASSERT(p != NULL); if(p == NULL) return NULL;

	DWORD i = 0, j = 0;
	for(;;)
	{
		const BYTE b0 = (BYTE)pUTF8String[i]; i++;

		if(b0 < 0x80)
		{
			// 7-bit character; this also copies the terminating zero
			p[j] = (WCHAR)b0; j++;
			if(b0 == 0) break;
			continue;
		}

		const BOOL bTwoBytes = ((b0 & 0xE0) == 0xC0) ? TRUE : FALSE;
		const BOOL bThreeBytes = ((b0 & 0xF0) == 0xE0) ? TRUE : FALSE;

		// A continuation byte cannot start a character
		ASSERT((bTwoBytes != FALSE) || (bThreeBytes != FALSE));
		if((bTwoBytes == FALSE) && (bThreeBytes == FALSE)) break;

		// A terminator in place of a continuation byte fails this check as
		// well, so the input is never read past its end
		const BYTE b1 = (BYTE)pUTF8String[i]; i++;
		ASSERT((b1 & 0xC0) == 0x80);
		if((b1 & 0xC0) != 0x80) break;

		if(bTwoBytes != FALSE)
		{
			p[j] = (WCHAR)(((b0 & 0x1F) << 6) | (b1 & 0x3F)); j++;
		}
		else
		{
			const BYTE b2 = (BYTE)pUTF8String[i]; i++;
			ASSERT((b2 & 0xC0) == 0x80);
			if((b2 & 0xC0) != 0x80) break;

			p[j] = (WCHAR)(((b0 & 0x0F) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F)); j++;
		}
	}

	// Make sure the result is terminated even if decoding stopped early.
	// j never exceeds dwNumChars + 1: each stored character consumed at least
	// one input byte, plus at most one terminator.
	p[j] = 0;

#ifdef _UNICODE
	return (TCHAR *)p;
#else
	// Got Unicode, convert to ANSI
	const int nAnsiBytes = WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK, p, -1,
		NULL, 0, NULL, NULL);
	ASSERT(nAnsiBytes > 0); // Even an empty string needs one byte for the terminator

	char *pANSI = new char[((nAnsiBytes > 0) ? nAnsiBytes : 0) + 2];
	pANSI[0] = 0; pANSI[1] = 0;

	if(nAnsiBytes > 0)
	{
		// The return value is the number of bytes written, or 0 on failure;
		// ERROR_INSUFFICIENT_BUFFER is only reported through GetLastError()
		const int nWritten = WideCharToMultiByte(CP_ACP, WC_COMPOSITECHECK, p, -1,
			pANSI, nAnsiBytes, NULL, NULL);
		ASSERT(nWritten != 0);
		if(nWritten == 0) { pANSI[0] = 0; pANSI[1] = 0; }
	}

	// Wipe the whole temporary buffer; dwPBufLength counts WCHARs, not bytes
	mem_erase((unsigned char *)p, dwPBufLength * sizeof(WCHAR));
	SAFE_DELETE_ARRAY(p);
	return (TCHAR *)pANSI;
#endif
}

// Checks whether a zero-terminated byte string is structurally valid UTF-8
// made of 1- to 3-byte sequences. A leading byte order mark (EF BB BF) is
// skipped.
//
// Rejected: continuation bytes (0x80..0xBF) in lead position, lead bytes
// >= 0xF0 (4-byte sequences; _UTF8ToString in this file does not decode
// them), and sequences with missing continuation bytes.
// Not checked: overlong encodings and encoded surrogate code points.
//
// pUTF8String: bytes to examine; must not be NULL.
//
// Returns TRUE if the string passes the checks (an empty string does),
// FALSE otherwise or if pUTF8String is NULL.
C_FN_SHARE BOOL _IsUTF8String(const UTF8_BYTE *pUTF8String)
{
	ASSERT(pUTF8String != NULL); if(pUTF8String == NULL) return FALSE;

	DWORD i = 0;

	// Skip the BOM if present. The short-circuit evaluation stops at the first
	// mismatch, so no byte behind the terminator is read.
	if(((BYTE)pUTF8String[0] == 0xEF) && ((BYTE)pUTF8String[1] == 0xBB) &&
		((BYTE)pUTF8String[2] == 0xBF)) i = 3;

	for(;;)
	{
		const BYTE b0 = (BYTE)pUTF8String[i]; i++;

		if(b0 == 0) break;
		if(b0 < 0x80) continue; // 7-bit character

		// Derive the number of continuation bytes from the lead byte
		DWORD dwTrail;
		if((b0 & 0xE0) == 0xC0) dwTrail = 1;
		else if((b0 & 0xF0) == 0xE0) dwTrail = 2;
		else return FALSE; // Stray continuation byte or unsupported lead byte

		for(; dwTrail != 0; dwTrail--)
		{
			// A terminator in place of a continuation byte fails this check
			// as well, so the input is never read past its end
			if(((BYTE)pUTF8String[i] & 0xC0) != 0x80) return FALSE;
			i++;
		}
	}

	return TRUE;
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?