首页 › 资源下载 › 其他 › Encode/Decode UTF-8 › 源码查看
stdiofileex.cpp

来自「Encode/Decode UTF-8 format from/to text 」· C++ 代码 · 共 666 行 · 第 1/2 页
CPP
666 行
// StdioFileEx.cpp: implementation of the CStdioFileEx class.
//
// Version 1.1 23 August 2003.	Incorporated fixes from Dennis Jeryd.
// Version 1.3 19 February 2005. Incorporated fixes from Howard J Oh and some of my own.
// Version 1.4 26 February 2005. Fixed stupid screw-up in code from 1.3.
// Version 1.5 18 November 2005. - Incorporated fixes from Andy Goodwin.
//											- Allows code page to be specified for reading/writing
//											- Properly calculates multibyte buffer size instead of
//												assuming lstrlen(s).
//											- Should handle UTF8 properly.
//
// Copyright David Pritchard 2003-2005. davidpritchard@ctv.es
//
// You can use this class freely, but please keep my ego happy 
// by leaving this comment in place.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "StdioFileEx.h"

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

/*static*/ const UINT CStdioFileEx::modeWriteUnicode = 0x20000; // Add this flag to write in Unicode

CStdioFileEx::CStdioFileEx(): CStdioFile()
{
	m_bIsUnicodeText = false;
	m_nFileCodePage = -1;
}

CStdioFileEx::CStdioFileEx(LPCTSTR lpszFileName,UINT nOpenFlags)
	:CStdioFile(lpszFileName, ProcessFlags(lpszFileName, nOpenFlags))
{
	m_nFileCodePage = -1;
}

void CStdioFileEx::SetCodePage(IN const UINT nCodePage)
{
	m_nFileCodePage = (int)nCodePage; 
}

BOOL CStdioFileEx::Open(LPCTSTR lpszFileName,UINT nOpenFlags,CFileException* pError /*=NULL*/)
{
	// Process any Unicode stuff
	ProcessFlags(lpszFileName, nOpenFlags);

	return CStdioFile::Open(lpszFileName, nOpenFlags, pError);
}

BOOL CStdioFileEx::ReadString(CString& rString)
{
	const int	nMAX_LINE_CHARS = 4096;
	BOOL			bReadData = FALSE;
	LPTSTR		lpsz;
	int			nLen = 0;
	wchar_t*		pszUnicodeString = NULL;
	char	*		pszMultiByteString= NULL;
	int			nChars = 0;

	try
	{
		// If at position 0, discard byte-order mark before reading
		if (!m_pStream || (GetPosition() == 0 && m_bIsUnicodeText))
		{
			wchar_t	cDummy;
	//		Read(&cDummy, sizeof(_TCHAR));
			Read(&cDummy, sizeof(wchar_t));
		}

// If compiled for Unicode
#ifdef _UNICODE
		if (m_bIsUnicodeText)
		{
			// Do standard stuff - Unicode to Unicode. Seems to work OK.
			bReadData = CStdioFile::ReadString(rString);
		}
		else
		{
			pszUnicodeString	= new wchar_t[nMAX_LINE_CHARS]; 
			pszMultiByteString= new char[nMAX_LINE_CHARS]; 

			// Initialise to something safe
			memset(pszUnicodeString, 0, sizeof(wchar_t) * nMAX_LINE_CHARS);
			memset(pszMultiByteString, 0, sizeof(char) * nMAX_LINE_CHARS);
			
			// Read the string
			bReadData = (NULL != fgets(pszMultiByteString, nMAX_LINE_CHARS, m_pStream));

			if (bReadData)
			{
				// Convert multibyte to Unicode, using the specified code page
				nChars = GetUnicodeStringFromMultiByteString(pszMultiByteString, pszUnicodeString, nMAX_LINE_CHARS, m_nFileCodePage);

				if (nChars > 0)
				{
					rString = (CString)pszUnicodeString;
				}
			}
		}
#else

		if (!m_bIsUnicodeText)
		{
			// Do standard stuff -- read ANSI in ANSI
			bReadData = CStdioFile::ReadString(rString);

			// Get the current code page
			UINT nLocaleCodePage = GetCurrentLocaleCodePage();

			// If we got it OK...
			if (nLocaleCodePage > 0)
			{
				// if file code page does not match the system code page, we need to do a double conversion!
				if (nLocaleCodePage != (UINT)m_nFileCodePage)
				{
					int nStringBufferChars = rString.GetLength() + 1;

					pszUnicodeString	= new wchar_t[nStringBufferChars]; 

					// Initialise to something safe
					memset(pszUnicodeString, 0, sizeof(wchar_t) * nStringBufferChars);
					
					// Convert to Unicode using the file code page
					nChars = GetUnicodeStringFromMultiByteString(rString, pszUnicodeString, nStringBufferChars, m_nFileCodePage);

					// Convert back to multibyte using the system code page
					// (This doesn't really confer huge advantages except to avoid "mangling" of non-convertible special
					// characters. So, if a file in the E.European code page is displayed on a system using the 
					// western European code page, special accented characters which the system cannot display will be
					// replaced by the default character (a hash or something), rather than being incorrectly mapped to
					// other, western European accented characters).
					if (nChars > 0)
					{
						// Calculate how much we need for the MB buffer (it might be larger)
						nStringBufferChars = GetRequiredMultiByteLengthForUnicodeString(pszUnicodeString,nLocaleCodePage);
						pszMultiByteString= new char[nStringBufferChars];  

						nChars = GetMultiByteStringFromUnicodeString(pszUnicodeString, pszMultiByteString, nStringBufferChars, nLocaleCodePage);
						rString = (CString)pszMultiByteString;
					}
				}
			}
		}
		else
		{
			pszUnicodeString	= new wchar_t[nMAX_LINE_CHARS]; 

			// Initialise to something safe
			memset(pszUnicodeString, 0, sizeof(wchar_t) * nMAX_LINE_CHARS);
			
			// Read as Unicode, convert to ANSI

			// Bug fix by Dennis Jeryd 06/07/2003: initialise bReadData
			bReadData = (NULL != fgetws(pszUnicodeString, nMAX_LINE_CHARS, m_pStream));

			if (bReadData)
			{
				// Calculate how much we need for the multibyte string
				int nRequiredMBBuffer = GetRequiredMultiByteLengthForUnicodeString(pszUnicodeString,m_nFileCodePage);
				pszMultiByteString= new char[nRequiredMBBuffer];  

				nChars = GetMultiByteStringFromUnicodeString(pszUnicodeString, pszMultiByteString, nRequiredMBBuffer, m_nFileCodePage);

				if (nChars > 0)
				{
					rString = (CString)pszMultiByteString;
				}
			}

		}
#endif

		// Then remove end-of-line character if in Unicode text mode
		if (bReadData)
		{
			// Copied from FileTxt.cpp but adapted to Unicode and then adapted for end-of-line being just '\r'. 
			nLen = rString.GetLength();
			if (nLen > 1 && rString.Mid(nLen-2) == sNEWLINE)
			{
				rString.GetBufferSetLength(nLen-2);
			}
			else
			{
				lpsz = rString.GetBuffer(0);
				if (nLen != 0 && (lpsz[nLen-1] == _T('\r') || lpsz[nLen-1] == _T('\n')))
				{
					rString.GetBufferSetLength(nLen-1);
				}
			}
		}
	}
	// Ensure we always delete in case of exception
	catch(...)
	{
		if (pszUnicodeString)	delete [] pszUnicodeString;

		if (pszMultiByteString) delete [] pszMultiByteString;

		throw;
	}

	if (pszUnicodeString)		delete [] pszUnicodeString;

	if (pszMultiByteString)		delete [] pszMultiByteString;

	return bReadData;
}



// --------------------------------------------------------------------------------------------
//
//	CStdioFileEx::WriteString()
//
// --------------------------------------------------------------------------------------------
// Returns:    void
// Parameters: LPCTSTR lpsz
//
// Purpose:		Writes string to file either in Unicode or multibyte, depending on whether the caller specified the
//					CStdioFileEx::modeWriteUnicode flag. Override of base class function.
// Notes:		If writing in Unicode we need to:
//						a) Write the Byte-order-mark at the beginning of the file
//						b) Write all strings in byte-mode
//					-	If we were compiled in Unicode, we need to convert Unicode to multibyte if 
//						we want to write in multibyte
//					-	If we were compiled in multi-byte, we need to convert multibyte to Unicode if 
//						we want to write in Unicode.
// Exceptions:	None.
//
void CStdioFileEx::WriteString(LPCTSTR lpsz)
{
	wchar_t*	pszUnicodeString	= NULL; 
	char	*	pszMultiByteString= NULL;
	
	try
	{
		// If writing Unicode and at the start of the file, need to write byte mark
		if (m_nFlags & CStdioFileEx::modeWriteUnicode)
		{
			// If at position 0, write byte-order mark before writing anything else
			if (!m_pStream || GetPosition() == 0)
			{
				wchar_t cBOM = (wchar_t)nUNICODE_BOM;
				CFile::Write(&cBOM, sizeof(wchar_t));
			}
		}

// If compiled in Unicode...
#ifdef _UNICODE

		// If writing Unicode, no conversion needed
		if (m_nFlags & CStdioFileEx::modeWriteUnicode)
		{
			// Write in byte mode
			CFile::Write(lpsz, lstrlen(lpsz) * sizeof(wchar_t));
		}
		// Else if we don't want to write Unicode, need to convert
		else
		{
			int		nChars = lstrlen(lpsz) + 1;				// Why plus 1? Because yes
//			int		nBufferSize = nChars * sizeof(char);	// leave space for multi-byte chars
			int		nCharsWritten = 0;
			int		nBufferSize = 0;

			pszUnicodeString	= new wchar_t[nChars]; 

			// Copy string to Unicode buffer
			lstrcpy(pszUnicodeString, lpsz);

			// Work out how much space we need for the multibyte conversion
			nBufferSize = GetRequiredMultiByteLengthForUnicodeString(pszUnicodeString, m_nFileCodePage);
			pszMultiByteString= new char[nBufferSize];  

			// Get multibyte string
			nCharsWritten = GetMultiByteStringFromUnicodeString(pszUnicodeString, pszMultiByteString, nBufferSize, m_nFileCodePage);

			if (nCharsWritten > 0)
			{
				// Do byte-mode write using actual chars written (fix by Howard J Oh)
	//			CFile::Write((const void*)pszMultiByteString, lstrlen(lpsz));
				CFile::Write((const void*)pszMultiByteString,
					nCharsWritten*sizeof(char));
			}
		}
	// Else if *not* compiled in Unicode
#else
		// If writing Unicode, need to convert
		if (m_nFlags & CStdioFileEx::modeWriteUnicode)
		{
			int		nChars = lstrlen(lpsz) + 1;	 // Why plus 1? Because yes
			int		nBufferSize = nChars * sizeof(wchar_t);
			int		nCharsWritten = 0;

			pszUnicodeString	= new wchar_t[nChars];
			pszMultiByteString= new char[nChars]; 

			// Copy string to multibyte buffer
			lstrcpy(pszMultiByteString, lpsz);

			nCharsWritten = GetUnicodeStringFromMultiByteString(pszMultiByteString, pszUnicodeString, nChars, m_nFileCodePage);

			if (nCharsWritten > 0)
			{
				// Do byte-mode write using actual chars written (fix by Howard J Oh)
	//			CFile::Write(pszUnicodeString, lstrlen(lpsz) * sizeof(wchar_t));
				CFile::Write(pszUnicodeString, nCharsWritten*sizeof(wchar_t));
			}
			else
			{
				ASSERT(false);
			}

		}
		// Else if we don't want to write Unicode, no conversion needed, unless the code page differs
		else
		{
	//		// Do standard stuff
	//		CStdioFile::WriteString(lpsz);

			// Get the current code page
			UINT nLocaleCodePage = GetCurrentLocaleCodePage();

			// If we got it OK, and if file code page does not match the system code page, we need to do a double conversion!
			if (nLocaleCodePage > 0 && nLocaleCodePage != (UINT)m_nFileCodePage)
stdiofileex.cpp - 源码说明

本页面展示了「Encode/Decode UTF-8 format from/to text format」中的 stdiofileex.cpp 源码文件，采用 C++ 编程语言编写，共 666 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与format相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?