markup.cpp

来自「最有名的一个VC++外部类,Cmarkup,现在已经出了最新版8.2了,现在网上」· C++ 代码 · 共 2,366 行 · 第 1/5 页
CPP
2,366 行
// Markup.cpp: implementation of the CMarkup class.
//
// Markup Release 8.2
// Copyright (C) 1999-2006 First Objective Software, Inc. All rights reserved
// Go to www.firstobject.com for the latest CMarkup and EDOM documentation
// Use in commercial applications requires written permission
// This software is provided "as is", with no warranty.

#include "stdafx.h"
#include "Markup.h"

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

#ifdef _MBCS
#pragma message( "Note: MBCS build (not UTF-8)" )
// For UTF-8, remove _MBCS from project settings C/C++ preprocessor definitions
#endif

// Defines for Windows CE
// When _WIN32_WCE is defined, _tclen and _tccpy are supplied here as macros:
// every character is treated as exactly one _TCHAR long, and copying a
// character is a plain single-_TCHAR assignment.
#ifdef _WIN32_WCE
#define _tclen(p) 1
#define _tccpy(p1,p2) *(p1)=*(p2)
#endif

// Customization
// x_EOL         line terminator string
// x_EOLLEN      number of characters in x_EOL, computed at compile time from
//               the size of the literal minus its terminating NUL
// x_ATTRIBQUOTE quote character string used around attribute values
#define x_EOL _T("\r\n") // can be \r\n or \n or empty
#define x_EOLLEN (sizeof(x_EOL)/sizeof(_TCHAR)-1) // string length of x_EOL
#define x_ATTRIBQUOTE _T("\"") // can be double or single quote


void CMarkup::operator=( const CMarkup& markup )
{
	// Deep copy of another CMarkup object: the position members, node
	// information, document text, error string and flags are assigned, then
	// the used part of the element index array and the saved position map
	// are duplicated into freshly allocated storage owned by this object.
	//
	// Self-assignment must be a no-op: the code below releases this object's
	// index array and saved position map before reading them from the source,
	// which would destroy the very data being copied
	if ( this == &markup )
		return;

	m_iPosParent = markup.m_iPosParent;
	m_iPos = markup.m_iPos;
	m_iPosChild = markup.m_iPosChild;
	m_iPosFree = markup.m_iPosFree;
	m_iPosDeleted = markup.m_iPosDeleted;
	m_nNodeType = markup.m_nNodeType;
	m_nNodeOffset = markup.m_nNodeOffset;
	m_nNodeLength = markup.m_nNodeLength;
	m_strDoc = markup.m_strDoc;
	m_strError = markup.m_strError;
	m_nFlags = markup.m_nFlags;

	// Copy used part of the index array
	// The size is the first free position, but never less than 8 entries
	m_aPos.RemoveAll();
	m_aPos.nSize = m_iPosFree;
	if ( m_aPos.nSize < 8 )
		m_aPos.nSize = 8;
	m_aPos.nSegs = m_aPos.SegsUsed();
	if ( m_aPos.nSegs )
	{
		m_aPos.pSegs = (ElemPos**)(new char[m_aPos.nSegs*sizeof(char*)]);
		int nSegSize = 1 << m_aPos.PA_SEGBITS;
		for ( int nSeg=0; nSeg < m_aPos.nSegs; ++nSeg )
		{
			// Every segment is full size except the last one, which holds
			// only the remaining entries
			if ( nSeg + 1 == m_aPos.nSegs )
				nSegSize = m_aPos.GetSize() - (nSeg << m_aPos.PA_SEGBITS);
			m_aPos.pSegs[nSeg] = (ElemPos*)(new char[nSegSize*sizeof(ElemPos)]);
			memcpy( m_aPos.pSegs[nSeg], markup.m_aPos.pSegs[nSeg], nSegSize*sizeof(ElemPos) );
		}
	}

	// Copy SavedPos map
	m_mapSavedPos.RemoveAll();
	if ( markup.m_mapSavedPos.pTable )
	{
		m_mapSavedPos.AllocMapTable();
		for ( int nSlot=0; nSlot < SavedPosMap::SPM_SIZE; ++nSlot )
		{
			SavedPos* pCopySavedPos = markup.m_mapSavedPos.pTable[nSlot];
			if ( pCopySavedPos )
			{
				// Count the leading run of used entries, stopping after the
				// entry flagged as the last one in the slot
				int nCount = 0;
				while ( pCopySavedPos[nCount].nSavedPosFlags & SavedPosMap::SPM_USED )
				{
					++nCount;
					if ( pCopySavedPos[nCount-1].nSavedPosFlags & SavedPosMap::SPM_LAST )
						break;
				}

				// A slot whose first entry is unused has nothing to copy;
				// allocating zero entries and flagging entry [-1] would write
				// outside the allocation. The slot is left exactly as
				// AllocMapTable() initialized it.
				// NOTE(review): presumably AllocMapTable() clears every slot
				// to NULL -- confirm in Markup.h
				if ( nCount > 0 )
				{
					SavedPos* pNewSavedPos = new SavedPos[nCount];
					for ( int nCopy=0; nCopy<nCount; ++nCopy )
						pNewSavedPos[nCopy] = pCopySavedPos[nCopy];
					// The copy may be shorter than the source slot, so its
					// final entry must carry the end marker
					pNewSavedPos[nCount-1].nSavedPosFlags |= SavedPosMap::SPM_LAST;
					m_mapSavedPos.pTable[nSlot] = pNewSavedPos;
				}
			}
		}
	}

	MARKUP_SETDEBUGSTATE;
}

bool CMarkup::SetDoc( LPCTSTR szDoc )
{
	// Replace the document text and parse it
	//
	// szDoc   new document text; NULL clears the document
	// Returns the result of x_ParseDoc() on the new text
	//
	if ( szDoc )
		m_strDoc = szDoc;
	else
		m_strDoc.Empty();

	// Any error text left over from a previous operation no longer applies
	m_strError.Empty();
	return x_ParseDoc();
}

bool CMarkup::IsWellFormed()
{
	// The document counts as well-formed only when all of these hold:
	// the index array is not empty, entry 0 is not flagged MNF_ILLFORMED,
	// entry 0 has a child, and that child has no iElemNext link
	//
	if ( ! m_aPos.GetSize() )
		return false;

	if ( m_aPos[0].nFlags & MNF_ILLFORMED )
		return false;

	if ( ! m_aPos[0].iElemChild )
		return false;

	if ( m_aPos[m_aPos[0].iElemChild].iElemNext )
		return false;

	return true;
}

bool CMarkup::Load( LPCTSTR szFileName )
{
	// Read the file into the document string, then parse it
	// The parse is attempted only when the read succeeded; the error
	// string and flags members are handed to ReadTextFile to fill in
	//
	const bool bFileRead = ReadTextFile( szFileName, m_strDoc, &m_strError, &m_nFlags );
	return bFileRead && x_ParseDoc();
}

bool CMarkup::ReadTextFile( LPCTSTR szFileName, CString& strDoc, CString* pstrError, int* pnFlags )
{
	// Static utility method to load text file into strDoc
	//
	// szFileName  path of the file to read
	// strDoc      receives the file content; emptied once the file is open
	// pstrError   optional; receives a summary of the bytes read on success
	//             or the system error text on failure
	// pnFlags     optional; read on entry and written back on exit
	// Returns true only when the whole file was read
	//
	// Open file to read binary
	FILE* fp = _tfopen( szFileName, _T("rb") );
	if ( ! fp )
	{
		if ( pstrError )
			*pstrError = strerror(errno);
		return false;
	}

	// Set flags to 0 unless flags argument provided
	int nFlags = pnFlags?*pnFlags:0;
	_TCHAR szDescBOM[20] = {0};
	strDoc.Empty();

	// Get file length
	// ftell reports failure as -1, which must never be used as a buffer size
	int nFileByteLen = -1;
	if ( fseek( fp, 0, SEEK_END ) == 0 )
		nFileByteLen = (int)ftell( fp );
	if ( nFileByteLen < 0 || fseek( fp, 0, SEEK_SET ) != 0 )
	{
		// Build the error text before fclose can disturb errno
		if ( pstrError )
			*pstrError = strerror(errno);
		fclose( fp );
		return false;
	}

	bool bSuccess = true;
	int nReadErrno = 0; // errno captured at the point of failure

#if defined(_UNICODE) // convert file to wide char
	int nWideLen = 0;
	if ( nFileByteLen )
	{
		char* pBuffer = new char[nFileByteLen];
		bSuccess = ( fread( pBuffer, nFileByteLen, 1, fp ) == 1 );
		if ( bSuccess )
		{
			// For ANSI files, replace CP_UTF8 with CP_ACP in both places
			nWideLen = MultiByteToWideChar(CP_UTF8,0,pBuffer,nFileByteLen,NULL,0);
			MultiByteToWideChar(CP_UTF8,0,pBuffer,nFileByteLen,strDoc.GetBuffer(nWideLen),nWideLen);
			strDoc.ReleaseBuffer( nWideLen );
		}
		else
			nReadErrno = errno; // short read: buffer content is not usable
		delete [] pBuffer;
	}
	if ( pstrError )
		(*pstrError).Format(_T("%s%d bytes to %d wide chars"),szDescBOM,nFileByteLen,nWideLen);
#else // read file directly
	if ( nFileByteLen )
	{
		bSuccess = ( fread( strDoc.GetBuffer(nFileByteLen), nFileByteLen, 1, fp ) == 1 );
		if ( ! bSuccess )
			nReadErrno = errno;
		// On a short read the buffer holds unspecified data, so drop it
		strDoc.ReleaseBuffer( bSuccess ? nFileByteLen : 0 );
	}
	if ( pstrError )
		(*pstrError).Format( _T("%s%d bytes"), szDescBOM, nFileByteLen );
#endif

	// A failed read replaces the summary with the system error text,
	// in the same way WriteTextFile reports a failed write
	if ( ! bSuccess && pstrError )
		*pstrError = strerror(nReadErrno);
	fclose( fp );
	if ( pnFlags )
		*pnFlags = nFlags;
	return bSuccess;
}

bool CMarkup::Save( LPCTSTR szFileName )
{
	// Write the current document text to szFileName
	// The error string and flags members are passed along so that
	// WriteTextFile can update them; its result is returned unchanged
	//
	const bool bWritten = WriteTextFile( szFileName, m_strDoc, &m_strError, &m_nFlags );
	return bWritten;
}

bool CMarkup::WriteTextFile( LPCTSTR szFileName, CString& strDoc, CString* pstrError, int* pnFlags )
{
	// Static utility method to save strDoc to text file
	//
	// szFileName  path of the file to create or overwrite
	// strDoc      text to write
	// pstrError   optional; receives a summary of the bytes written on
	//             success or the system error text on failure
	// pnFlags     optional; read on entry and written back on exit
	// Returns true only when the data was written and the file closed
	// without error
	//
	// Open file to write binary
	bool bSuccess = true;
	int nWriteErrno = 0; // errno captured at the point of failure
	FILE* fp = _tfopen( szFileName, _T("wb") );
	if ( ! fp )
	{
		if ( pstrError )
			*pstrError = strerror(errno);
		return false;
	}

	// Set flags to 0 unless flags argument provided
	int nFlags = pnFlags?*pnFlags:0;
	_TCHAR szDescBOM[20] = {0};

	// Get document length
	int nDocLength = strDoc.GetLength();


#if defined( _UNICODE )
	int nMBLen = 0;
	if ( nDocLength )
	{
		// For ANSI files, replace CP_UTF8 with CP_ACP in both places
		nMBLen = WideCharToMultiByte(CP_UTF8,0,strDoc,nDocLength,NULL,0,NULL,NULL);
		char* pBuffer = new char[nMBLen+1];
		WideCharToMultiByte(CP_UTF8,0,strDoc,nDocLength,pBuffer,nMBLen+1,NULL,NULL);
		bSuccess = ( fwrite( pBuffer, nMBLen, 1, fp ) == 1 );
		if ( ! bSuccess )
			nWriteErrno = errno;
		delete [] pBuffer;
	}
	if ( pstrError )
		(*pstrError).Format( _T("%d wide chars to %s%d bytes"), nDocLength, szDescBOM, nMBLen );
#else // MBCS or UTF-8
	if ( nDocLength )
	{
		CString strDocWrite = strDoc; // reference unless converted
		nDocLength = strDocWrite.GetLength();
		bSuccess = ( fwrite( (LPCTSTR)strDocWrite, nDocLength, 1, fp ) == 1 );
		if ( ! bSuccess )
			nWriteErrno = errno;
	}
	if ( pstrError )
		(*pstrError).Format( _T("%s%d bytes"), szDescBOM, nDocLength );
#endif

	// fwrite only fills the stream buffer; the data reaches the file when
	// fclose flushes it, so a failing fclose means the save failed too
	if ( fclose(fp) != 0 && bSuccess )
	{
		bSuccess = false;
		nWriteErrno = errno;
	}

	// A failure replaces the summary with the system error text
	if ( ! bSuccess && pstrError )
		*pstrError = strerror(nWriteErrno);
	if ( pnFlags )
		*pnFlags = nFlags;
	return bSuccess;
}

bool CMarkup::FindElem( LPCTSTR szName )
{
	// Search for an element with x_FindElem, starting from the current
	// parent and main positions; the current position is left untouched
	// unless a match is found
	//
	if ( ! m_aPos.GetSize() )
		return false;

	const int iPosFound = x_FindElem( m_iPosParent, m_iPos, szName );
	if ( ! iPosFound )
		return false;

	// Make the match the main position, under its own parent, with no
	// child position
	x_SetPos( m_aPos[iPosFound].iElemParent, iPosFound, 0 );
	return true;
}

bool CMarkup::FindChildElem( LPCTSTR szName )
{
	// Search for a child element with x_FindElem, starting from the current
	// main and child positions; the positions change only on a match
	//
	// With no main position yet, FindElem() is called first so that the
	// search can run under whatever main position it establishes
	if ( m_iPos == 0 )
		FindElem();

	const int iChildFound = x_FindElem( m_iPos, m_iPosChild, szName );
	if ( ! iChildFound )
		return false;

	// Rebuild all three positions from the match: its parent becomes the
	// main position and that element's parent becomes the parent position
	const int iPosMain = m_aPos[iChildFound].iElemParent;
	x_SetPos( m_aPos[iPosMain].iElemParent, iPosMain, iChildFound );
	return true;
}

CString CMarkup::EscapeText( LPCTSTR szText, int nFlags )
{
	// Convert text as seen outside XML document to XML friendly
	// replacing special characters with ampersand escape codes
	// E.g. convert "6>7" to "6&gt;7"
	//
	// &lt;   less than
	// &amp;  ampersand
	// &gt;   greater than
	//
	// and for attributes:
	//
	// &apos; apostrophe or single quote
	// &quot; double quote
	//
	// szText  text to escape; NULL yields an empty string
	// nFlags  MNF_ESCAPEQUOTES also escapes single and double quotes
	//         MNF_WITHREFS leaves an ampersand alone when it starts an
	//         entity reference (see pattern below)
	// Returns the escaped text
	//
	static LPCTSTR szaReplace[] = { _T("&lt;"),_T("&amp;"),_T("&gt;"),_T("&apos;"),_T("&quot;") };
	LPCTSTR pFind = (nFlags&MNF_ESCAPEQUOTES)?_T("<&>\'\""):_T("<&>");
	CString strText;
	if ( ! szText )
		return strText;

	// Start with a buffer a little larger than the source and grow on demand
	LPCTSTR pSource = szText;
	int nDestSize = (int)_tcslen(pSource);
	nDestSize += nDestSize / 10 + 7;
	_TCHAR* pDest = strText.GetBuffer(nDestSize);
	int nLen = 0;
	_TCHAR cSource = *pSource;
	LPCTSTR pFound;
	while ( cSource )
	{
		// Keep room for the longest replacement code (6 characters)
		if ( nLen > nDestSize - 6 )
		{
			strText.ReleaseBuffer(nLen);
			nDestSize *= 2;
			pDest = strText.GetBuffer(nDestSize);
		}
		if ( (pFound=_tcschr(pFind,cSource)) != NULL )
		{
			bool bIgnoreAmpersand = false;
			if ( (nFlags&MNF_WITHREFS) && *pFound == _T('&') )
			{
				// Do not replace ampersand if it is start of any entity reference
				// &[#_:A-Za-zU][_:-.A-Za-z0-9U]*; where U is > 0x7f
				// The U test goes through unsigned int because _TCHAR can be
				// a signed char, for which a plain c>0x7f is never true
				LPCTSTR pCheckEntity = pSource;
				++pCheckEntity;
				_TCHAR c = *pCheckEntity;
				if ( (c>=_T('A')&&c<=_T('Z')) || (c>=_T('a')&&c<=_T('z'))
						|| c==_T('#') || c==_T('_') || c==_T(':') || (unsigned int)c>0x7f )
				{
					while ( 1 )
					{
						pCheckEntity += _tclen( pCheckEntity );
						c = *pCheckEntity;
						if ( c == _T(';') )
						{
							// Copy the whole reference through unchanged
							// Its length is not bounded by the 6 characters
							// reserved above, so make sure it fits first
							int nEntityLen = (int)(pCheckEntity - pSource) + 1;
							if ( nLen + nEntityLen > nDestSize )
							{
								strText.ReleaseBuffer(nLen);
								nDestSize = (nLen + nEntityLen) * 2;
								pDest = strText.GetBuffer(nDestSize);
							}
							_tcsncpy(&pDest[nLen],pSource,nEntityLen);
							nLen += nEntityLen;
							// Leave pSource on the semi-colon; the advance at
							// the bottom of the outer loop steps past it
							pSource = pCheckEntity;
							bIgnoreAmpersand = true;
						}
						else if ( (c>=_T('A')&&c<=_T('Z')) || (c>=_T('a')&&c<=_T('z')) || (c>=_T('0')&&c<=_T('9'))
								|| c==_T('_') || c==_T(':') || c==_T('-') || c==_T('.') || (unsigned int)c>0x7f )
							continue;
						break;
					}
				}
			}
			if ( ! bIgnoreAmpersand )
			{
				// The offset of the match within pFind selects the code
				pFound = szaReplace[pFound-pFind];
				_tcscpy(&pDest[nLen],pFound);
				nLen += (int)_tcslen(pFound);
			}
		}
		else
		{
			// Ordinary character: copy it whole (may be more than one _TCHAR)
			_tccpy( &pDest[nLen], pSource );
			nLen += (int)_tclen( pSource );
		}
		pSource += _tclen( pSource );
		cSource = *pSource;
	}
	strText.ReleaseBuffer(nLen);
	return strText;
}

CString CMarkup::UnescapeText( LPCTSTR szText, int nTextLength /*=-1*/ )
{
	// Convert XML friendly text to text as seen outside XML document
	// ampersand escape codes replaced with special characters e.g. convert "6&gt;7" to "6>7"
	// ampersand numeric codes replaced with character e.g. convert &#60; to <
	// Conveniently the result is always the same or shorter in byte length
	//
	static LPCTSTR szaCode[] = { _T("lt;"),_T("amp;"),_T("gt;"),_T("apos;"),_T("quot;") };
	static int anCodeLen[] = { 3,4,3,5,5 };
	static LPCTSTR szSymbol = _T("<&>\'\"");
	CString strText;
	LPCTSTR pSource = szText;
	if ( nTextLength == -1 )
		nTextLength = (int)_tcslen(szText);
	_TCHAR* pDest = strText.GetBuffer( nTextLength );
	int nLen = 0;
	int nCharLen;
	int nChar = 0;
	while ( nChar < nTextLength )
	{
		if ( pSource[nChar] == _T('&') )
		{
			bool bCodeConverted = false;

			// Is it a numeric character reference?
			if ( pSource[nChar+1] == _T('#') )
			{
				// Is it a hex number?
				int nBase = 10;
				int nNumericChar = nChar + 2;
				_TCHAR cChar = pSource[nNumericChar];
				if ( cChar == _T('x') )
				{
					++nNumericChar;
					cChar = pSource[nNumericChar];
					nBase = 16;
				}

				// Look for terminating semi-colon within 7 characters
				int nCodeLen = 0;
				while ( nCodeLen < 7 && cChar && cChar != _T(';') )
				{
					// only ASCII digits 0-9, A-F, a-f expected
					nCodeLen += (int)_tclen( &pSource[nNumericChar+nCodeLen] );
					cChar = pSource[nNumericChar + nCodeLen];
				}

				// Process unicode
				if ( cChar == _T(';') )
				{
					int nUnicode = _tcstol( &pSource[nNumericChar], NULL, nBase );
#if defined(_UNICODE)
					pDest[nLen++] = (_TCHAR)nUnicode;
#elif defined(_MBCS)
					int nMBLen = wctomb( &pDest[nLen], (wchar_t)nUnicode );
					if ( nMBLen > 0 )
						nLen += nMBLen;
					else
						nUnicode = 0;
#else
					if ( nUnicode < 0x80 )
						pDest[nLen++] = (_TCHAR)nUnicode;
					else if ( nUnicode < 0x800 )
					{
						// Convert to 2-byte UTF-8
						pDest[nLen++] = (_TCHAR)(((nUnicode&0x7c0)>>6) | 0xc0);
						pDest[nLen++] = (_TCHAR)((nUnicode&0x3f) | 0x80);
					}
					else
					{
						// Convert to 3-byte UTF-8
						pDest[nLen++] = (_TCHAR)(((nUnicode&0xf000)>>12) | 0xe0);
						pDest[nLen++] = (_TCHAR)(((nUnicode&0xfc0)>>6) | 0x80);
						pDest[nLen++] = (_TCHAR)((nUnicode&0x3f) | 0x80);
					}
#endif
					if ( nUnicode )
					{
						// Increment index past ampersand semi-colon
						nChar = nNumericChar + nCodeLen + 1;
						bCodeConverted = true;
					}
				}
			}
			else // does not start with #
			{
				// Look for matching &code;
				for ( int nMatch = 0; nMatch < 5; ++nMatch )
				{
					if ( nChar < nTextLength - anCodeLen[nMatch]
						&& _tcsncmp(szaCode[nMatch],&pSource[nChar+1],anCodeLen[nMatch]) == 0 )
					{
						// Insert symbol and increment index past ampersand semi-colon
						pDest[nLen++] = szSymbol[nMatch];
						nChar += anCodeLen[nMatch] + 1;
						bCodeConverted = true;
						break;
					}
				}
			}

			// If the code is not converted, leave it as is
			if ( ! bCodeConverted )
			{
				pDest[nLen++] = _T('&');
				++nChar;
			}
		}
		else // not &
		{
			nCharLen = (int)_tclen(&pSource[nChar]);
			_tccpy( &pDest[nLen], &pSource[nChar] );
			nLen += nCharLen;
			nChar += nCharLen;
		}
markup.cpp - 源码说明

本页面展示了「最有名的一个VC++外部类,Cmarkup,现在已经出了最新版8.2了,现在网上的都是6.1以下的版,希望这个新版对你们有所帮助,一些相用VC开发VML的朋友千万不要错过」中的 markup.cpp 源码文件，采用 C++ 编程语言编写，共 2,366 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫开发者社区收录了大量与VC++相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?