📄 markupstl.cpp

📁 解析xml文件的强大包
💻 CPP
📖 第 1 页 / 共 5 页
字号:
12 3 4 5 下一页
// MarkupSTL.cpp: implementation of the CMarkupSTL class.
//
// Markup Release 8.3
// Copyright (C) 1999-2006 First Objective Software, Inc. All rights reserved
// Go to www.firstobject.com for the latest CMarkup and EDOM documentation
// Use in commercial applications requires written permission
// This software is provided "as is", with no warranty.

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include "MarkupSTL.h"

using namespace std;

// Customization
#define x_EOL "\r\n" // can be \r\n or \n or empty
#define x_EOLLEN (sizeof(x_EOL)-1) // string length of x_EOL
#define x_ATTRIBQUOTE "\"" // can be double or single quote


void CMarkupSTL::operator=( const CMarkupSTL& markup )
{
	// Make this object a deep copy of markup: positions, current node info,
	// document text, error string, flags, the used part of the element
	// index array and the saved-position map.
	//
	// Self-assignment must be a no-op: the code below calls RemoveAll() on
	// this object's containers and afterwards reads the source's containers,
	// which would be the very same objects
	if ( this == &markup )
		return;

	m_iPosParent = markup.m_iPosParent;
	m_iPos = markup.m_iPos;
	m_iPosChild = markup.m_iPosChild;
	m_iPosFree = markup.m_iPosFree;
	m_iPosDeleted = markup.m_iPosDeleted;
	m_nNodeType = markup.m_nNodeType;
	m_nNodeOffset = markup.m_nNodeOffset;
	m_nNodeLength = markup.m_nNodeLength;
	m_strDoc = markup.m_strDoc;
	m_strError = markup.m_strError;
	m_nFlags = markup.m_nFlags;

	// Copy used part of the index array
	// (size is the number of used positions, but never less than 8)
	m_aPos.RemoveAll();
	m_aPos.nSize = m_iPosFree;
	if ( m_aPos.nSize < 8 )
		m_aPos.nSize = 8;
	m_aPos.nSegs = m_aPos.SegsUsed();
	if ( m_aPos.nSegs )
	{
		// Table of segment pointers, then one raw allocation per segment
		m_aPos.pSegs = (ElemPos**)(new char[m_aPos.nSegs*sizeof(char*)]);
		int nSegSize = 1 << m_aPos.PA_SEGBITS;
		for ( int nSeg=0; nSeg < m_aPos.nSegs; ++nSeg )
		{
			// The last segment only holds the remainder of the array
			if ( nSeg + 1 == m_aPos.nSegs )
				nSegSize = m_aPos.GetSize() - (nSeg << m_aPos.PA_SEGBITS);
			m_aPos.pSegs[nSeg] = (ElemPos*)(new char[nSegSize*sizeof(ElemPos)]);
			memcpy( m_aPos.pSegs[nSeg], markup.m_aPos.pSegs[nSeg], nSegSize*sizeof(ElemPos) );
		}
	}

	// Copy SavedPos map
	m_mapSavedPos.RemoveAll();
	if ( markup.m_mapSavedPos.pTable )
	{
		m_mapSavedPos.AllocMapTable();
		for ( int nSlot=0; nSlot < SavedPosMap::SPM_SIZE; ++nSlot )
		{
			SavedPos* pCopySavedPos = markup.m_mapSavedPos.pTable[nSlot];
			if ( pCopySavedPos )
			{
				// Count the leading used entries of this slot, stopping after
				// the entry flagged as last
				int nCount = 0;
				while ( pCopySavedPos[nCount].nSavedPosFlags & SavedPosMap::SPM_USED )
				{
					++nCount;
					if ( pCopySavedPos[nCount-1].nSavedPosFlags & SavedPosMap::SPM_LAST )
						break;
				}
				if ( nCount )
				{
					SavedPos* pNewSavedPos = new SavedPos[nCount];
					for ( int nCopy=0; nCopy<nCount; ++nCopy )
						pNewSavedPos[nCopy] = pCopySavedPos[nCopy];
					// The copy holds exactly nCount entries, so its final
					// entry must carry the last-entry flag
					pNewSavedPos[nCount-1].nSavedPosFlags |= SavedPosMap::SPM_LAST;
					m_mapSavedPos.pTable[nSlot] = pNewSavedPos;
				}
			}
		}
	}

	MARKUP_SETDEBUGSTATE;
}

bool CMarkupSTL::SetDoc( const char* szDoc )
{
	// Replace the document text; a null pointer gives an empty document
	if ( ! szDoc )
		m_strDoc.erase();
	else
		m_strDoc = szDoc;

	// Clear any previous error text, then parse and return the parse result
	m_strError.erase();
	return x_ParseDoc();
}

bool CMarkupSTL::IsWellFormed()
{
	// True only when the index array is non-empty, position 0 is not
	// flagged ill-formed, position 0 has a child element, and that child
	// has no next sibling
	if ( ! m_aPos.GetSize() )
		return false;
	if ( m_aPos[0].nFlags & MNF_ILLFORMED )
		return false;
	if ( ! m_aPos[0].iElemChild )
		return false;
	if ( m_aPos[m_aPos[0].iElemChild].iElemNext )
		return false;
	return true;
}

bool CMarkupSTL::Load( const char* szFileName )
{
	// Read the file into the document string (result text goes to
	// m_strError, flags to m_nFlags); parse only if the read succeeded
	if ( ReadTextFile(szFileName, m_strDoc, &m_strError, &m_nFlags) )
		return x_ParseDoc();
	return false;
}

bool CMarkupSTL::ReadTextFile( const char* szFileName, string& strDoc, string* pstrError, int* pnFlags )
{
	// Static utility method to load text file into strDoc
	//
	// Open file to read binary
	FILE* fp = fopen( szFileName, "rb" );
	if ( ! fp )
	{
		if ( pstrError )
			*pstrError = x_GetLastError();
		return false;
	}

	// Set flags to 0 unless flags argument provided
	int nFlags = pnFlags?*pnFlags:0;
	char szDescBOM[20] = {0};
	char szResult[100];
	strDoc.erase();

	// Get file length
	fseek( fp, 0, SEEK_END );
	int nFileByteLen = ftell(fp);
	fseek( fp, 0, SEEK_SET );


	// Read file directly
	if ( nFileByteLen )
	{
		char* pszBuffer = new char[nFileByteLen];
		fread( pszBuffer, nFileByteLen, 1, fp );
		strDoc.assign( pszBuffer, nFileByteLen );
		delete [] pszBuffer;
	}
	sprintf( szResult, "%s%d bytes", szDescBOM, nFileByteLen );
	if ( pstrError )
		*pstrError = szResult;

	fclose( fp );
	if ( pnFlags )
		*pnFlags = nFlags;
	return true;
}

bool CMarkupSTL::Save( const char* szFileName )
{
	// Write the document string to szFileName; the result text goes to
	// m_strError and the flags are passed through m_nFlags
	bool bSaved = WriteTextFile( szFileName, m_strDoc, &m_strError, &m_nFlags );
	return bSaved;
}

bool CMarkupSTL::WriteTextFile( const char* szFileName, string& strDoc, string* pstrError, int* pnFlags )
{
	// Static utility method to save strDoc to text file
	//
	// szFileName  path of the file, created/truncated in binary mode
	// strDoc      bytes to write
	// pstrError   optional; receives "<n> bytes" on success or the text
	//             from x_GetLastError() on failure
	// pnFlags     optional; read on entry and written back once the file
	//             has been opened
	// Returns true only if every byte was written and the file closed
	// without error
	//
	// Open file to write binary
	bool bSuccess = true;
	FILE* fp = fopen( szFileName, "wb" );
	if ( ! fp )
	{
		if ( pstrError )
			*pstrError = x_GetLastError();
		return false;
	}

	// Set flags to 0 unless flags argument provided
	int nFlags = pnFlags?*pnFlags:0;
	char szDescBOM[20] = {0};
	char szResult[100];

	// Get document length (kept as size_t so large documents are not
	// narrowed before being handed to fwrite)
	size_t nDocLength = strDoc.size();

	if ( nDocLength )
		bSuccess = ( fwrite( strDoc.c_str(), nDocLength, 1, fp ) == 1 );
	sprintf( szResult, "%s%lu bytes", szDescBOM, (unsigned long)nDocLength );
	if ( pstrError )
		*pstrError = szResult;

	if ( ! bSuccess && pstrError )
		*pstrError = x_GetLastError();

	// Buffered data is flushed by fclose, so a failure here means the file
	// is incomplete even though fwrite reported success
	if ( fclose(fp) != 0 && bSuccess )
	{
		bSuccess = false;
		if ( pstrError )
			*pstrError = x_GetLastError();
	}
	if ( pnFlags )
		*pnFlags = nFlags;
	return bSuccess;
}

bool CMarkupSTL::FindElem( const char* szName )
{
	// Change current position only if found
	//
	if ( m_aPos.GetSize() )
	{
		int iPos = x_FindElem( m_iPosParent, m_iPos, szName );
		if ( iPos )
		{
			// Assign new position
			x_SetPos( m_aPos[iPos].iElemParent, iPos, 0 );
			return true;
		}
	}
	return false;
}

bool CMarkupSTL::FindChildElem( const char* szName )
{
	// Change current child position only if found
	//
	// Shorthand: call this with no current main position
	// means find child under root element
	if ( ! m_iPos )
		FindElem();

	int iPosChild = x_FindElem( m_iPos, m_iPosChild, szName );
	if ( iPosChild )
	{
		// Assign new position
		int iPos = m_aPos[iPosChild].iElemParent;
		x_SetPos( m_aPos[iPos].iElemParent, iPos, iPosChild );
		return true;
	}

	return false;
}

string CMarkupSTL::EscapeText( const char* szText, int nFlags )
{
	// Convert text as seen outside XML document to XML friendly
	// replacing special characters with ampersand escape codes
	// E.g. convert "6>7" to "6&gt;7"
	//
	// &lt;   less than
	// &amp;  ampersand
	// &gt;   greater than
	//
	// and for attributes (nFlags has MNF_ESCAPEQUOTES):
	//
	// &apos; apostrophe or single quote
	// &quot; double quote
	//
	// With MNF_WITHREFS an ampersand that starts an entity reference is
	// copied through unchanged together with the rest of the reference.
	// A null szText yields an empty string.
	//
	static const char* szaReplace[] = { "&lt;","&amp;","&gt;","&apos;","&quot;" };
	const char* pFind = (nFlags&MNF_ESCAPEQUOTES)?"<&>\'\"":"<&>";
	string strText;
	if ( ! szText )
		return strText;
	const char* pSource = szText;
	// Reserve a little more than the source length to allow for escapes
	int nDestSize = (int)strlen(pSource);
	nDestSize += nDestSize / 10 + 7;
	strText.reserve( nDestSize );
	char cSource = *pSource;
	const char* pFound;
	while ( cSource )
	{
		if ( (pFound=strchr(pFind,cSource)) != NULL )
		{
			bool bIgnoreAmpersand = false;
			if ( (nFlags&MNF_WITHREFS) && *pFound == '&' )
			{
				// Do not replace ampersand if it is start of any entity reference
				// &[#_:A-Za-zU][_:-.A-Za-z0-9U]*; where U is > 0x7f
				//
				// Compare as unsigned char: where plain char is signed, bytes
				// above 0x7f are negative and a "> 0x7f" test never matches
				const char* pCheckEntity = pSource;
				++pCheckEntity;
				unsigned char c = (unsigned char)*pCheckEntity;
				if ( (c>='A'&&c<='Z') || (c>='a'&&c<='z')
						|| c=='#' || c=='_' || c==':' || c>0x7f )
				{
					while ( 1 )
					{
						++pCheckEntity;
						c = (unsigned char)*pCheckEntity;
						if ( c == ';' )
						{
							// Copy the whole reference, ampersand through semi-colon,
							// and leave pSource on the semi-colon
							int nEntityLen = (int)(pCheckEntity - pSource) + 1;
							strText.append( pSource, nEntityLen );
							pSource = pCheckEntity;
							bIgnoreAmpersand = true;
						}
						else if ( (c>='A'&&c<='Z') || (c>='a'&&c<='z') || (c>='0'&&c<='9')
								|| c=='_' || c==':' || c=='-' || c=='.' || c>0x7f )
							continue;
						break;
					}
				}
			}
			if ( ! bIgnoreAmpersand )
			{
				// The index of the match in pFind selects the replacement
				pFound = szaReplace[pFound-pFind];
				strText.append( pFound );
			}
		}
		else
		{
			strText += cSource;
		}
		++pSource;
		cSource = *pSource;
	}
	return strText;
}

string CMarkupSTL::UnescapeText( const char* szText, int nTextLength /*=-1*/ )
{
	// Convert XML friendly text to text as seen outside XML document
	// ampersand escape codes replaced with special characters e.g. convert "6&gt;7" to "6>7"
	// ampersand numeric codes replaced with character e.g. convert &#60; to <
	// Conveniently the result is always the same or shorter in byte length
	//
	// szText       source text; a null pointer yields an empty string
	// nTextLength  number of bytes of szText to process, or -1 to use
	//              strlen(szText); nothing past this length is examined
	//
	// Numeric references are emitted as UTF-8 (1 to 4 bytes). A reference
	// that is malformed, zero, negative or above 0x10FFFF is left as is.
	//
	static const char* szaCode[] = { "lt;","amp;","gt;","apos;","quot;" };
	static int anCodeLen[] = { 3,4,3,5,5 };
	static const char* szSymbol = "<&>\'\"";
	string strText;
	if ( ! szText )
		return strText;
	const char* pSource = szText;
	if ( nTextLength == -1 )
		nTextLength = (int)strlen(szText);
	strText.reserve( nTextLength );
	int nChar = 0;
	while ( nChar < nTextLength )
	{
		if ( pSource[nChar] == '&' )
		{
			bool bCodeConverted = false;

			// Is it a numeric character reference?
			if ( nChar + 1 < nTextLength && pSource[nChar+1] == '#' )
			{
				// Is it a hex number?
				int nBase = 10;
				int nNumericChar = nChar + 2;
				if ( nNumericChar < nTextLength && pSource[nNumericChar] == 'x' )
				{
					++nNumericChar;
					nBase = 16;
				}

				// Look for terminating semi-colon within 7 characters,
				// never reading beyond the given text length
				int nCodeLen = 0;
				while ( nCodeLen < 7 && nNumericChar + nCodeLen < nTextLength
						&& pSource[nNumericChar + nCodeLen]
						&& pSource[nNumericChar + nCodeLen] != ';' )
				{
					// only ASCII digits 0-9, A-F, a-f expected
					++nCodeLen;
				}
				int nCodeEnd = nNumericChar + nCodeLen;

				// Process unicode
				if ( nCodeLen && nCodeEnd < nTextLength && pSource[nCodeEnd] == ';' )
				{
					// The number must run right up to the semi-colon and be a
					// valid non-zero code point; otherwise nothing is emitted
					// here and the text is left as is
					char* pNumberEnd = NULL;
					long nUnicode = strtol( &pSource[nNumericChar], &pNumberEnd, nBase );
					if ( pNumberEnd == &pSource[nCodeEnd]
							&& nUnicode > 0 && nUnicode <= 0x10ffff )
					{
						if ( nUnicode < 0x80 )
							strText += (char)nUnicode;
						else if ( nUnicode < 0x800 )
						{
							// Convert to 2-byte UTF-8
							strText += (char)(((nUnicode&0x7c0)>>6) | 0xc0);
							strText += (char)((nUnicode&0x3f) | 0x80);
						}
						else if ( nUnicode < 0x10000 )
						{
							// Convert to 3-byte UTF-8
							strText += (char)(((nUnicode&0xf000)>>12) | 0xe0);
							strText += (char)(((nUnicode&0xfc0)>>6) | 0x80);
							strText += (char)((nUnicode&0x3f) | 0x80);
						}
						else
						{
							// Convert to 4-byte UTF-8
							strText += (char)(((nUnicode&0x1c0000)>>18) | 0xf0);
							strText += (char)(((nUnicode&0x3f000)>>12) | 0x80);
							strText += (char)(((nUnicode&0xfc0)>>6) | 0x80);
							strText += (char)((nUnicode&0x3f) | 0x80);
						}

						// Increment index past ampersand semi-colon
						nChar = nCodeEnd + 1;
						bCodeConverted = true;
					}
				}
			}
			else // does not start with #
			{
				// Look for matching &code;
				for ( int nMatch = 0; nMatch < 5; ++nMatch )
				{
					// The length test keeps the comparison inside the text
					if ( nChar < nTextLength - anCodeLen[nMatch]
						&& strncmp(szaCode[nMatch],&pSource[nChar+1],anCodeLen[nMatch]) == 0 )
					{
						// Insert symbol and increment index past ampersand semi-colon
						strText += szSymbol[nMatch];
						nChar += anCodeLen[nMatch] + 1;
						bCodeConverted = true;
						break;
					}
				}
			}

			// If the code is not converted, leave it as is
			if ( ! bCodeConverted )
			{
				strText += '&';
				++nChar;
			}
		}
		else // not &
		{
			strText += pSource[nChar];
			++nChar;
		}
	}
	return strText;
}


int CMarkupSTL::FindNode( int nType )
{
	// Change current node position only if a node is found
	// If nType is 0 find any node, otherwise find node of type nType
	// Return type of node or 0 if not found
	// If found node is an element, change m_iPos

	// Determine where in document to start scanning for node
	int nTypeFound = 0;
	int nNodeOffset = m_nNodeOffset;
	if ( m_nNodeType > 1 )
	{
		// By-pass current node
		nNodeOffset += m_nNodeLength;
	}
	else
	{
		// Set position to begin looking for node
		nNodeOffset = 0; // default to start of document
		if ( m_iPos )
		{
			// After element
			nNodeOffset = m_aPos[m_iPos].StartAfter();
		}
		else if ( m_iPosParent )
		{
			// Immediately after start tag of parent
			if ( m_aPos[m_iPosParent].IsEmptyElement() )
				return 0;
			else
				nNodeOffset = m_aPos[m_iPosParent].StartContent();
		}
	}

	// Get nodes until we find what we're looking for
	int iPosNew = m_iPos;
	TokenPos token( m_strDoc, m_nFlags );
	NodePos node;
	token.nNext = nNodeOffset;
	do
	{
		nNodeOffset = token.nNext;
		nTypeFound = x_ParseNode( token, node );
		if ( nTypeFound == 0 )
		{
			// Check if we have reached the end of the parent element
			// Otherwise it is a lone end tag
			if ( m_iPosParent && nNodeOffset == m_aPos[m_iPosParent].StartContent()
					+ m_aPos[m_iPosParent].ContentLen() )
				return 0;
			nTypeFound = MNT_LONE_END_TAG;
		}
		else if ( nTypeFound < 0 )
		{
			if ( nTypeFound == -2 )
				return 0;
			// -1 is node error
			nTypeFound = MNT_NODE_ERROR;
		}
		else if ( nTypeFound == MNT_ELEMENT )
12 3 4 5 下一页
💿 文件大小 465 K
👤 上传用户 jellylihui
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#xml
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -