📄 markupstl.cpp
字号:
// MarkupSTL.cpp: implementation of the CMarkupSTL class.
//
// Markup Release 8.3
// Copyright (C) 1999-2006 First Objective Software, Inc. All rights reserved
// Go to www.firstobject.com for the latest CMarkup and EDOM documentation
// Use in commercial applications requires written permission
// This software is provided "as is", with no warranty.
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include "MarkupSTL.h"
using namespace std;
// Customization
#define x_EOL "\r\n" // can be \r\n or \n or empty
#define x_EOLLEN (sizeof(x_EOL)-1) // string length of x_EOL
#define x_ATTRIBQUOTE "\"" // can be double or single quote
void CMarkupSTL::operator=( const CMarkupSTL& markup )
{
    // Deep-copy assignment: copies the navigation state, document text,
    // error string and flags, then duplicates the used part of the element
    // index array and every chain of the saved-position map.
    //
    // Self-assignment must be a no-op. The code below calls RemoveAll() on
    // m_aPos and m_mapSavedPos and afterwards reads those same members
    // through markup, so without this guard a statement like "doc = doc"
    // would copy from storage that has just been discarded.
    if ( this == &markup )
        return;

    // Scalar state, document and error text
    m_iPosParent = markup.m_iPosParent;
    m_iPos = markup.m_iPos;
    m_iPosChild = markup.m_iPosChild;
    m_iPosFree = markup.m_iPosFree;
    m_iPosDeleted = markup.m_iPosDeleted;
    m_nNodeType = markup.m_nNodeType;
    m_nNodeOffset = markup.m_nNodeOffset;
    m_nNodeLength = markup.m_nNodeLength;
    m_strDoc = markup.m_strDoc;
    m_strError = markup.m_strError;
    m_nFlags = markup.m_nFlags;

    // Copy used part of the index array
    // Only entries below m_iPosFree are in use; the size never drops below 8
    m_aPos.RemoveAll();
    m_aPos.nSize = m_iPosFree;
    if ( m_aPos.nSize < 8 )
        m_aPos.nSize = 8;
    m_aPos.nSegs = m_aPos.SegsUsed();
    if ( m_aPos.nSegs )
    {
        // The segment pointer table and the segments themselves are
        // allocated as raw char storage and filled with memcpy
        m_aPos.pSegs = (ElemPos**)(new char[m_aPos.nSegs*sizeof(char*)]);
        int nSegSize = 1 << m_aPos.PA_SEGBITS;
        for ( int nSeg=0; nSeg < m_aPos.nSegs; ++nSeg )
        {
            // The last segment holds only the remainder of the entries
            if ( nSeg + 1 == m_aPos.nSegs )
                nSegSize = m_aPos.GetSize() - (nSeg << m_aPos.PA_SEGBITS);
            m_aPos.pSegs[nSeg] = (ElemPos*)(new char[nSegSize*sizeof(ElemPos)]);
            memcpy( m_aPos.pSegs[nSeg], markup.m_aPos.pSegs[nSeg], nSegSize*sizeof(ElemPos) );
        }
    }

    // Copy SavedPos map
    m_mapSavedPos.RemoveAll();
    if ( markup.m_mapSavedPos.pTable )
    {
        m_mapSavedPos.AllocMapTable();
        for ( int nSlot=0; nSlot < SavedPosMap::SPM_SIZE; ++nSlot )
        {
            SavedPos* pCopySavedPos = markup.m_mapSavedPos.pTable[nSlot];
            if ( pCopySavedPos )
            {
                // Count the entries of this slot: consecutive entries flagged
                // SPM_USED, stopping after the one flagged SPM_LAST
                int nCount = 0;
                while ( pCopySavedPos[nCount].nSavedPosFlags & SavedPosMap::SPM_USED )
                {
                    ++nCount;
                    if ( pCopySavedPos[nCount-1].nSavedPosFlags & SavedPosMap::SPM_LAST )
                        break;
                }
                if ( nCount )
                {
                    SavedPos* pNewSavedPos = new SavedPos[nCount];
                    for ( int nCopy=0; nCopy<nCount; ++nCopy )
                        pNewSavedPos[nCopy] = pCopySavedPos[nCopy];
                    // The copy holds exactly nCount entries, so its final
                    // entry must carry the end-of-chain marker
                    pNewSavedPos[nCount-1].nSavedPosFlags |= SavedPosMap::SPM_LAST;
                    m_mapSavedPos.pTable[nSlot] = pNewSavedPos;
                }
            }
        }
    }
    MARKUP_SETDEBUGSTATE;
}
bool CMarkupSTL::SetDoc( const char* szDoc )
{
    // Replace the document text and parse it.
    // A null szDoc clears the document. The error string is always reset
    // before parsing. Returns the result of x_ParseDoc().
    if ( ! szDoc )
        m_strDoc.erase();
    else
        m_strDoc = szDoc;

    m_strError.erase();

    const bool bParsed = x_ParseDoc();
    return bParsed;
}
bool CMarkupSTL::IsWellFormed()
{
    // True only when all of the following hold:
    //  - the index array is not empty
    //  - entry 0 is not flagged MNF_ILLFORMED
    //  - entry 0 has a child element
    //  - that child has no next sibling
    if ( ! m_aPos.GetSize() )
        return false;

    if ( m_aPos[0].nFlags & MNF_ILLFORMED )
        return false;

    if ( ! m_aPos[0].iElemChild )
        return false;

    if ( m_aPos[m_aPos[0].iElemChild].iElemNext )
        return false;

    return true;
}
bool CMarkupSTL::Load( const char* szFileName )
{
    // Read the file into m_strDoc (ReadTextFile also fills m_strError and
    // may update m_nFlags); parse only if the read succeeded.
    const bool bRead = ReadTextFile( szFileName, m_strDoc, &m_strError, &m_nFlags );
    if ( bRead )
        return x_ParseDoc();
    return false;
}
bool CMarkupSTL::ReadTextFile( const char* szFileName, string& strDoc, string* pstrError, int* pnFlags )
{
    // Static utility method to load text file into strDoc
    //
    // szFileName  path of the file, opened in binary mode
    // strDoc      receives the raw file bytes; left empty on failure
    // pstrError   optional; receives x_GetLastError() on failure,
    //             or "<n> bytes" on success
    // pnFlags     optional; read and written back unchanged
    //
    // Returns true only if the whole file was read.
    //
    // Open file to read binary
    FILE* fp = fopen( szFileName, "rb" );
    if ( ! fp )
    {
        if ( pstrError )
            *pstrError = x_GetLastError();
        return false;
    }

    // Set flags to 0 unless flags argument provided
    int nFlags = pnFlags?*pnFlags:0;
    char szDescBOM[20] = {0};
    char szResult[100];
    strDoc.erase();

    // Get file length; every positioning call is checked because ftell
    // reports failure as -1, which must never be used as a buffer size
    long nFileByteLen = -1;
    if ( fseek( fp, 0, SEEK_END ) == 0 )
        nFileByteLen = ftell( fp );
    bool bSuccess = ( nFileByteLen >= 0 && fseek( fp, 0, SEEK_SET ) == 0 );

    // Read file directly into the string's own storage
    // NOTE(review): if resize throws, the exception propagates with fp still open
    if ( bSuccess && nFileByteLen > 0 )
    {
        strDoc.resize( static_cast<size_t>(nFileByteLen) );
        bSuccess = ( fread( &strDoc[0], static_cast<size_t>(nFileByteLen), 1, fp ) == 1 );
    }

    if ( ! bSuccess )
    {
        // Fetch the error text before fclose, in case closing alters the
        // error state x_GetLastError reports (presumably errno based)
        if ( pstrError )
            *pstrError = x_GetLastError();
        // Do not hand back a partially filled document
        strDoc.erase();
        fclose( fp );
        return false;
    }

    sprintf( szResult, "%s%ld bytes", szDescBOM, nFileByteLen );
    if ( pstrError )
        *pstrError = szResult;
    fclose( fp );
    if ( pnFlags )
        *pnFlags = nFlags;
    return true;
}
bool CMarkupSTL::Save( const char* szFileName )
{
    // Write the current document text to the named file.
    // WriteTextFile stores its result or error text in m_strError
    // and is given m_nFlags to read and update.
    const bool bSaved = WriteTextFile( szFileName, m_strDoc, &m_strError, &m_nFlags );
    return bSaved;
}
bool CMarkupSTL::WriteTextFile( const char* szFileName, string& strDoc, string* pstrError, int* pnFlags )
{
    // Static utility method to save strDoc to text file
    //
    // szFileName  path of the file, created or truncated in binary mode
    // strDoc      bytes to write
    // pstrError   optional; receives x_GetLastError() on failure,
    //             or "<n> bytes" on success
    // pnFlags     optional; read and written back unchanged
    //
    // Returns true only if the data was written AND the file closed
    // without error.
    //
    // Open file to write binary
    FILE* fp = fopen( szFileName, "wb" );
    if ( ! fp )
    {
        if ( pstrError )
            *pstrError = x_GetLastError();
        return false;
    }

    // Set flags to 0 unless flags argument provided
    int nFlags = pnFlags?*pnFlags:0;
    char szDescBOM[20] = {0};
    char szResult[100];

    // Get document length; kept as size_t so a very large document is not
    // truncated on its way into fwrite
    const size_t nDocLength = strDoc.size();
    bool bSuccess = true;
    if ( nDocLength )
        bSuccess = ( fwrite( strDoc.c_str(), nDocLength, 1, fp ) == 1 );

    if ( bSuccess )
    {
        // stdio may hold the data in its buffer until the stream is closed,
        // so a failure (e.g. disk full) can first show up in fclose
        if ( fclose( fp ) != 0 )
        {
            bSuccess = false;
            if ( pstrError )
                *pstrError = x_GetLastError();
        }
        else if ( pstrError )
        {
            sprintf( szResult, "%s%lu bytes", szDescBOM, static_cast<unsigned long>(nDocLength) );
            *pstrError = szResult;
        }
    }
    else
    {
        // Fetch the write error before fclose can replace it
        if ( pstrError )
            *pstrError = x_GetLastError();
        fclose( fp );
    }

    if ( pnFlags )
        *pnFlags = nFlags;
    return bSuccess;
}
bool CMarkupSTL::FindElem( const char* szName )
{
// Change current position only if found
//
if ( m_aPos.GetSize() )
{
int iPos = x_FindElem( m_iPosParent, m_iPos, szName );
if ( iPos )
{
// Assign new position
x_SetPos( m_aPos[iPos].iElemParent, iPos, 0 );
return true;
}
}
return false;
}
bool CMarkupSTL::FindChildElem( const char* szName )
{
// Change current child position only if found
//
// Shorthand: call this with no current main position
// means find child under root element
if ( ! m_iPos )
FindElem();
int iPosChild = x_FindElem( m_iPos, m_iPosChild, szName );
if ( iPosChild )
{
// Assign new position
int iPos = m_aPos[iPosChild].iElemParent;
x_SetPos( m_aPos[iPos].iElemParent, iPos, iPosChild );
return true;
}
return false;
}
string CMarkupSTL::EscapeText( const char* szText, int nFlags )
{
// Convert text as seen outside XML document to XML friendly
// replacing special characters with ampersand escape codes
// E.g. convert "6>7" to "6>7"
//
// < less than
// & ampersand
// > greater than
//
// and for attributes:
//
// ' apostrophe or single quote
// " double quote
//
static const char* szaReplace[] = { "<","&",">","'",""" };
const char* pFind = (nFlags&MNF_ESCAPEQUOTES)?"<&>\'\"":"<&>";
string strText;
const char* pSource = szText;
int nDestSize = (int)strlen(pSource);
nDestSize += nDestSize / 10 + 7;
strText.reserve( nDestSize );
char cSource = *pSource;
const char* pFound;
while ( cSource )
{
if ( (pFound=strchr(pFind,cSource)) != NULL )
{
bool bIgnoreAmpersand = false;
if ( (nFlags&MNF_WITHREFS) && *pFound == '&' )
{
// Do not replace ampersand if it is start of any entity reference
// &[#_:A-Za-zU][_:-.A-Za-z0-9U]*; where U is > 0x7f
const char* pCheckEntity = pSource;
++pCheckEntity;
char c = *pCheckEntity;
if ( (c>='A'&&c<='Z') || (c>='a'&&c<='z')
|| c=='#' || c=='_' || c==':' || c>0x7f )
{
while ( 1 )
{
++pCheckEntity;
c = *pCheckEntity;
if ( c == ';' )
{
int nEntityLen = (int)(pCheckEntity - pSource) + 1;
strText.append( pSource, nEntityLen );
pSource = pCheckEntity;
bIgnoreAmpersand = true;
}
else if ( (c>='A'&&c<='Z') || (c>='a'&&c<='z') || (c>='0'&&c<='9')
|| c=='_' || c==':' || c=='-' || c=='.' || c>0x7f )
continue;
break;
}
}
}
if ( ! bIgnoreAmpersand )
{
pFound = szaReplace[pFound-pFind];
strText.append( pFound );
}
}
else
{
strText += cSource;
}
++pSource;
cSource = *pSource;
}
return strText;
}
string CMarkupSTL::UnescapeText( const char* szText, int nTextLength /*=-1*/ )
{
    // Convert XML friendly text to text as seen outside XML document
    // ampersand escape codes replaced with special characters e.g. convert "6&gt;7" to "6>7"
    // ampersand numeric codes replaced with character e.g. convert &#60; to <
    // (numeric codes are written out as UTF-8)
    // Conveniently the result is always the same or shorter in byte length
    //
    // szText       text to convert; null yields an empty string
    // nTextLength  number of bytes of szText to process; a negative value
    //              means szText is null terminated and strlen is used
    //
    // Anything that is not a recognized, complete reference inside the first
    // nTextLength bytes is copied through unchanged. Nothing beyond
    // nTextLength is examined, so szText may point into a larger buffer.
    //
    static const char* szaCode[] = { "lt;","amp;","gt;","apos;","quot;" };
    static int anCodeLen[] = { 3,4,3,5,5 };
    static const char* szSymbol = "<&>\'\"";
    string strText;
    if ( ! szText )
        return strText;
    const char* pSource = szText;
    if ( nTextLength < 0 )
        nTextLength = (int)strlen(szText);
    strText.reserve( nTextLength );
    int nChar = 0;
    while ( nChar < nTextLength )
    {
        if ( pSource[nChar] == '&' )
        {
            bool bCodeConverted = false;

            // Is it a numeric character reference?
            if ( nChar + 1 < nTextLength && pSource[nChar+1] == '#' )
            {
                // Is it a hex number?
                int nBase = 10;
                int nNumericChar = nChar + 2;
                if ( nNumericChar < nTextLength && pSource[nNumericChar] == 'x' )
                {
                    ++nNumericChar;
                    nBase = 16;
                }

                // Look for terminating semi-colon within 7 characters,
                // never looking at or beyond nTextLength
                int nCodeLen = 0;
                while ( nCodeLen < 7
                    && nNumericChar + nCodeLen < nTextLength
                    && pSource[nNumericChar + nCodeLen]
                    && pSource[nNumericChar + nCodeLen] != ';' )
                {
                    // only ASCII digits 0-9, A-F, a-f expected
                    ++nCodeLen;
                }
                const int nSemiColon = nNumericChar + nCodeLen;

                // Process unicode
                if ( nSemiColon < nTextLength && pSource[nSemiColon] == ';' )
                {
                    // strtol stops at the first character that is not a
                    // digit of nBase, at the latest at the ';' found above
                    long nUnicode = strtol( &pSource[nNumericChar], NULL, nBase );

                    // Only a positive value up to U+10FFFF is converted.
                    // Zero (also what strtol returns when there are no
                    // digits), negative and larger values emit nothing here
                    // and the reference is left in the text as is.
                    // (An MBCS build would use wctomb here rather than
                    // producing UTF-8.)
                    if ( nUnicode > 0 && nUnicode <= 0x10ffff )
                    {
                        if ( nUnicode < 0x80 )
                            strText += (char)nUnicode;
                        else if ( nUnicode < 0x800 )
                        {
                            // Convert to 2-byte UTF-8
                            strText += (char)(((nUnicode&0x7c0)>>6) | 0xc0);
                            strText += (char)((nUnicode&0x3f) | 0x80);
                        }
                        else if ( nUnicode < 0x10000 )
                        {
                            // Convert to 3-byte UTF-8
                            strText += (char)(((nUnicode&0xf000)>>12) | 0xe0);
                            strText += (char)(((nUnicode&0xfc0)>>6) | 0x80);
                            strText += (char)((nUnicode&0x3f) | 0x80);
                        }
                        else
                        {
                            // Convert to 4-byte UTF-8
                            strText += (char)(((nUnicode&0x1c0000)>>18) | 0xf0);
                            strText += (char)(((nUnicode&0x3f000)>>12) | 0x80);
                            strText += (char)(((nUnicode&0xfc0)>>6) | 0x80);
                            strText += (char)((nUnicode&0x3f) | 0x80);
                        }

                        // Increment index past ampersand semi-colon
                        nChar = nSemiColon + 1;
                        bCodeConverted = true;
                    }
                }
            }
            else // does not start with #
            {
                // Look for matching &code;
                for ( int nMatch = 0; nMatch < 5; ++nMatch )
                {
                    // The length test keeps the whole code inside nTextLength
                    if ( nChar < nTextLength - anCodeLen[nMatch]
                        && strncmp(szaCode[nMatch],&pSource[nChar+1],anCodeLen[nMatch]) == 0 )
                    {
                        // Insert symbol and increment index past ampersand semi-colon
                        strText += szSymbol[nMatch];
                        nChar += anCodeLen[nMatch] + 1;
                        bCodeConverted = true;
                        break;
                    }
                }
            }

            // If the code is not converted, leave it as is
            if ( ! bCodeConverted )
            {
                strText += '&';
                ++nChar;
            }
        }
        else // not &
        {
            strText += pSource[nChar];
            ++nChar;
        }
    }
    return strText;
}
int CMarkupSTL::FindNode( int nType )
{
// Change current node position only if a node is found
// If nType is 0 find any node, otherwise find node of type nType
// Return type of node or 0 if not found
// If found node is an element, change m_iPos
// Determine where in document to start scanning for node
int nTypeFound = 0;
int nNodeOffset = m_nNodeOffset;
if ( m_nNodeType > 1 )
{
// By-pass current node
nNodeOffset += m_nNodeLength;
}
else
{
// Set position to begin looking for node
nNodeOffset = 0; // default to start of document
if ( m_iPos )
{
// After element
nNodeOffset = m_aPos[m_iPos].StartAfter();
}
else if ( m_iPosParent )
{
// Immediately after start tag of parent
if ( m_aPos[m_iPosParent].IsEmptyElement() )
return 0;
else
nNodeOffset = m_aPos[m_iPosParent].StartContent();
}
}
// Get nodes until we find what we're looking for
int iPosNew = m_iPos;
TokenPos token( m_strDoc, m_nFlags );
NodePos node;
token.nNext = nNodeOffset;
do
{
nNodeOffset = token.nNext;
nTypeFound = x_ParseNode( token, node );
if ( nTypeFound == 0 )
{
// Check if we have reached the end of the parent element
// Otherwise it is a lone end tag
if ( m_iPosParent && nNodeOffset == m_aPos[m_iPosParent].StartContent()
+ m_aPos[m_iPosParent].ContentLen() )
return 0;
nTypeFound = MNT_LONE_END_TAG;
}
else if ( nTypeFound < 0 )
{
if ( nTypeFound == -2 )
return 0;
// -1 is node error
nTypeFound = MNT_NODE_ERROR;
}
else if ( nTypeFound == MNT_ELEMENT )
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -