📄 xmlparse.cpp
字号:
{
bool bCodeConverted = false;
// Is it a numeric character reference?
if ( pSource[nChar+1] == _T('#') )
{
// Is it a hex number?
int nBase = 10;
int nNumericChar = nChar + 2;
_TCHAR cChar = pSource[nNumericChar];
if ( cChar == _T('x') )
{
++nNumericChar;
cChar = pSource[nNumericChar];
nBase = 16;
}
// Look for terminating semi-colon within 7 characters
int nCodeLen = 0;
while ( nCodeLen < 7 && cChar && cChar != _T(';') )
{
// only ASCII digits 0-9, A-F, a-f expected
nCodeLen += (int)_tclen( &pSource[nNumericChar+nCodeLen] );
cChar = pSource[nNumericChar + nCodeLen];
}
// Process unicode
if ( cChar == _T(';') )
{
int nUnicode = _tcstol( &pSource[nNumericChar], NULL, nBase );
#if defined(_UNICODE)
pDest[nLen++] = (_TCHAR)nUnicode;
#elif defined(_MBCS)
int nMBLen = wctomb( &pDest[nLen], (wchar_t)nUnicode );
if ( nMBLen > 0 )
nLen += nMBLen;
else
nUnicode = 0;
#else
if ( nUnicode < 0x80 )
pDest[nLen++] = (_TCHAR)nUnicode;
else if ( nUnicode < 0x800 )
{
// Convert to 2-byte UTF-8
pDest[nLen++] = (_TCHAR)(((nUnicode&0x7c0)>>6) | 0xc0);
pDest[nLen++] = (_TCHAR)((nUnicode&0x3f) | 0x80);
}
else
{
// Convert to 3-byte UTF-8
pDest[nLen++] = (_TCHAR)(((nUnicode&0xf000)>>12) | 0xe0);
pDest[nLen++] = (_TCHAR)(((nUnicode&0xfc0)>>6) | 0x80);
pDest[nLen++] = (_TCHAR)((nUnicode&0x3f) | 0x80);
}
#endif
if ( nUnicode )
{
// Increment index past ampersand semi-colon
nChar = nNumericChar + nCodeLen + 1;
bCodeConverted = true;
}
}
}
else // does not start with #
{
// Look for matching &code;
for ( int nMatch = 0; nMatch < 5; ++nMatch )
{
if ( nChar < nTextLength - anCodeLen[nMatch]
&& _tcsncmp(szaCode[nMatch],&pSource[nChar+1],anCodeLen[nMatch]) == 0 )
{
// Insert symbol and increment index past ampersand semi-colon
pDest[nLen++] = szSymbol[nMatch];
nChar += anCodeLen[nMatch] + 1;
bCodeConverted = true;
break;
}
}
}
// If the code is not converted, leave it as is
if ( ! bCodeConverted )
{
pDest[nLen++] = _T('&');
++nChar;
}
}
else // not &
{
nCharLen = (int)_tclen(&pSource[nChar]);
_tccpy( &pDest[nLen], &pSource[nChar] );
nLen += nCharLen;
nChar += nCharLen;
}
}
strText.ReleaseBuffer(nLen);
return string(strText);
}
// Advance to the next node at the current level of the document.
//
// nType is treated as a bit mask of node types: 0 accepts any node, otherwise
// scanning continues until a node whose type shares a bit with nType is found.
// Returns the type of the node found, or 0 if nothing was found; on a 0 return
// the current position members are left unchanged.
int CXMLParse::FindNode( int nType )
{
// Change current node position only if a node is found
// If nType is 0 find any node, otherwise find node of type nType
// Return type of node or 0 if not found
// If found node is an element, change m_iPos
// Determine where in document to start scanning for node
int nTypeFound = 0;
int nNodeOffset = m_nNodeOffset;
// NOTE(review): "> 1" presumably means "current node is a non-element node"
// (i.e. MNT_ELEMENT == 1) -- confirm against the MNT_ constant definitions
if ( m_nNodeType > 1 )
{
// By-pass current node
nNodeOffset += m_nNodeLength;
}
else
{
// Set position to begin looking for node
nNodeOffset = 0; // default to start of document
if ( m_iPos )
{
// After element
nNodeOffset = m_aPos[m_iPos].StartAfter();
}
else if ( m_iPosParent )
{
// Immediately after start tag of parent
// An empty parent element has no content to scan
if ( m_aPos[m_iPosParent].IsEmptyElement() )
return 0;
else
nNodeOffset = m_aPos[m_iPosParent].StartContent();
}
}
// Get nodes until we find what we're looking for
// iPosNew tracks the element index that goes with the scan offset; it is
// only advanced when an element node is parsed
int iPosNew = m_iPos;
TokenPos token( m_strDoc, m_nFlags );
NodePos node;
token.nNext = nNodeOffset;
do
{
// Remember where this node starts; token.nNext is read again after
// x_ParseNode to compute the node length
nNodeOffset = token.nNext;
nTypeFound = x_ParseNode( token, node );
if ( nTypeFound == 0 )
{
// Check if we have reached the end of the parent element
// Otherwise it is a lone end tag
if ( m_iPosParent && nNodeOffset == m_aPos[m_iPosParent].StartContent()
+ m_aPos[m_iPosParent].ContentLen() )
return 0;
nTypeFound = MNT_LONE_END_TAG;
}
else if ( nTypeFound < 0 )
{
// -2 ends the search with "not found"
// NOTE(review): presumably -2 signals end of document -- confirm in x_ParseNode
if ( nTypeFound == -2 )
return 0;
// -1 is node error
nTypeFound = MNT_NODE_ERROR;
}
else if ( nTypeFound == MNT_ELEMENT )
{
// Step the element index: next sibling if there is a current element,
// otherwise the first child of the parent
if ( iPosNew )
iPosNew = m_aPos[iPosNew].iElemNext;
else
iPosNew = m_aPos[m_iPosParent].iElemChild;
// No element index available for the parsed element: report not found
if ( ! iPosNew )
return 0;
if ( ! nType || (nType & nTypeFound) )
{
// Found element node, move position to this element
x_SetPos( m_iPosParent, iPosNew, 0 );
return m_nNodeType;
}
// Element is not wanted: resume scanning after the whole element
token.nNext = m_aPos[iPosNew].StartAfter();
}
}
while ( nType && ! (nType & nTypeFound) );
// A matching non-element node was found: record it as the current node
m_iPos = iPosNew;
m_iPosChild = 0;
m_nNodeOffset = nNodeOffset;
m_nNodeLength = token.nNext - nNodeOffset;
m_nNodeType = nTypeFound;
return m_nNodeType;
}
// Remove the node at the current main position.
// Returns false when there is neither a current element nor a current node
// with a non-zero length; otherwise removes it and returns true.
bool CXMLParse::RemoveNode()
{
	// Nothing is positioned, so there is nothing to remove
	if ( ! m_iPos && ! m_nNodeLength )
		return false;

	x_RemoveNode( m_iPosParent, m_iPos, m_nNodeType, m_nNodeOffset, m_nNodeLength );

	// The child position is cleared after a removal
	m_iPosChild = 0;
	return true;
}
// Return the tag name at the current main position.
// Intended mainly for elements, but a name is also produced for certain other
// node types; an empty string is returned when no name applies.
string CXMLParse::GetTagName() const
{
	CString strName;

	// No current node length: fall back to the element at the main position
	if ( ! m_nNodeLength )
	{
		if ( m_iPos )
			strName = x_GetTagName( m_iPos ).c_str();
		return string(strName);
	}

	// A node with a length is current: the name depends on the node type
	if ( m_nNodeType == MNT_PROCESSING_INSTRUCTION || m_nNodeType == MNT_LONE_END_TAG )
	{
		// <?target or </tagname : the name follows the two-character opener
		TokenPos token( m_strDoc, m_nFlags );
		token.nNext = m_nNodeOffset + 2;
		if ( x_FindName(token) )
			strName = x_GetToken( token ).c_str();
	}
	else if ( m_nNodeType == MNT_COMMENT )
	{
		strName = _T("#comment");
	}
	else if ( m_nNodeType == MNT_CDATA_SECTION )
	{
		strName = _T("#cdata-section");
	}
	else if ( m_nNodeType == MNT_DOCUMENT_TYPE )
	{
		// <!DOCTYPE name : the first name found is expected to be the DOCTYPE
		// keyword itself, the second is the one reported
		TokenPos token( m_strDoc, m_nFlags );
		token.nNext = m_nNodeOffset + 2;
		if ( x_FindName(token) )
		{
			if ( x_FindName(token) )
				strName = x_GetToken( token ).c_str();
		}
	}
	else if ( m_nNodeType == MNT_TEXT || m_nNodeType == MNT_WHITESPACE )
	{
		strName = _T("#text");
	}
	// Any other node type leaves the name empty

	return string(strName);
}
// Make the current main position element the parent, so that its children
// become the level being navigated.
// Returns false unless the main position is on an element.
bool CXMLParse::IntoElem()
{
	// Release 6.3 behavior: IntoElem succeeds even when there is no child
	// position (a following FindElem call then finds the first element).
	// Earlier releases used a short-hand that was never part of EDOM and was
	// misleading: with no child position they first located a child element
	// and went into that one. It was dropped in 6.3, which is NOT backwards
	// compatible. The removed code was:
	//   if ( ! m_iPosChild )
	//       FindChildElem();
	const bool bOnElement = m_iPos && m_nNodeType == MNT_ELEMENT;
	if ( ! bOnElement )
		return false;

	// Main becomes parent, child becomes main, no child position
	x_SetPos( m_iPos, m_iPosChild, 0 );
	return true;
}
// Move up one level: the parent element becomes the main position and the
// former main position becomes the child position.
// Returns false when there is no parent element to go out to.
bool CXMLParse::OutOfElem()
{
	if ( ! m_iPosParent )
		return false;

	// The parent's own parent becomes the new parent position
	const int iPosGrandparent = m_aPos[m_iPosParent].iElemParent;
	x_SetPos( iPosGrandparent, m_iPosParent, m_iPos );
	return true;
}
// Return the name of the nth attribute at the main position.
// Works for an element or for a processing instruction node; returns an empty
// string for any other position or when the attribute is not found.
string CXMLParse::GetAttribName( int n ) const
{
	TokenPos token( m_strDoc, m_nFlags );

	// Work out where the attribute scan starts
	int nScanStart = 0;
	bool bCanHaveAttribs = true;
	if ( m_iPos && m_nNodeType == MNT_ELEMENT )
		nScanStart = m_aPos[m_iPos].nStart + 1;	// one past the element's start offset
	else if ( m_nNodeLength && m_nNodeType == MNT_PROCESSING_INSTRUCTION )
		nScanStart = m_nNodeOffset + 2;	// two past the node's start offset
	else
		bCanHaveAttribs = false;

	if ( bCanHaveAttribs )
	{
		token.nNext = nScanStart;
		if ( x_FindAttrib(token,NULL,n) )
			return x_GetToken( token );
	}
	return _T("");
}
bool CXMLParse::SavePos( const char * szPosName )
{
// Save current element position in saved position map
if ( szPosName )
{
SavedPos savedpos;
if ( szPosName )
savedpos.strName = szPosName;
if ( m_iPosChild )
{
savedpos.iPos = m_iPosChild;
savedpos.nSavedPosFlags |= SavedPosMap::SPM_CHILD;
}
else if ( m_iPos )
{
savedpos.iPos = m_iPos;
savedpos.nSavedPosFlags |= SavedPosMap::SPM_MAIN;
}
else
{
savedpos.iPos = m_iPosParent;
}
savedpos.nSavedPosFlags |= SavedPosMap::SPM_USED;
if ( ! m_mapSavedPos.pTable )
m_mapSavedPos.AllocMapTable();
int nSlot = m_mapSavedPos.Hash( szPosName );
SavedPos* pSavedPos = m_mapSavedPos.pTable[nSlot];
int nOffset = 0;
if ( ! pSavedPos )
{
pSavedPos = new SavedPos[2];
pSavedPos[1].nSavedPosFlags = SavedPosMap::SPM_LAST;
m_mapSavedPos.pTable[nSlot] = pSavedPos;
}
else
{
while ( pSavedPos[nOffset].nSavedPosFlags & SavedPosMap::SPM_USED )
{
if ( pSavedPos[nOffset].strName == szPosName )
break;
if ( pSavedPos[nOffset].nSavedPosFlags & SavedPosMap::SPM_LAST )
{
int nNewSize = (nOffset + 6) * 2;
SavedPos* pNewSavedPos = new SavedPos[nNewSize];
for ( int nCopy=0; nCopy<=nOffset; ++nCopy )
pNewSavedPos[nCopy] = pSavedPos[nCopy];
pNewSavedPos[nOffset].nSavedPosFlags ^= SavedPosMap::SPM_LAST;
pNewSavedPos[nNewSize-1].nSavedPosFlags = SavedPosMap::SPM_LAST;
delete [] pSavedPos;
pSavedPos = pNewSavedPos;
m_mapSavedPos.pTable[nSlot] = pSavedPos;
++nOffset;
break;
}
++nOffset;
}
}
if ( pSavedPos[nOffset].nSavedPosFlags & SavedPosMap::SPM_LAST )
savedpos.nSavedPosFlags |= SavedPosMap::SPM_LAST;
pSavedPos[nOffset] = savedpos;
/*
// To review hash table balance, uncomment and watch strBalance
CString strBalance, strSlot;
for ( nSlot=0; nSlot < SavedPosMap::SPM_SIZE; ++nSlot )
{
pSavedPos = m_mapSavedPos.pTable[nSlot];
int nCount = 0;
while ( pSavedPos && pSavedPos->nSavedPosFlags & SavedPosMap::SPM_USED )
{
++nCount;
if ( pSavedPos->nSavedPosFlags & SavedPosMap::SPM_LAST )
break;
++pSavedPos;
}
strSlot.Format( _T("%d "), nCount );
strBalance += strSlot;
}
*/
return true;
}
return false;
}
bool CXMLParse::RestorePos( const char * szPosName )
{
// Restore element position if found in saved position map
if ( szPosName && m_mapSavedPos.pTable )
{
int nSlot = m_mapSavedPos.Hash( szPosName );
SavedPos* pSavedPos = m_mapSavedPos.pTable[nSlot];
if ( pSavedPos )
{
int nOffset = 0;
while ( pSavedPos[nOffset].nSavedPosFlags & SavedPosMap::SPM_USED )
{
if ( pSavedPos[nOffset].strName == szPosName )
{
int i = pSavedPos[nOffset].iPos;
if ( pSavedPos[nOffset].nSavedPosFlags & SavedPosMap::SPM_CHILD )
x_SetPos( m_aPos[m_aPos[i].iElemParent].iElemParent, m_aPos[i].iElemParent, i );
else if ( pSavedPos[nOffset].nSavedPosFlags & SavedPosMap::SPM_MAIN )
x_SetPos( m_aPos[i].iElemParent, i, 0 );
else
x_SetPos( i, 0, 0 );
return true;
}
if ( pSavedPos[nOffset].nSavedPosFlags & SavedPosMap::SPM_LAST )
break;
++nOffset;
}
}
}
return false;
}
// Remove the element at the current main position.
// Returns false when the main position is not on an element.
bool CXMLParse::RemoveElem()
{
	if ( ! m_iPos || m_nNodeType != MNT_ELEMENT )
		return false;

	// x_RemoveElem returns the index that becomes the new main position
	// NOTE(review): presumably the previous sibling -- confirm in x_RemoveElem
	const int iPosAfterRemoval = x_RemoveElem( m_iPos );
	x_SetPos( m_iPosParent, iPosAfterRemoval, 0 );
	return true;
}
// Remove the element at the current child position.
// Returns false when there is no child position.
bool CXMLParse::RemoveChildElem()
{
	if ( ! m_iPosChild )
		return false;

	// x_RemoveElem returns the index that becomes the new child position;
	// parent and main positions are kept
	const int iChildAfterRemoval = x_RemoveElem( m_iPosChild );
	x_SetPos( m_iPosParent, m_iPos, iChildAfterRemoval );
	return true;
}
//////////////////////////////////////////////////////////////////////
// Private Methods
//////////////////////////////////////////////////////////////////////
bool CXMLParse::x_AllocPosArray( int nNewSize /*=0*/ )
{
// Resize m_aPos when the document is created or the array is filled
// The PosArray class is implemented using segments to reduce contiguous memory requirements
// It reduces reallocations (copying of memory) since this only occurs within one segment
// The "Grow By" algorithm ensures there are no reallocations after 2 segments
//
if ( ! nNewSize )
nNewSize = m_iPosFree + (m_iPosFree>>1); // Grow By: multiply size by 1.5
if ( m_aPos.GetSize() < nNewSize )
{
// Grow By: new size can be at most one more complete segment
int nSeg = (m_aPos.GetSize()?m_aPos.GetSize()-1:0) >> m_aPos.PA_SEGBITS;
int nNewSeg = (nNewSize-1) >> m_aPos.PA_SEGBITS;
if ( nNewSeg > nSeg + 1 )
{
nNewSeg = nSeg + 1;
nNewSize = (nNewSeg+1) << m_aPos.PA_SEGBITS;
}
// Allocate array of segments
if ( m_aPos.nSegs <= nNewSeg )
{
int nNewSegments = 4 + nNewSeg * 2;
char* pNewSegments = new char[nNewSegments*sizeof(char*)];
if ( m_aPos.SegsUsed() )
memcpy( pNewSegments, m_aPos.pSegs, m_aPos.SegsUsed()*sizeof(char*) );
if ( m_aPos.pSegs )
delete[] (char*)m_aPos.pSegs;
m_aPos.pSegs = (ElemPos**)pNewSegments;
m_aPos.nSegs = nNewSegments;
}
// Calculate segment sizes
int nSegSize = m_aPos.GetSize() - (nSeg << m_aPos.PA_SEGBITS);
int nNewSegSize = nNewSize - (nNewSeg << m_aPos.PA_SEGBITS);
// Complete first segment
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -