📄 markup.cpp
字号:
pElem = &m_aPos[iPosMatch];
pElem->nLength = aNodes.Top().nStart - pElem->nStart + aNodes.Top().nLength;
pElem->SetEndTagLen( aNodes.Top().nLength );
}
}
else if ( nTypeFound == -1 )
{
m_aPos[iVirtualParent].nFlags |= MNF_ILLFORMED;
m_aPos[iPos].nFlags |= MNF_ILLDATA;
if ( MCD_STRISEMPTY(m_strError) )
m_strError = aNodes.Top().strMeta;
}
// Matched end tag, or end of document
if ( nMatchDepth || nTypeFound == -2 )
{
if ( nDepth > nMatchDepth )
m_aPos[iVirtualParent].nFlags |= MNF_ILLFORMED;
// Process any non-ended elements
while ( nDepth > nMatchDepth )
{
// Element with no end tag
pElem = &m_aPos[iPos];
iPosChild = pElem->iElemChild;
iPosParent = pElem->iElemParent;
pElem->SetEndTagLen( 0 );
pElem->nFlags |= MNF_NONENDED;
pElem->iElemChild = 0;
pElem->nLength = pElem->StartTagLen();
if ( pElem->nFlags & MNF_ILLDATA )
{
pElem->nFlags ^= MNF_ILLDATA;
m_aPos[iPosParent].nFlags |= MNF_ILLDATA;
}
while ( iPosChild )
{
m_aPos[iPosChild].iElemParent = iPosParent;
m_aPos[iPosChild].iElemPrev = iPos;
m_aPos[iPos].iElemNext = iPosChild;
iPos = iPosChild;
iPosChild = m_aPos[iPosChild].iElemNext;
}
iPos = iPosParent;
aNodes.Remove();
--nDepth;
// Error string
// if end tag did not match, top node is end tag that did not match pElem
// if end of document, any nodes below top have no end tag
if ( MCD_STRISEMPTY(m_strError) )
{
if ( nTypeFound == 0 )
{
MCD_CHAR* szError = new MCD_CHAR[MCD_STRLENGTH(aNodes.Top().strMeta)+token.Length()+100];
MCD_SPRINTF( szError, _T("End tag '%s' at offset %d does not match start tag '%s' at offset %d"),
MCD_2PCSZ(x_GetToken(token)), token.nL-1, MCD_2PCSZ(aNodes.Top().strMeta), pElem->nStart );
m_strError = szError;
delete [] szError;
}
else
{
MCD_CHAR* szError = new MCD_CHAR[MCD_STRLENGTH(aNodes.Top().strMeta)+100];
MCD_SPRINTF( szError, _T("Element '%s' at offset %d not ended"),
MCD_2PCSZ(aNodes.Top().strMeta), aNodes.Top().nStart );
m_strError = szError;
delete [] szError;
}
}
}
if ( nTypeFound == -2 )
break;
iPosParent = m_aPos[iPos].iElemParent;
iPos = iPosParent;
aNodes.Remove();
--nDepth;
}
}
return iElemRoot;
}
bool CMarkup::x_FindAny( MCD_PCSZ szDoc, int& nChar )
{
	// Advance nChar past any run of space, tab, newline or carriage return.
	// Returns true when nChar ends up on a non-whitespace character,
	// false when the terminating nul is reached first (nChar then indexes the nul).
	for ( ;; )
	{
		const MCD_CHAR cCur = szDoc[nChar];
		if ( cCur == _T('\0') )
			return false; // ran off the end of the document
		if ( ! MCD_PSZCHR(_T(" \t\n\r"),cCur) )
			return true; // found something that is not whitespace
		++nChar;
	}
}
bool CMarkup::x_FindName( CMarkup::TokenPos& token )
{
	// Skip whitespace from token.nNext and locate the next name token.
	// On success: returns true, token.nL/token.nR bracket the token (inclusive)
	// and token.nNext is one past its last character.
	// On end of document: returns false, token.nL and token.nNext index the end
	// and token.nR is one less, so the token is empty.
	MCD_PCSZ szDoc = token.szDoc;
	int nPos = token.nNext;

	if ( x_FindAny(szDoc,nPos) )
	{
		// Scan forward until whitespace or one of the special characters
		const int nFirst = nPos;
		while ( szDoc[nPos] && ! MCD_PSZCHR(_T(" \t\n\r<>=\\/?!"),szDoc[nPos]) )
			nPos += MCD_CLEN(&szDoc[nPos]);

		// Nothing consumed means the token is a single special character
		if ( nPos == nFirst )
			++nPos;

		token.nL = nFirst;
		token.nR = nPos - 1;
		token.nNext = nPos;
		return true;
	}

	// Only whitespace remained before the end of the document
	token.nL = nPos;
	token.nR = nPos - 1;
	token.nNext = nPos;
	return false;
}
MCD_STR CMarkup::x_GetToken( const CMarkup::TokenPos& token )
{
	// token.nL..token.nR are inclusive document offsets of a small substring.
	// Copy that substring out of the document; an inverted range yields an empty string.
	if ( token.nL <= token.nR )
	{
		MCD_PCSZ pFirst = &token.szDoc[token.nL];
		return MCD_STR( pFirst, token.Length() );
	}
	return _T("");
}
int CMarkup::x_FindElem( int iPosParent, int iPos, MCD_PCSZ szPath ) const
{
// If szPath is NULL or empty, go to next sibling element
// Otherwise go to next sibling element with matching path
//
if ( iPos )
iPos = m_aPos[iPos].iElemNext;
else
iPos = m_aPos[iPosParent].iElemChild;
// Finished here if szPath not specified
if ( szPath == NULL || !szPath[0] )
return iPos;
// Search
TokenPos token( m_strDoc, m_nFlags );
while ( iPos )
{
// Compare tag name
token.nNext = m_aPos[iPos].nStart + 1;
x_FindName( token ); // Locate tag name
if ( token.Match(szPath) )
return iPos;
iPos = m_aPos[iPos].iElemNext;
}
return 0;
}
int CMarkup::x_ParseNode( CMarkup::TokenPos& token, CMarkup::NodePos& node )
{
	// Parse a single node or tag of the document with a character-driven state machine.
	//
	// Call this with token.nNext set to the start of the node or tag
	// Upon return token.nNext points to the char after the node or tag
	//
	// <!--...--> comment
	// <!DOCTYPE ...> dtd
	// <?target ...?> processing instruction
	// <![CDATA[...]]> cdata section
	// <NAME ...> element start tag
	// </NAME ...> element end tag
	//
	// returns the nodetype or
	// 0 for end tag
	// -1 for bad node
	// -2 for end of document
	//
	// Outputs:
	//   node.nStart, node.nLength, node.nNodeType and node.nFlags describe the node
	//   (MNF_EMPTY is set in node.nFlags for a start tag closed with "/>").
	//   node.strMeta receives the name for start tags and processing instructions,
	//   or an error message when the node is bad or unterminated.
	//   For an end tag, token.nL and token.nR are set to bracket the tag name.
	//
	// State bits kept in nParseFlags while scanning
	enum ParseBits
	{
		PD_OPENTAG = 1,     // previous char was '<', node kind not decided yet
		PD_BANG = 2,        // seen "<!"
		PD_DASH = 4,        // seen "<!-"
		PD_BRACKET = 8,     // seen "<!["
		PD_TEXTORWS = 16,   // only whitespace so far; may still become text
		PD_DOCTYPE = 32,    // seen "<!D"; stays set while scanning nested "<!..." declarations
		PD_INQUOTE_S = 64,  // inside a single-quoted value
		PD_INQUOTE_D = 128, // inside a double-quoted value
		PD_EQUALS = 256,    // '=' seen, a quote may open an attribute value
	};
	int nParseFlags = 0;
	MCD_PCSZ szFindEnd = NULL; // terminator being searched for, NULL when none
	int nNodeType = -1;
	int nEndLen = 0;           // length of szFindEnd in characters
	int nName = 0;             // document offset of the name being scanned, 0 when none
	unsigned int cDminus1 = 0, cDminus2 = 0; // previous two characters inside a tag

	// FINDNODETYPE: node kind decided. e = terminator string, t = node type,
	// n = if nonzero, the name starts n-1 characters after the current one.
#define FINDNODETYPE(e,t,n) { szFindEnd=e; nEndLen=(sizeof(e)-1)/sizeof(MCD_CHAR); nNodeType=t; if(n) nName=(int)(pDoc-token.szDoc)+n-1; }
	// FINDNODEBAD: malformed node. Record an error message in node.strMeta and
	// scan on to the next '>' (or '<') with node type -1.
#define FINDNODEBAD(e) { szFindEnd=_T(">"); nEndLen=1; MCD_CHAR szE[100]; MCD_SPRINTF(szE,_T("Incorrect %s at offset %d"),e,nR); node.strMeta=szE; nNodeType=-1; }

	node.nStart = token.nNext;
	node.nFlags = 0;

	int nR = token.nNext; // offset of the last character belonging to the node
	MCD_PCSZ pDoc = &token.szDoc[nR];
	// The 'register' storage-class specifier was removed in C++17, so cD is a plain local
	unsigned int cD = (unsigned int)*pDoc;
	if ( ! cD )
	{
		node.nLength = 0;
		node.nNodeType = 0;
		return -2; // end of document
	}

	while ( 1 )
	{
		cD = (unsigned int)*pDoc;
		if ( ! cD )
		{
			// Document ended while inside the node
			nR = (int)(pDoc - token.szDoc) - 1;
			if ( nNodeType != MNT_WHITESPACE && nNodeType != MNT_TEXT )
			{
				// Anything other than trailing text or whitespace is unterminated
				MCD_PCSZ szType = _T("tag");
				if ( (nParseFlags & PD_DOCTYPE) || nNodeType == MNT_DOCUMENT_TYPE )
					szType = _T("Doctype");
				else if ( nNodeType == MNT_ELEMENT )
					szType = _T("Element tag");
				else if ( nNodeType == 0 )
					szType = _T("Element end tag");
				else if ( nNodeType == MNT_CDATA_SECTION )
					szType = _T("CDATA Section");
				else if ( nNodeType == MNT_PROCESSING_INSTRUCTION )
					szType = _T("Processing instruction");
				else if ( nNodeType == MNT_COMMENT )
					szType = _T("Comment");
				nNodeType = -1;
				MCD_CHAR szError[100];
				MCD_SPRINTF( szError, _T("%s at offset %d unterminated"), szType, node.nStart );
				node.strMeta = szError;
			}
			break;
		}

		if ( nName )
		{
			// Still scanning a name: it ends at whitespace, '/' or '>'
			if ( MCD_PSZCHR(_T(" \t\n\r/>"),(MCD_CHAR)cD) )
			{
				int nNameLen = (int)(pDoc - token.szDoc) - nName;
				if ( nNodeType == 0 )
				{
					// End tag: report the name through the token
					token.nL = nName;
					token.nR = nName + nNameLen - 1;
				}
				else
				{
					MCD_STRASSIGN(node.strMeta,&token.szDoc[nName],nNameLen);
				}
				nName = 0;
				cDminus2 = 0;
				cDminus1 = 0;
			}
			else
			{
				pDoc += MCD_CLEN( pDoc );
				continue;
			}
		}

		if ( szFindEnd )
		{
			// Node kind is known; look for its terminator
			if ( cD == _T('>') && ! (nParseFlags & (PD_INQUOTE_S|PD_INQUOTE_D)) )
			{
				nR = (int)(pDoc - token.szDoc);
				if ( nEndLen == 1 )
				{
					szFindEnd = NULL;
					if ( nNodeType == MNT_ELEMENT && cDminus1 == _T('/') )
					{
						// "/>" only marks an empty element when the slash follows
						// whitespace, a quote, or directly follows the name
						if ( (! cDminus2) || MCD_PSZCHR(_T(" \t\n\r\'\""),(MCD_CHAR)cDminus2) )
							node.nFlags |= MNF_EMPTY;
					}
				}
				else if ( nR > nEndLen )
				{
					// Test for end of PI or comment
					// The final '>' already matched, so compare the characters before it
					MCD_PCSZ pEnd = pDoc - nEndLen + 1;
					MCD_PCSZ pFindEnd = szFindEnd;
					int nLen = nEndLen;
					while ( --nLen && *pEnd++ == *pFindEnd++ );
					if ( nLen == 0 )
						szFindEnd = NULL;
				}
				// Inside a DOCTYPE, a finished inner declaration does not end the node
				if ( ! szFindEnd && ! (nParseFlags & PD_DOCTYPE) )
					break;
			}
			else if ( cD == _T('<') && (nNodeType == MNT_TEXT || nNodeType == -1) )
			{
				// Text and bad nodes end just before the next '<'
				nR = (int)(pDoc - token.szDoc) - 1;
				break;
			}
			else if ( nNodeType & MNT_ELEMENT )
			{
				// Track attribute quoting so that '>' inside a value is ignored
				if ( (nParseFlags & (PD_INQUOTE_S|PD_INQUOTE_D)) )
				{
					if ( cD == _T('\"') && (nParseFlags&PD_INQUOTE_D) )
						nParseFlags ^= PD_INQUOTE_D; // off
					else if ( cD == _T('\'') && (nParseFlags&PD_INQUOTE_S) )
						nParseFlags ^= PD_INQUOTE_S; // off
				}
				else // not in quotes
				{
					// Only set INQUOTE status when preceded by equal sign
					if ( cD == _T('\"') && (nParseFlags&PD_EQUALS) )
						nParseFlags ^= PD_INQUOTE_D|PD_EQUALS; // D on, equals off
					else if ( cD == _T('\'') && (nParseFlags&PD_EQUALS) )
						nParseFlags ^= PD_INQUOTE_S|PD_EQUALS; // S on, equals off
					else if ( cD == _T('=') && cDminus1 != _T('=') && ! (nParseFlags&PD_EQUALS) )
						nParseFlags ^= PD_EQUALS; // on
					else if ( (nParseFlags&PD_EQUALS) && ! MCD_PSZCHR(_T(" \t\n\r"),(MCD_CHAR)cD) )
						nParseFlags ^= PD_EQUALS; // off
				}
				cDminus2 = cDminus1;
				cDminus1 = cD;
			}
			else if ( nNodeType & MNT_DOCUMENT_TYPE )
			{
				// In DTD declarations quotes simply toggle, no '=' required
				if ( cD == _T('\"') && ! (nParseFlags&PD_INQUOTE_S) )
					nParseFlags ^= PD_INQUOTE_D; // toggle
				else if ( cD == _T('\'') && ! (nParseFlags&PD_INQUOTE_D) )
					nParseFlags ^= PD_INQUOTE_S; // toggle
			}
		}
		else if ( nParseFlags )
		{
			// Node kind not decided yet; the state bits say what has been seen
			if ( nParseFlags & PD_TEXTORWS )
			{
				if ( cD == _T('<') )
				{
					// Nothing but whitespace before the next tag
					nR = (int)(pDoc - token.szDoc) - 1;
					nNodeType = MNT_WHITESPACE;
					break;
				}
				else if ( ! MCD_PSZCHR(_T(" \t\n\r"),(MCD_CHAR)cD) )
				{
					// First non-whitespace character turns the node into text
					nParseFlags ^= PD_TEXTORWS;
					FINDNODETYPE( _T("<"), MNT_TEXT, 0 )
				}
			}
			else if ( nParseFlags & PD_OPENTAG )
			{
				// Character right after '<' selects the node kind
				nParseFlags ^= PD_OPENTAG;
				// Name start: above 0x60 (lowercase letters and beyond), A-Z, '_' or ':'
				if ( cD > 0x60 || ( cD > 0x40 && cD < 0x5b ) || cD == 0x5f || cD == 0x3a )
					FINDNODETYPE( _T(">"), MNT_ELEMENT, 1 )
				else if ( cD == _T('/') )
					FINDNODETYPE( _T(">"), 0, 2 )
				else if ( cD == _T('!') )
					nParseFlags |= PD_BANG;
				else if ( cD == _T('?') )
					FINDNODETYPE( _T("?>"), MNT_PROCESSING_INSTRUCTION, 2 )
				else
					FINDNODEBAD( _T("tag name character") )
			}
			else if ( nParseFlags & PD_BANG )
			{
				// Character right after "<!"
				nParseFlags ^= PD_BANG;
				if ( cD == _T('-') )
					nParseFlags |= PD_DASH;
				else if ( cD == _T('[') && !(nParseFlags & PD_DOCTYPE) )
					nParseFlags |= PD_BRACKET;
				else if ( cD == _T('D') && !(nParseFlags & PD_DOCTYPE) )
					nParseFlags |= PD_DOCTYPE;
				else if ( MCD_PSZCHR(_T("EAN"),(MCD_CHAR)cD) ) // <!ELEMENT ATTLIST ENTITY NOTATION
					FINDNODETYPE( _T(">"), MNT_DOCUMENT_TYPE, 0 )
				else
					FINDNODEBAD( _T("! tag") )
			}
			else if ( nParseFlags & PD_DASH )
			{
				// Character right after "<!-"
				nParseFlags ^= PD_DASH;
				if ( cD == _T('-') )
					FINDNODETYPE( _T("-->"), MNT_COMMENT, 0 )
				else
					FINDNODEBAD( _T("comment tag") )
			}
			else if ( nParseFlags & PD_BRACKET )
			{
				// Character right after "<!["
				nParseFlags ^= PD_BRACKET;
				if ( cD == _T('C') )
					FINDNODETYPE( _T("]]>"), MNT_CDATA_SECTION, 0 )
				else
					FINDNODEBAD( _T("tag") )
			}
			else if ( nParseFlags & PD_DOCTYPE )
			{
				// Inside the DOCTYPE, between inner declarations
				if ( cD == _T('<') )
					nParseFlags |= PD_OPENTAG;
				else if ( cD == _T('>') )
				{
					nR = (int)(pDoc - token.szDoc);
					nNodeType = MNT_DOCUMENT_TYPE;
					break;
				}
			}
		}
		else if ( cD == _T('<') )
		{
			// First character of the node opens a tag
			nParseFlags |= PD_OPENTAG;
		}
		else
		{
			// First character is not '<': whitespace or text node
			nNodeType = MNT_WHITESPACE;
			if ( MCD_PSZCHR(_T(" \t\n\r"),(MCD_CHAR)cD) )
				nParseFlags |= PD_TEXTORWS;
			else
				FINDNODETYPE( _T("<"), MNT_TEXT, 0 )
		}
		pDoc += MCD_CLEN( pDoc );
	}

	// Publish the extent and type of the node
	token.nNext = nR + 1;
	node.nLength = token.nNext - node.nStart;
	node.nNodeType = nNodeType;
	return nNodeType;
}
MCD_STR CMarkup::x_GetPath( int iPos ) const
{
	// Build an absolute path to element iPos by walking up through its parents.
	// Each step is "/name", with "[n]" appended when the element is the n-th
	// (n > 1) same-named sibling under its parent.
	MCD_STR strPath;
	for ( int iElem = iPos; iElem; )
	{
		const MCD_STR strName = x_GetTagName( iElem );
		const int iParent = m_aPos[iElem].iElemParent;

		// Count same-named siblings up to and including this element
		int nOrdinal = 0;
		int iSibling = 0;
		do
		{
			iSibling = x_FindElem( iParent, iSibling, MCD_2PCSZ(strName) );
			++nOrdinal;
		}
		while ( iSibling != iElem );

		// Prepend this step to what has been collected so far
		MCD_STR strStep = _T("/") + strName;
		if ( nOrdinal > 1 )
		{
			MCD_CHAR szIndex[25];
			MCD_SPRINTF( szIndex, _T("[%d]"), nOrdinal );
			strStep = strStep + szIndex;
		}
		strPath = strStep + strPath;

		iElem = iParent;
	}
	return strPath;
}
MCD_STR CMarkup::x_GetTagName( int iPos ) const
{
	// Extract the tag name of element iPos from the document text.
	// Yields an empty string when iPos is 0 or no name can be located.
	TokenPos token( m_strDoc, m_nFlags );
	// The name begins one past the element's start offset
	token.nNext = m_aPos[iPos].nStart + 1;

	if ( iPos == 0 )
		return _T("");
	if ( ! x_FindName( token ) )
		return _T("");

	// Hand back the located substring of the document
	return x_GetToken( token );
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -