📄 markup.cpp
字号:
// Markup.cpp: implementation of the CMarkup class.
//
// Markup Release 6.1 Lite
// Copyright (C) 1999-2001 First Objective Software, Inc. All rights reserved
// This entire notice must be retained in this source code
// Redistributing this source code requires written permission
// This software is provided "as is", with no warranty.
// Latest fixes enhancements and documentation at www.firstobject.com
#include "stdafx.h"
#include "afxconv.h"
#include "Markup.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
void CMarkup::operator=( const CMarkup& markup )
{
	// Copy the document text together with all navigation and index state
	// from markup into this object.
	//
	// Assigning an object to itself must leave it untouched: without this
	// guard RemoveAll() would empty m_aPos and the following Append() would
	// then copy from the (now empty) array, losing every parsed position.
	if ( this == &markup )
		return;

	// Current main/child positions and the next free slot in the array
	m_iPos = markup.m_iPos;
	m_iPosChild = markup.m_iPosChild;
	m_iPosFree = markup.m_iPosFree;

	// Replace our position array with a copy of the source array
	m_aPos.RemoveAll();
	m_aPos.Append( markup.m_aPos );

	// Nesting level and the document text itself
	m_nLevel = markup.m_nLevel;
	m_csDoc = markup.m_csDoc;
}
void CMarkup::ResetPos()
{
	// Return to the top of the document: level zero, with neither a
	// current main element nor a current child element selected
	m_nLevel = 0;
	m_iPosChild = 0;
	m_iPos = 0;
}
bool CMarkup::SetDoc( LPCTSTR szDoc )
{
	// Replace the document text with szDoc (NULL means empty) and parse it.
	// Returns true when a root element was parsed successfully, false when
	// the text is empty or parsing failed.

	// Begin with no elements allocated and no current position
	m_iPosFree = 1;
	ResetPos();

	// Store the new text
	if ( szDoc == NULL )
		m_csDoc.Empty();
	else
		m_csDoc = szDoc;

	// Make sure the position array has at least one slot per 64 units of
	// document length, plus 8 so that a brand new document has room too.
	// The array is only ever grown here, never shrunk.
	const int nDocLength = m_csDoc.GetLength();
	const int nWantedSize = nDocLength / 64 + 8;
	if ( nWantedSize > m_aPos.GetSize() )
		m_aPos.SetSize( nWantedSize );

	// Parse starting from the virtual root held in slot 0
	bool bParsedOk = false;
	if ( nDocLength )
	{
		m_aPos[0].Clear();
		const int iRootElem = x_ParseElem( 0 );
		if ( iRootElem > 0 )
		{
			m_aPos[0].iElemChild = iRootElem;
			bParsedOk = true;
		}
	}

	// Nothing usable: wipe slot 0 and hand back every allocated slot
	if ( ! bParsedOk )
	{
		m_aPos[0].Clear();
		m_iPosFree = 1;
	}

	ResetPos();
	return bParsedOk;
}
bool CMarkup::IsWellFormed()
{
	// The document counts as well formed when the position array exists
	// and slot 0 links to a parsed root element
	return m_aPos.GetSize() != 0 && m_aPos[0].iElemChild != 0;
}
bool CMarkup::FindElem( LPCTSTR szName )
{
	// Advance the main position to the next sibling element.
	// With a NULL or empty szName any element matches; otherwise only an
	// element whose tag name equals szName does.
	// The current position is left untouched when nothing is found.

	// Pick the first candidate: the sibling after the current element, or
	// the first element linked from slot 0 when there is no current element
	int iCandidate = 0;
	if ( m_iPos )
		iCandidate = m_aPos[m_iPos].iElemNext;
	else if ( m_aPos.GetSize() )
		iCandidate = m_aPos[0].iElemChild;

	// Walk the sibling chain until a match turns up or the chain ends
	for ( ; iCandidate; iCandidate = m_aPos[iCandidate].iElemNext )
	{
		if ( szName == NULL || !szName[0] || x_GetTagName(iCandidate) == szName )
		{
			// Found: move here and forget any child position
			m_iPos = iCandidate;
			m_iPosChild = 0;
			return true;
		}
	}
	return false;
}
bool CMarkup::FindChildElem( LPCTSTR szName )
{
	// Advance the child position to the next child of the main element.
	// With a NULL or empty szName any child matches; otherwise only a
	// child whose tag name equals szName does.
	// The child position is left untouched when nothing is found.

	// Convenience: without a main position, first try to establish one
	if ( m_iPos == 0 )
		FindElem();

	// Give up if there is still no main element or it reports itself empty
	if ( m_iPos == 0 || m_aPos[m_iPos].IsEmptyElement() )
		return false;

	// Resume after the current child, or start at the first child
	int iCandidate = m_iPosChild
		? m_aPos[m_iPosChild].iElemNext
		: m_aPos[m_iPos].iElemChild;

	// Walk the sibling chain until a match turns up or the chain ends
	for ( ; iCandidate; iCandidate = m_aPos[iCandidate].iElemNext )
	{
		if ( szName == NULL || !szName[0] || x_GetTagName(iCandidate) == szName )
		{
			m_iPosChild = iCandidate;
			return true;
		}
	}
	return false;
}
bool CMarkup::IntoElem()
{
	// Descend one level: the child element becomes the main element.
	// If no child position is set yet, the next child is looked up first.
	if ( m_iPosChild == 0 )
		FindChildElem();

	// No child available, so there is nothing to descend into
	if ( m_iPosChild == 0 )
		return false;

	m_iPos = m_iPosChild;
	m_iPosChild = 0;
	++m_nLevel;
	return true;
}
bool CMarkup::OutOfElem()
{
	// Ascend one level: the parent becomes the main element and the element
	// we came from becomes the child position.
	// Fails when there is no main element or we are already at level zero.
	if ( m_iPos == 0 || m_nLevel <= 0 )
		return false;

	const int iCameFrom = m_iPos;
	m_iPos = m_aPos[iCameFrom].iElemParent;
	m_iPosChild = iCameFrom;
	--m_nLevel;
	return true;
}
bool CMarkup::GetOffsets( int& nStart, int& nEnd ) const
{
	// Report the document offsets recorded for the current main element:
	// nStart receives its nStartL and nEnd its nEndR.
	// Not part of EDOM, but used by the Markup project.
	// Returns false, leaving both outputs untouched, without a main element.
	if ( ! m_iPos )
		return false;

	nStart = m_aPos[m_iPos].nStartL;
	nEnd = m_aPos[m_iPos].nEndR;
	return true;
}
bool CMarkup::AddElem( LPCTSTR szName, LPCTSTR szValue )
{
	// Insert a new element after the current main position and make it the
	// new main position. The child position is always cleared.

	// The new element shares the parent of the current one (0 if none)
	int iParent = 0;
	if ( m_iPos )
		iParent = m_aPos[m_iPos].iElemParent;
	m_iPosChild = 0;

	// Parent 0 means the new element would be the root element
	if ( iParent == 0 )
	{
		// A well formed document already has its root: refuse
		if ( IsWellFormed() )
			return false;
		// Otherwise discard whatever text is there and start afresh
		m_csDoc.Empty();
	}

	m_iPos = x_Add( iParent, m_iPos, szName, szValue );
	return true;
}
bool CMarkup::AddChildElem( LPCTSTR szName, LPCTSTR szValue )
{
	// Insert a new child under the main element, after the current child
	// position, and make the new element the child position.
	// Fails when there is no main element.
	if ( m_iPos == 0 )
		return false;

	// Without a child position, run down the sibling chain so that the
	// insertion happens after the last existing child (if there is one)
	if ( m_iPosChild == 0 )
	{
		int iSibling = m_aPos[m_iPos].iElemChild;
		while ( iSibling != 0 )
		{
			m_iPosChild = iSibling;
			iSibling = m_aPos[iSibling].iElemNext;
		}
	}

	m_iPosChild = x_Add( m_iPos, m_iPosChild, szName, szValue );
	return true;
}
bool CMarkup::AddAttrib( LPCTSTR szAttrib, LPCTSTR szValue )
{
	// Give the current main element an attribute.
	// Fails when there is no main element.
	if ( ! m_iPos )
		return false;

	x_AddAttrib( m_iPos, szAttrib, szValue );
	return true;
}
bool CMarkup::AddChildAttrib( LPCTSTR szAttrib, LPCTSTR szValue )
{
	// Give the current child element an attribute.
	// Fails when there is no child position.
	if ( ! m_iPosChild )
		return false;

	x_AddAttrib( m_iPosChild, szAttrib, szValue );
	return true;
}
//////////////////////////////////////////////////////////////////////
// Private Methods
//////////////////////////////////////////////////////////////////////
int CMarkup::x_GetFreePos()
{
	//
	// This returns the index of the next unused ElemPos in the array and
	// marks it as used, enlarging the array first when it is full.
	//
	// The array normally grows by half of the free index. Two cases need
	// extra care so that the returned index is always inside the array:
	//  - the free index may already be past the end of the array, so the
	//    test is >= rather than ==
	//  - for a free index of 0 or 1 "half" is zero, which would not grow
	//    the array at all, so at least one extra slot is requested
	//
	if ( m_iPosFree >= m_aPos.GetSize() )
	{
		int nNewSize = m_iPosFree + m_iPosFree / 2;
		if ( nNewSize <= m_iPosFree )
			nNewSize = m_iPosFree + 1;
		m_aPos.SetSize( nNewSize );
	}
	++m_iPosFree;
	return m_iPosFree - 1;
}
int CMarkup::x_ReleasePos()
{
	//
	// Hand back the slot most recently obtained from x_GetFreePos() by
	// stepping the free index back by one, so that slot can be reused.
	// Always returns 0, which lets callers write "return x_ReleasePos();"
	//
	m_iPosFree -= 1;
	return 0;
}
int CMarkup::x_ParseError( LPCTSTR szError, LPCTSTR szTag )
{
	// Record a parse error in m_csError, release the most recently taken
	// position slot and return -1.
	// When szTag is given, szError is used as a format string with szTag
	// as its argument; otherwise szError is stored as is.
	if ( szTag == NULL )
		m_csError = szError;
	else
		m_csError.Format( szError, szTag );

	x_ReleasePos();
	return -1;
}
int CMarkup::x_ParseElem( int iPosParent )
{
// Parse the next element under m_aPos[iPosParent], including everything
// nested inside it, and record its offsets and links in a new ElemPos.
//
// This is called by SetDoc (with iPosParent 0) and by itself recursively;
// the original notes also name x_AddSubDoc as a caller, which is not
// visible in this part of the file.
// m_aPos[iPosParent].nEndL is where to start parsing for the child element
//
// Returns:
//   > 0  the index in m_aPos of the element that was parsed
//   0    no element starts here: a tag beginning with '/' was met, which is
//        probably the end tag of the parent
//   -1   parse error; x_ParseError has stored a message in m_csError
//
// In all cases we need to get a new ElemPos, but release it if unused
//
int iPos = x_GetFreePos();
// Seed the new entry from the parent's offsets and clear its links
m_aPos[iPos].nStartL = m_aPos[iPosParent].nEndL;
m_aPos[iPos].nNext = m_aPos[iPosParent].nStartR + 1;
m_aPos[iPos].iElemParent = iPosParent;
m_aPos[iPos].iElemChild = 0;
m_aPos[iPos].iElemNext = 0;
// Start Tag
// A loop is used to ignore all remarks tags and special tags
// i.e. <?xml version="1.0"?>, and <!-- comment here -->
// So any tag beginning with ? or ! is ignored
// Loop past ignored tags
// csName stays empty for ignored tags, so the loop only ends once a real
// element name has been read (or one of the return statements is taken)
TokenPos token;
token.nNext = m_aPos[iPosParent].nEndL;
CString csName;
while ( csName.IsEmpty() )
{
// Look for left angle bracket of start tag
m_aPos[iPos].nStartL = token.nNext;
if ( ! x_FindChar( m_aPos[iPos].nStartL, _T('<') ) )
return x_ParseError( _T("Element tag not found") );
// Set parent's End tag to start looking from here (or later)
m_aPos[iPosParent].nEndL = m_aPos[iPos].nStartL;
// Determine whether this is an element, comment or version tag
// by looking at the first token after the '<'
LPCTSTR szEndOfTag = NULL;
token.nNext = m_aPos[iPos].nStartL + 1;
if ( x_FindToken( token ) )
{
if ( token.bIsString )
return x_ParseError( _T("Tag starts with quote") );
TCHAR cFirstChar = m_csDoc[token.nL];
if ( cFirstChar == _T('?') )
szEndOfTag = _T("?>"); // version
else if ( cFirstChar == _T('!') )
{
// The character after '!' tells the kinds of special tag apart;
// it is only read if it lies inside the document
TCHAR cSecondChar = 0;
if ( token.nL+1 < m_csDoc.GetLength() )
cSecondChar = m_csDoc[token.nL+1];
if ( cSecondChar == _T('[') )
szEndOfTag = _T("]]>"); // CDATA section
else if ( cSecondChar == _T('-') )
szEndOfTag = _T("-->"); // comment
else
szEndOfTag = _T(">"); // DTD
}
else if ( cFirstChar != _T('/') )
{
// An ordinary start tag: remember its name, which also ends the loop
csName = x_GetToken( token );
szEndOfTag = _T(">");
}
else
return x_ReleasePos(); // probably end tag of parent
}
else
return x_ParseError( _T("Abrupt end within tag") );
// Look for end of tag
// (a plain text search for the terminator chosen above)
token.nNext = m_csDoc.Find( szEndOfTag, token.nNext );
if ( token.nNext == -1 )
return x_ParseError( _T("End of tag not found") );
}
// The loop ended on an element tag, so token.nNext is the offset of the
// '>' that closes the start tag
m_aPos[iPos].nStartR = token.nNext;
// Is ending mark within start tag, i.e. empty element?
if ( m_csDoc[m_aPos[iPos].nStartR-1] == _T('/') )
{
// Empty element
// Close tag left is set to ending mark, and right to open tag right
m_aPos[iPos].nEndL = m_aPos[iPos].nStartR-1;
m_aPos[iPos].nEndR = m_aPos[iPos].nStartR;
}
else // look for end tag
{
// Element probably has contents
// Determine where to start looking for left angle bracket of end tag
// This is done by recursively parsing the contents of this element
int iInner, iInnerPrev = 0;
m_aPos[iPos].nEndL = m_aPos[iPos].nStartR + 1;
while ( (iInner = x_ParseElem( iPos )) > 0 )
{
// Set links to iInner
// (the first child hangs off this element, later ones off their
// preceding sibling)
if ( iInnerPrev )
m_aPos[iInnerPrev].iElemNext = iInner;
else
m_aPos[iPos].iElemChild = iInner;
iInnerPrev = iInner;
// Set offset to reflect child
m_aPos[iPos].nEndL = m_aPos[iInner].nEndR + 1;
}
// A nested parse failed: pass the error up. The slot taken at this level
// is not released here; SetDoc resets m_iPosFree itself when parsing fails
if ( iInner == -1 )
return -1;
// Look for left angle bracket of end tag
if ( ! x_FindChar( m_aPos[iPos].nEndL, _T('<') ) )
return x_ParseError( _T("End tag of %s element not found"), csName );
// Look through tokens of end tag
// Expected order: the slash, then the element name, then the closing '>'
token.nNext = m_aPos[iPos].nEndL + 1;
int nTokenCount = 0;
while ( x_FindToken( token ) )
{
++nTokenCount;
if ( ! token.bIsString )
{
// Is first token not an end slash mark?
if ( nTokenCount == 1 && m_csDoc[token.nL] != _T('/') )
return x_ParseError( _T("Expecting end tag of element %s"), csName );
// Does the second token differ from the name in the start tag?
else if ( nTokenCount == 2 && csName != x_GetToken( token ) )
return x_ParseError( _T("End tag does not correspond to %s"), csName );
// Else is it a right angle bracket?
else if ( m_csDoc[token.nL] == _T('>') )
break;
}
}
// Was a right angle bracket not found?
// (the token is no longer valid, or fewer than two tokens were seen)
if ( ! token.IsValid() || nTokenCount < 2 )
return x_ParseError( _T("End tag not completed for element %s"), csName );
m_aPos[iPos].nEndR = token.nL;
}
// Successfully found positions of angle brackets
// nNext is moved from the end of this element to the next '<' after it;
// the result of x_FindChar is deliberately not checked here
m_aPos[iPos].nNext = m_aPos[iPos].nEndR;
x_FindChar( m_aPos[iPos].nNext, _T('<') );
return iPos;
}
bool CMarkup::x_FindChar( int&n, _TCHAR c ) const
{
// Look for char c starting at n, and set n to point to it
// c is always the first char of a multi-byte char
// Return false if not found before end of document
LPCTSTR szDoc = (LPCTSTR)m_csDoc;
while ( szDoc[n] && szDoc[n] != c )
n += _tclen( &szDoc[n] );
if ( ! szDoc[n] )
return false;
return true;
}
bool CMarkup::x_FindToken( CMarkup::TokenPos& token ) const
{
// Starting at token.nNext, find the next token
// upon successful return, token.nNext points after the retrieved token
LPCTSTR szDoc = (LPCTSTR)m_csDoc;
int n = token.nNext;
// Statically defined CStrings for whitespace and special chars
static CString csWhitespace = _T(" \t\n\r");
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -