📄 markup.cpp
字号:
// Markup.cpp: implementation of the CMarkup class.
//
// Markup Release 6.3
// Copyright (C) 1999-2002 First Objective Software, Inc. All rights reserved
// Go to www.firstobject.com for the latest CMarkup and EDOM documentation
// Use in commercial applications requires written permission
// This software is provided "as is", with no warranty.
#include "stdafx.h"
#include "afxconv.h"
#include "Markup.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
#ifdef _MBCS
#pragma message( "Note: MBCS build (not UTF-8)" )
// For UTF-8, remove _MBCS from project settings C/C++ preprocessor definitions
#endif
// Defines for Windows CE
// Fallback definitions used only when _tclen is not already defined.
// They treat every character as exactly one _TCHAR.
// Both expansions are fully parenthesized so each macro behaves as a single
// expression wherever it is used (for example as an operand of a comparison).
#ifndef _tclen
#define _tclen(p) (1)
#define _tccpy(p1,p2) (*(p1)=*(p2))
#endif
void CMarkup::operator=( const CMarkup& markup )
{
    // Copy the navigation indexes, the node type, the position array and the
    // document text from markup into this object.
    //
    // Self-assignment must be a no-op: the position array is emptied before the
    // source array is appended, so copying from itself would discard every entry.
    if ( this == &markup )
        return;

    m_iPosParent = markup.m_iPosParent;
    m_iPos = markup.m_iPos;
    m_iPosChild = markup.m_iPosChild;
    m_iPosFree = markup.m_iPosFree;
    m_nNodeType = markup.m_nNodeType;

    // Replace the whole position array with a copy of the source array
    m_aPos.RemoveAll();
    m_aPos.Append( markup.m_aPos );

    m_csDoc = markup.m_csDoc;

    // NOTE(review): m_mapSavedPos and m_csError are not copied here -- confirm that is intended
    MARKUP_SETDEBUGSTATE;
}
bool CMarkup::SetDoc( LPCTSTR szDoc )
{
    // Replace the document text and parse it again from scratch.
    // szDoc: the new document text; NULL is treated as an empty document.
    // Returns true when x_ParseElem produced a top-level element, false for an
    // empty document or a failed parse.
    // Saved positions are discarded and the current position is reset in every case.

    // Reset indexes
    m_iPosFree = 1;
    ResetPos();
    m_mapSavedPos.RemoveAll();

    // Set document text
    if ( szDoc )
        m_csDoc = szDoc;
    else
        m_csDoc.Empty();

    // Starting size of position array: 1 element per 64 bytes of document
    // Tight fit when parsing small doc, only 0 to 2 reallocs when parsing large doc
    // Start at 8 when creating new document
    // The array is only ever grown here, never shrunk
    int nStartSize = m_csDoc.GetLength() / 64 + 8;
    if ( m_aPos.GetSize() < nStartSize )
        m_aPos.SetSize( nStartSize );

    // Parse document
    bool bWellFormed = false;
    if ( m_csDoc.GetLength() )
    {
        // Entry 0 acts as the parent of the top-level element
        m_aPos[0].Clear();
        int iPos = x_ParseElem( 0 );
        if ( iPos > 0 )
        {
            m_aPos[0].iElemChild = iPos;
            bWellFormed = true;
        }
    }

    // Clear indexes if parse failed or empty document
    if ( ! bWellFormed )
    {
        m_aPos[0].Clear();
        m_iPosFree = 1;
    }

    ResetPos();
    return bWellFormed;
}
bool CMarkup::IsWellFormed()
{
    // True when the position array is non-empty and its entry 0 links to a
    // child element (SetDoc sets that link only after a successful parse).
    return m_aPos.GetSize() != 0 && m_aPos[0].iElemChild != 0;
}
bool CMarkup::Load( LPCTSTR szFileName )
{
    // Read the file szFileName and make its contents the current document.
    // In _UNICODE builds the file bytes are converted from UTF-8 to wide
    // characters; otherwise the bytes are placed in the document string as read.
    // Returns false if the file cannot be opened, otherwise the result of SetDoc.
    // Exceptions raised while reading are not caught here.
    CString csDoc;
    CFile file;
    if ( ! file.Open(szFileName,CFile::modeRead) )
        return false;

    // NOTE(review): the file length is narrowed to int here; very large files
    // are not handled -- confirm that is acceptable
    int nLength = file.GetLength();
#if defined(_UNICODE)
    // Allocate Buffer for UTF-8 file data
    unsigned char* pBuffer = new unsigned char[nLength + 1];
    try
    {
        nLength = file.Read( pBuffer, nLength );
        pBuffer[nLength] = '\0';

        // Convert file from UTF-8 to Windows UNICODE (AKA UCS-2)
        // First call measures the required length, second call converts
        int nWideLength = MultiByteToWideChar(CP_UTF8,0,(const char*)pBuffer,nLength,NULL,0);
        nLength = MultiByteToWideChar(CP_UTF8,0,(const char*)pBuffer,nLength,
            csDoc.GetBuffer(nWideLength),nWideLength);
        ASSERT( nLength == nWideLength );
    }
    catch ( ... )
    {
        // Free the temporary buffer before passing the exception on to the caller
        delete [] pBuffer;
        throw;
    }
    delete [] pBuffer;
#else
    nLength = file.Read( csDoc.GetBuffer(nLength), nLength );
#endif
    // nLength is now the number of characters actually stored in csDoc
    csDoc.ReleaseBuffer(nLength);
    file.Close();
    return SetDoc( csDoc );
}
bool CMarkup::Save( LPCTSTR szFileName )
{
    // Write the current document text to the file szFileName.
    // In _UNICODE builds the text is converted to UTF-8 first; otherwise the
    // document characters are written as they are.
    // Returns false if the file cannot be opened for writing, true otherwise.
    // Exceptions raised while writing are not caught here.
    int nLength = m_csDoc.GetLength();
    CFile file;
    if ( ! file.Open(szFileName,CFile::modeWrite|CFile::modeCreate) )
        return false;
#if defined( _UNICODE )
    // First call measures the UTF-8 length, second call converts
    int nUTF8Len = WideCharToMultiByte(CP_UTF8,0,m_csDoc,nLength,NULL,0,NULL,NULL);
    char* pBuffer = new char[nUTF8Len+1];
    try
    {
        nLength = WideCharToMultiByte(CP_UTF8,0,m_csDoc,nLength,pBuffer,nUTF8Len+1,NULL,NULL);
        file.Write( pBuffer, nLength );
    }
    catch ( ... )
    {
        // Free the temporary buffer before passing the exception on to the caller
        delete [] pBuffer;
        throw;
    }
    // The buffer came from new[], so it must be released with delete []
    delete [] pBuffer;
#else
    file.Write( (LPCTSTR)m_csDoc, nLength );
#endif
    file.Close();
    return true;
}
bool CMarkup::FindElem( LPCTSTR szName )
{
    // Search with x_FindElem from the current parent and main positions;
    // szName is passed through to x_FindElem unchanged.
    // The current position is changed only when something is found.
    // Returns true on success, false otherwise.

    // Nothing to search when the position array is empty
    if ( ! m_aPos.GetSize() )
        return false;

    const int iFound = x_FindElem( m_iPosParent, m_iPos, szName );
    if ( ! iFound )
        return false;

    // Make the found element the main position, with no child position
    x_SetPos( m_aPos[iFound].iElemParent, iFound, 0 );
    return true;
}
bool CMarkup::FindChildElem( LPCTSTR szName )
{
    // Search with x_FindElem below the main position, starting from the
    // current child position; szName is passed through unchanged.
    // The child position is changed only when something is found.
    // Returns true on success, false otherwise.

    // Shorthand: with no current main position, first try to establish one
    // so that the search runs under the root element
    if ( m_iPos == 0 )
        FindElem();

    const int iFoundChild = x_FindElem( m_iPos, m_iPosChild, szName );
    if ( iFoundChild == 0 )
        return false;

    // Rebuild the full position from the found child's own parent links
    const int iOwner = m_aPos[iFoundChild].iElemParent;
    x_SetPos( m_aPos[iOwner].iElemParent, iOwner, iFoundChild );
    return true;
}
CString CMarkup::GetTagName() const
{
    // Tag name at the current main position, or an empty string when there
    // is no main position
    if ( ! m_iPos )
        return CString();
    return x_GetTagName( m_iPos );
}
bool CMarkup::IntoElem()
{
    // Make the main position the new parent and the child position the new
    // main position. Returns false when there is no main position or the
    // current node is not an element.
    //
    // Release 6.3 note: this succeeds even when there is no child position
    // (a following FindElem then finds the first element). Earlier releases
    // silently looked for a child first when none was current; that shorthand
    // was never part of EDOM, was misleading, and has been removed. The change
    // is NOT backwards compatible. The removed code was:
    //   if ( ! m_iPosChild )
    //       FindChildElem();
    const bool bOnElement = m_iPos && m_nNodeType == MNT_ELEMENT;
    if ( ! bOnElement )
        return false;

    x_SetPos( m_iPos, m_iPosChild, 0 );
    return true;
}
bool CMarkup::OutOfElem()
{
    // Step up one level: the parent becomes the main position and the former
    // main position becomes the child position.
    // Returns false when there is no parent position.
    if ( ! m_iPosParent )
        return false;

    const int iGrandParent = m_aPos[m_iPosParent].iElemParent;
    x_SetPos( iGrandParent, m_iPosParent, m_iPos );
    return true;
}
CString CMarkup::GetAttrName( int n ) const
{
    // Return the name of the nth attribute of the main position element.
    // n is zero-based: x_FindAttr is called n+1 times, starting just past the
    // element's opening angle bracket.
    // Returns an empty string when the main position is not an element or
    // when x_FindAttr fails before the nth attribute is reached.
    const bool bOnElement = m_iPos && m_nNodeType == MNT_ELEMENT;
    if ( ! bOnElement )
        return _T("");

    TokenPos token( m_csDoc );
    token.nNext = m_aPos[m_iPos].nStartL + 1;

    int nAttr = 0;
    while ( nAttr <= n )
    {
        if ( ! x_FindAttr(token) )
            return _T("");
        ++nAttr;
    }

    // Return substring of document
    return x_GetToken( token );
}
bool CMarkup::SavePos( LPCTSTR szPosName )
{
    // Store the current parent, main and child positions in the saved
    // position map under the key szPosName.
    // Returns false when szPosName is NULL, true otherwise.
    if ( ! szPosName )
        return false;

    SavedPos posNow;
    posNow.iPosParent = m_iPosParent;
    posNow.iPos = m_iPos;
    posNow.iPosChild = m_iPosChild;
    m_mapSavedPos.SetAt( szPosName, posNow );
    return true;
}
bool CMarkup::RestorePos( LPCTSTR szPosName )
{
    // Look up szPosName in the saved position map and, when present, make the
    // stored parent, main and child positions current again.
    // Returns false when szPosName is NULL or is not in the map.
    SavedPos posStored;
    if ( ! szPosName || ! m_mapSavedPos.Lookup( szPosName, posStored ) )
        return false;

    x_SetPos( posStored.iPosParent, posStored.iPos, posStored.iPosChild );
    return true;
}
bool CMarkup::GetOffsets( int& nStart, int& nEnd ) const
{
    // Report the document offsets of the current main position element:
    // nStart receives its nStartL and nEnd its nEndR.
    // Not part of EDOM; it is used by the Markup project.
    // Returns false, leaving both outputs untouched, when there is no main position.
    if ( ! m_iPos )
        return false;

    nStart = m_aPos[m_iPos].nStartL;
    nEnd = m_aPos[m_iPos].nEndR;
    return true;
}
CString CMarkup::GetChildSubDoc() const
{
    // Return the document text of the current child element, or an empty
    // string when there is no child position.
    if ( ! m_iPosChild )
        return _T("");

    const int nFirst = m_aPos[m_iPosChild].nStartL;
    int nPastEnd = m_aPos[m_iPosChild].nEndR + 1;

    // Look at what follows the child: when no further token exists, or the
    // next token starts with '<', the returned text is extended up to that
    // token's left offset
    TokenPos token( m_csDoc );
    token.nNext = nPastEnd;
    const bool bNoToken = ! x_FindToken(token);
    if ( bNoToken || m_csDoc[token.nL] == _T('<') )
        nPastEnd = token.nL;

    return m_csDoc.Mid( nFirst, nPastEnd - nFirst );
}
bool CMarkup::RemoveElem()
{
    // Remove the element at the current main position.
    // Returns false when there is no main position or the current node is
    // not an element.
    const bool bOnElement = m_iPos && m_nNodeType == MNT_ELEMENT;
    if ( ! bOnElement )
        return false;

    // The index returned by x_RemoveElem becomes the new main position
    const int iPosAfter = x_RemoveElem( m_iPos );
    x_SetPos( m_iPosParent, iPosAfter, 0 );
    return true;
}
bool CMarkup::RemoveChildElem()
{
    // Remove the element at the current child position.
    // Returns false when there is no child position.
    if ( ! m_iPosChild )
        return false;

    // The index returned by x_RemoveElem becomes the new child position
    const int iChildAfter = x_RemoveElem( m_iPosChild );
    x_SetPos( m_iPosParent, m_iPos, iChildAfter );
    return true;
}
//////////////////////////////////////////////////////////////////////
// Private Methods
//////////////////////////////////////////////////////////////////////
int CMarkup::x_GetFreePos()
{
    //
    // This returns the index of the next unused ElemPos in the array
    // and advances m_iPosFree past it
    //
    // Grow the array by half of the used count when it is full, but always by
    // at least one entry: for m_iPosFree == 1 the half is 0, which would leave
    // the array unchanged and the returned index out of range
    if ( m_iPosFree >= m_aPos.GetSize() )
    {
        int nGrowBy = m_iPosFree / 2;
        if ( nGrowBy < 1 )
            nGrowBy = 1;
        m_aPos.SetSize( m_iPosFree + nGrowBy );
    }
    ++m_iPosFree;
    return m_iPosFree - 1;
}
int CMarkup::x_ReleasePos()
{
    //
    // Give back the most recently handed out ElemPos by stepping the
    // next-unused index down by one, so x_GetFreePos() returns it again.
    // Always returns 0.
    //
    m_iPosFree -= 1;
    return 0;
}
int CMarkup::x_ParseError( LPCTSTR szError, LPCTSTR szName )
{
    // Record a parse error in m_csError and release the most recently
    // reserved position.
    // szError: the message, used as a format string when szName is given.
    // szName:  optional value substituted into szError; may be NULL.
    // Always returns -1.
    if ( ! szName )
        m_csError = szError;
    else
        m_csError.Format( szError, szName );

    x_ReleasePos();
    return -1;
}
int CMarkup::x_ParseElem( int iPosParent )
{
// Parse the next element under iPosParent, including the elements nested in it
// This is either called by SetDoc, x_AddSubDoc, or itself recursively
// m_aPos[iPosParent].nEndL is where to start parsing for the child element
// Returns:
//   > 0  the index in m_aPos of the element that was parsed
//   0    no element starts here (a tag beginning with '/' was met); the reserved position is released
//   -1   parse error; the message is stored in m_csError by x_ParseError
// In all cases we need to get a new ElemPos, but release it if unused
//
int iPos = x_GetFreePos();
m_aPos[iPos].nStartL = m_aPos[iPosParent].nEndL;
m_aPos[iPos].iElemParent = iPosParent;
m_aPos[iPos].iElemChild = 0;
m_aPos[iPos].iElemNext = 0;
// Start Tag
// A loop is used to ignore all remarks tags and special tags
// i.e. <?xml version="1.0"?>, and <!-- comment here -->
// So any tag beginning with ? or ! is ignored
// Loop past ignored tags
// The loop ends once an element name has been read into csName
TokenPos token( m_csDoc );
token.nNext = m_aPos[iPosParent].nEndL;
CString csName;
while ( csName.IsEmpty() )
{
// Look for left angle bracket of start tag
m_aPos[iPos].nStartL = token.nNext;
if ( ! x_FindChar( token.szDoc, m_aPos[iPos].nStartL, _T('<') ) )
return x_ParseError( _T("Element tag not found") );
// Set parent's End tag to start looking from here (or later)
m_aPos[iPosParent].nEndL = m_aPos[iPos].nStartL;
// Determine whether this is an element, or bypass other type of node
token.nNext = m_aPos[iPos].nStartL + 1;
if ( x_FindToken( token ) )
{
if ( token.bIsString )
return x_ParseError( _T("Tag starts with quote") );
_TCHAR cFirstChar = m_csDoc[token.nL];
if ( cFirstChar == _T('?') || cFirstChar == _T('!') )
{
// Rewind to the '<' and let x_ParseNode handle the whole node
token.nNext = m_aPos[iPos].nStartL;
if ( ! x_ParseNode(token) )
return x_ParseError( _T("Invalid node") );
}
else if ( cFirstChar != _T('/') )
{
// Ordinary start tag: the first token is the element name
csName = x_GetToken( token );
// Look for end of tag
// NOTE(review): this looks for the next '>' after the name; a '>' inside a quoted
// attribute value would presumably end the tag early -- confirm against x_FindChar
if ( ! x_FindChar(token.szDoc, token.nNext, _T('>')) )
return x_ParseError( _T("End of tag not found") );
}
else
return x_ReleasePos(); // probably end tag of parent
}
else
return x_ParseError( _T("Abrupt end within tag") );
}
// token.nNext was moved by x_FindChar while looking for the '>' of the start tag
m_aPos[iPos].nStartR = token.nNext;
// Is ending mark within start tag, i.e. empty element?
if ( m_csDoc[m_aPos[iPos].nStartR-1] == _T('/') )
{
// Empty element
// Close tag left is set to ending mark, and right to open tag right
m_aPos[iPos].nEndL = m_aPos[iPos].nStartR-1;
m_aPos[iPos].nEndR = m_aPos[iPos].nStartR;
}
else // look for end tag
{
// Element probably has contents
// Determine where to start looking for left angle bracket of end tag
// This is done by recursively parsing the contents of this element
int iInner, iInnerPrev = 0;
m_aPos[iPos].nEndL = m_aPos[iPos].nStartR + 1;
// Each successful recursive call yields one child; 0 or -1 ends the loop
while ( (iInner = x_ParseElem( iPos )) > 0 )
{
// Set links to iInner
// The first child hangs off iElemChild, later ones are chained through iElemNext
if ( iInnerPrev )
m_aPos[iInnerPrev].iElemNext = iInner;
else
m_aPos[iPos].iElemChild = iInner;
iInnerPrev = iInner;
// Set offset to reflect child
m_aPos[iPos].nEndL = m_aPos[iInner].nEndR + 1;
}
// Pass a nested parse error straight up
// (this level's own position is not released on this path)
if ( iInner == -1 )
return -1;
// Look for left angle bracket of end tag
if ( ! x_FindChar( token.szDoc, m_aPos[iPos].nEndL, _T('<') ) )
return x_ParseError( _T("End tag of %s element not found"), csName );
// Look through tokens of end tag
// Expected order: '/', then the element name, then '>'
token.nNext = m_aPos[iPos].nEndL + 1;
int nTokenCount = 0;
while ( x_FindToken( token ) )
{
++nTokenCount;
if ( ! token.bIsString )
{
// Is first token not an end slash mark?
if ( nTokenCount == 1 && m_csDoc[token.nL] != _T('/') )
return x_ParseError( _T("Expecting end tag of element %s"), csName );
else if ( nTokenCount == 2 && ! token.Match(csName) )
return x_ParseError( _T("End tag does not correspond to %s"), csName );
// Else is it a right angle bracket?
else if ( m_csDoc[token.nL] == _T('>') )
break;
}
}
// Was a right angle bracket not found?
// Fails when token.nL is at the terminating null character or fewer than two tokens were seen
if ( ! token.szDoc[token.nL] || nTokenCount < 2 )
return x_ParseError( _T("End tag not completed for element %s"), csName );
// The end tag's right edge is the offset of its '>'
m_aPos[iPos].nEndR = token.nL;
}
// Successfully parsed element (and contained elements)
return iPos;
}
bool CMarkup::x_FindChar( LPCTSTR szDoc, int& nChar, _TCHAR c )
{
// static function
LPCTSTR pChar = &szDoc[nChar];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -