📄 cpl_minixml.cpp
字号:
/**********************************************************************
* $Id: cpl_minixml.cpp 11363 2007-04-26 19:09:35Z warmerdam $
*
* Project: CPL - Common Portability Library
* Purpose: Implementation of MiniXML Parser and handling.
* Author: Frank Warmerdam, warmerdam@pobox.com
*
**********************************************************************
* Copyright (c) 2001, Frank Warmerdam
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
**********************************************************************
*
* Independent Security Audit 2003/04/05 Andrey Kiselev:
* Completed audit of this module. Any documents may be parsed without
* buffer overflows and stack corruptions.
*
* Security Audit 2003/03/28 warmerda:
* Completed security audit. I believe that this module may be safely used
* to parse, and serialize arbitrary documents provided by a potentially
* hostile source.
*
*/
#include "cpl_minixml.h"
#include "cpl_error.h"
#include "cpl_conv.h"
#include "cpl_string.h"
#include <ctype.h>
CPL_CVSID("$Id: cpl_minixml.cpp 11363 2007-04-26 19:09:35Z warmerdam $");
typedef enum {
TNone,
TString,
TOpen,
TClose,
TEqual,
TToken,
TSlashClose,
TQuestionClose,
TComment,
TLiteral
} XMLTokenType;
/* Working state shared by the tokenizer helpers (ReadChar, UnreadChar,
   AddToToken, ReadToken). */
typedef struct {
/* NUL-terminated text being tokenized. */
const char *pszInput;
/* Index into pszInput of the next character ReadChar() will return. */
int nInputOffset;
/* Line counter: incremented on each line feed read, decremented when one
   is unread; reported in parse error messages. */
int nInputLine;
/* TRUE between an opening '<' and the matching '>', "/>" or "?>". */
int bInElement;
/* Type of the token most recently produced by ReadToken(). */
XMLTokenType eTokenType;
/* Growable, NUL-terminated buffer holding the current token text. */
char *pszToken;
/* Allocated size of pszToken in bytes. */
size_t nTokenMaxSize;
/* Number of characters currently in pszToken, excluding the NUL. */
size_t nTokenSize;
/* NOTE(review): the four members below are not touched by the code visible
   here; they look like a stack of open element nodes plus the head of the
   resulting node list - confirm against the parser proper. */
int nStackMaxSize;
int nStackSize;
CPLXMLNode **papsStack;
CPLXMLNode *psFirstNode;
} ParseContext;
/************************************************************************/
/* ReadChar() */
/************************************************************************/
/* Return the next input character and advance the read position.
 *
 * At the terminating NUL the offset is not advanced, so every further call
 * keeps returning '\0'. Reading a line feed increments the line counter
 * that parse error messages report.
 */
static char ReadChar( ParseContext *psContext )
{
    const char chCurrent = psContext->pszInput[psContext->nInputOffset];

    /* Stay parked on the terminator rather than stepping past it. */
    if( chCurrent == '\0' )
        return chCurrent;

    psContext->nInputOffset++;

    if( chCurrent == '\n' )
        psContext->nInputLine++;

    return chCurrent;
}
/************************************************************************/
/* UnreadChar() */
/************************************************************************/
/* Push the most recently read character back onto the input.
 *
 * chToUnread must be the character last returned by ReadChar(); this is
 * checked with CPLAssert. Unreading '\0' is a no-op because ReadChar()
 * never advances past the terminator. Unreading a line feed restores the
 * line counter.
 */
static void UnreadChar( ParseContext *psContext, char chToUnread )
{
    /* Nothing was consumed for the terminator, so nothing to undo. */
    if( chToUnread == '\0' )
        return;

    CPLAssert( chToUnread
               == psContext->pszInput[psContext->nInputOffset-1] );

    psContext->nInputOffset--;

    if( chToUnread == '\n' )
        psContext->nInputLine--;
}
/************************************************************************/
/* AddToToken() */
/************************************************************************/
/* Append one character to the current token, keeping it NUL-terminated.
 *
 * The token buffer is created with 10 bytes on first use and doubled
 * whenever fewer than two spare bytes would remain, so there is always
 * room for the new character plus the terminator.
 */
static void AddToToken( ParseContext *psContext, char chNewChar )
{
    char *pszBuffer = psContext->pszToken;

    if( pszBuffer == NULL )
    {
        /* First use: start with a small buffer. */
        psContext->nTokenMaxSize = 10;
        pszBuffer = (char *) CPLMalloc( psContext->nTokenMaxSize );
    }
    else if( psContext->nTokenSize >= psContext->nTokenMaxSize - 2 )
    {
        /* Nearly full: double the capacity. */
        psContext->nTokenMaxSize *= 2;
        pszBuffer = (char *) CPLRealloc( pszBuffer,
                                         psContext->nTokenMaxSize );
    }
    psContext->pszToken = pszBuffer;

    pszBuffer[psContext->nTokenSize] = chNewChar;
    psContext->nTokenSize++;
    pszBuffer[psContext->nTokenSize] = '\0';
}
/************************************************************************/
/* ReadToken() */
/************************************************************************/
static XMLTokenType ReadToken( ParseContext *psContext )
{
char chNext;
psContext->nTokenSize = 0;
psContext->pszToken[0] = '\0';
chNext = ReadChar( psContext );
while( isspace(chNext) )
chNext = ReadChar( psContext );
/* -------------------------------------------------------------------- */
/* Handle comments. */
/* -------------------------------------------------------------------- */
if( chNext == '<'
&& EQUALN(psContext->pszInput+psContext->nInputOffset,"!--",3) )
{
psContext->eTokenType = TComment;
// Skip "!--" characters
ReadChar(psContext);
ReadChar(psContext);
ReadChar(psContext);
while( !EQUALN(psContext->pszInput+psContext->nInputOffset,"-->",3)
&& (chNext = ReadChar(psContext)) != '\0' )
AddToToken( psContext, chNext );
// Skip "-->" characters
ReadChar(psContext);
ReadChar(psContext);
ReadChar(psContext);
}
/* -------------------------------------------------------------------- */
/* Handle DOCTYPE. */
/* -------------------------------------------------------------------- */
else if( chNext == '<'
&& EQUALN(psContext->pszInput+psContext->nInputOffset,"!DOCTYPE",8) )
{
int bInQuotes = FALSE;
psContext->eTokenType = TLiteral;
AddToToken( psContext, '<' );
do {
chNext = ReadChar(psContext);
if( chNext == '\0' )
{
CPLError( CE_Failure, CPLE_AppDefined,
"Parse error in DOCTYPE on or before line %d, "
"reached end of file without '>'.",
psContext->nInputLine );
break;
}
/* Skip the internal DTD subset as NOT SUPPORTED YET (Ticket #755).
* The markup declaration block within a DOCTYPE tag consists of:
* - a left square bracket [
* - a list of declarations
* - a right square bracket ]
* Example:
* <!DOCTYPE RootElement [ ...declarations... ]>
*
* We need to skip all 3 parts, until closing >
*/
if( chNext == '[' )
{
do
{
chNext = ReadChar( psContext );
}
while( chNext != ']'
&& !EQUALN(psContext->pszInput+psContext->nInputOffset,"]>", 2) );
// Skip "]" character to point to the closing ">"
chNext = ReadChar( psContext );
chNext = ReadChar( psContext );
}
if( chNext == '\"' )
bInQuotes = !bInQuotes;
if( chNext == '>' && !bInQuotes )
{
AddToToken( psContext, '>' );
break;
}
AddToToken( psContext, chNext );
} while( TRUE );
}
/* -------------------------------------------------------------------- */
/* Handle CDATA. */
/* -------------------------------------------------------------------- */
else if( chNext == '<'
&& EQUALN(psContext->pszInput+psContext->nInputOffset,"![CDATA[",8) )
{
psContext->eTokenType = TString;
// Skip !CDATA[
ReadChar( psContext );
ReadChar( psContext );
ReadChar( psContext );
ReadChar( psContext );
ReadChar( psContext );
ReadChar( psContext );
ReadChar( psContext );
ReadChar( psContext );
while( !EQUALN(psContext->pszInput+psContext->nInputOffset,"]]>",3)
&& (chNext = ReadChar(psContext)) != '\0' )
AddToToken( psContext, chNext );
// Skip "]]>" characters
ReadChar(psContext);
ReadChar(psContext);
ReadChar(psContext);
}
/* -------------------------------------------------------------------- */
/* Simple single tokens of interest. */
/* -------------------------------------------------------------------- */
else if( chNext == '<' && !psContext->bInElement )
{
psContext->eTokenType = TOpen;
psContext->bInElement = TRUE;
}
else if( chNext == '>' && psContext->bInElement )
{
psContext->eTokenType = TClose;
psContext->bInElement = FALSE;
}
else if( chNext == '=' && psContext->bInElement )
{
psContext->eTokenType = TEqual;
}
else if( chNext == '\0' )
{
psContext->eTokenType = TNone;
}
/* -------------------------------------------------------------------- */
/* Handle the /> token terminator. */
/* -------------------------------------------------------------------- */
else if( chNext == '/' && psContext->bInElement
&& psContext->pszInput[psContext->nInputOffset] == '>' )
{
chNext = ReadChar( psContext );
CPLAssert( chNext == '>' );
psContext->eTokenType = TSlashClose;
psContext->bInElement = FALSE;
}
/* -------------------------------------------------------------------- */
/* Handle the ?> token terminator. */
/* -------------------------------------------------------------------- */
else if( chNext == '?' && psContext->bInElement
&& psContext->pszInput[psContext->nInputOffset] == '>' )
{
chNext = ReadChar( psContext );
CPLAssert( chNext == '>' );
psContext->eTokenType = TQuestionClose;
psContext->bInElement = FALSE;
}
/* -------------------------------------------------------------------- */
/* Collect a quoted string. */
/* -------------------------------------------------------------------- */
else if( psContext->bInElement && chNext == '"' )
{
psContext->eTokenType = TString;
while( (chNext = ReadChar(psContext)) != '"'
&& chNext != '\0' )
AddToToken( psContext, chNext );
if( chNext != '"' )
{
psContext->eTokenType = TNone;
CPLError( CE_Failure, CPLE_AppDefined,
"Parse error on line %d, reached EOF before closing quote.",
psContext->nInputLine );
}
/* Do we need to unescape it? */
if( strchr(psContext->pszToken,'&') != NULL )
{
int nLength;
char *pszUnescaped = CPLUnescapeString( psContext->pszToken,
&nLength, CPLES_XML );
strcpy( psContext->pszToken, pszUnescaped );
CPLFree( pszUnescaped );
psContext->nTokenSize = strlen(psContext->pszToken );
}
}
else if( psContext->bInElement && chNext == '\'' )
{
psContext->eTokenType = TString;
while( (chNext = ReadChar(psContext)) != '\''
&& chNext != '\0' )
AddToToken( psContext, chNext );
if( chNext != '\'' )
{
psContext->eTokenType = TNone;
CPLError( CE_Failure, CPLE_AppDefined,
"Parse error on line %d, reached EOF before closing quote.",
psContext->nInputLine );
}
/* Do we need to unescape it? */
if( strchr(psContext->pszToken,'&') != NULL )
{
int nLength;
char *pszUnescaped = CPLUnescapeString( psContext->pszToken,
&nLength, CPLES_XML );
strcpy( psContext->pszToken, pszUnescaped );
CPLFree( pszUnescaped );
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -