tinyxmlparsera.cpp

来自「文字編輯器源碼 Text editor source code」· C++ 代码 · 共 1,158 行 · 第 1/2 页
CPP
1,158 行
/*
www.sourceforge.net/projects/tinyxml
Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)

This software is provided 'as-is', without any express or implied 
warranty. In no event will the authors be held liable for any 
damages arising from the use of this software.

Permission is granted to anyone to use this software for any 
purpose, including commercial applications, and to alter it and 
redistribute it freely, subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must 
not claim that you wrote the original software. If you use this
software in a product, an acknowledgment in the product documentation
would be appreciated but is not required.

2. Altered source versions must be plainly marked as such, and 
must not be misrepresented as being the original software.

3. This notice may not be removed or altered from any source 
distribution.
*/

// NOTE: this is an altered source version (see restriction 2 above).

#include "tinyxmlA.h"
#include <ctype.h>

//#define DEBUG_PARSER

// Note that "PutString" hardcodes the same list. This
// is less flexible than it appears. Changing the entries
// or order will break putstring.
TiXmlBaseA::Entity TiXmlBaseA::entity[ NUM_ENTITY ] = 
{
	{ "&amp;",  5, '&' },
	{ "&lt;",   4, '<' },
	{ "&gt;",   4, '>' },
	{ "&quot;", 6, '\"' },
	{ "&apos;", 6, '\'' }
};


// Keeps a row/column cursor for a buffer that is being parsed.
// Stamp() advances the cursor from the last stamped position to a
// new position further along the same buffer.
class TiXmlParsingDataA
{
	friend class TiXmlDocumentA;
  public:
	//TiXmlParsingDataA( const char* now, const TiXmlParsingDataA* prevData );

	// Advance the cursor up to 'now' (see the definition below).
	void Stamp( const char* now );

	// The row/column computed by the most recent Stamp().
	const TiXmlCursorA& Cursor()	{ return cursor; }
	//void Update( const char* now );

  private:
	// Only used by the document!
	TiXmlParsingDataA( const char* start, int _tabsize, int row, int col )
	{
		assert( start );
		stamp = start;
		tabsize = _tabsize;
		cursor.row = row;
		cursor.col = col;
	}

	TiXmlCursorA		cursor;		// current row/column
	const char*		stamp;		// position the cursor corresponds to
	int				tabsize;	// tab stop width; values < 1 disable tracking
};


// Walks from the previously stamped position up to 'now', updating the
// row/column cursor for every character passed. "\r\n" and "\n\r" pairs
// count as a single line break; a tab jumps to the next tab stop.
// Does nothing when tabsize is less than 1.
void TiXmlParsingDataA::Stamp( const char* now )
{
	assert( now );

	// Do nothing if the tabsize is 0.
	if ( tabsize < 1 )
	{
		return;
	}

	// Get the current row, column.
	int row = cursor.row;
	int col = cursor.col;
	const char* p = stamp;
	assert( p );

	while ( p < now )
	{
		// Code contributed by Fletcher Dunn: (modified by lee)
		switch (*p) {
			case 0:
				// We *should* never get here, but in case we do, don't
				// advance past the terminating null character, ever
				return;

			case '\r':
				// bump down to the next line
				++row;
				col = 0;
				// Eat the character
				++p;

				// Check for \r\n sequence, and treat this as a single character
				if (*p == '\n') {
					++p;
				}
				break;

			case '\n':
				// bump down to the next line
				++row;
				col = 0;

				// Eat the character
				++p;

				// Check for \n\r sequence, and treat this as a single
				// character.  (Yes, this bizarre thing does occur still
				// on some arcane platforms...)
				if (*p == '\r') {
					++p;
				}
				break;

			case '\t':
				// Eat the character
				++p;

				// Skip to next tab stop
				col = (col / tabsize + 1) * tabsize;
				break;

			default:
				// Eat the character
				++p;

				// Normal char - just advance one column
				++col;
				break;
		}
	}
	cursor.row = row;
	cursor.col = col;
	assert( cursor.row >= -1 );
	assert( cursor.col >= -1 );
	stamp = p;
	assert( stamp );
}


// Returns a pointer to the first non-whitespace character at or after p.
// Returns 0 when p is null or points at the terminating null; if only
// whitespace remains, the returned pointer addresses the terminator.
const char* TiXmlBaseA::SkipWhiteSpace( const char* p )
{
	if ( !p || !*p )
	{
		return 0;
	}
	while ( p && *p )
	{
		// Still using old rules for white space.
		// The cast keeps isspace() defined for characters with the high bit set.
		if ( isspace( (unsigned char) *p ) || *p == '\n' || *p == '\r' )
			++p;
		else
			break;
	}

	return p;
}

#ifdef TIXMLA_USE_STL
// Moves leading whitespace from the stream onto the end of *tag.
// Returns true once the next character is not whitespace, false if the
// stream stops being good() first.
/*static*/ bool TiXmlBaseA::StreamWhiteSpace( TIXMLA_ISTREAM * in, TIXMLA_STRING * tag )
{
	for( ;; )
	{
		if ( !in->good() ) return false;

		int c = in->peek();
		if ( !IsWhiteSpace( c ) )
			return true;
		*tag += (char) in->get();
	}
}

// Moves characters from the stream onto the end of *tag until the next
// character equals 'character' (which is left unread). Returns true when
// that character is found, false when the stream ends first.
/*static*/ bool TiXmlBaseA::StreamTo( TIXMLA_ISTREAM * in, int character, TIXMLA_STRING * tag )
{
	while ( in->good() )
	{
		int c = in->peek();
		if ( c == character )
			return true;
		// End of stream (or an embedded null): stop here rather than
		// appending the end-of-file marker to the tag as if it were data.
		if ( c <= 0 )
			return false;

		in->get();
		*tag += (char) c;
	}
	return false;
}
#endif

// Reads an XML name starting at p into *name and returns a pointer just
// past it. Returns 0 (with *name left empty) if p does not start a name.
const char* TiXmlBaseA::ReadName( const char* p, TIXMLA_STRING * name )
{
	*name = "";
	assert( p );

	// Names start with letters or underscores.
	// After that, they can be letters, underscores, numbers,
	// hyphens, or colons. (Colons are valid ony for namespaces,
	// but tinyxml can't tell namespaces from names.)
	if (    p && *p 
		 && ( isalpha( (unsigned char) *p ) || *p == '_' ) )
	{
		while(		p && *p
				&&	(		isalnum( (unsigned char ) *p ) 
						 || *p == '_'
						 || *p == '-'
						 || *p == '.'
						 || *p == ':' ) )
		{
			(*name) += *p;
			++p;
		}
		return p;
	}
	return 0;
}

// Decodes the entity that starts at p into *value and returns a pointer
// just past the consumed text. Handles one- and two-digit "&#x..;" forms
// and the entries of the entity table; anything else is passed through
// as the single character at p.
const char* TiXmlBaseA::GetEntity( const char* p, char* value )
{
	int i;

	// Handle the &#x entities.
	if (    strncmp( "&#x", p, 3 ) == 0 
	     && *(p+3) 
		 && *(p+4) 
		 && ( *(p+4) == ';' || *(p+5) == ';' )
	   )
	{
		*value = 0;

		// The ctype calls below get an unsigned char so that bytes with the
		// high bit set do not become negative arguments.
		if ( *(p+4) == ';' )
		{
			// Short, one value entity.
			if ( isalpha( (unsigned char) *(p+3) ) )
				*value += ( tolower( (unsigned char) *(p+3) ) - 'a' + 10 );
			else
				*value += ( *(p+3) - '0' );

			return p+5;
		}
		else
		{
			// two value entity
			if ( isalpha( (unsigned char) *(p+3) ) )
				*value += ( tolower( (unsigned char) *(p+3) ) - 'a' + 10 ) * 16;
			else
				*value += ( *(p+3) - '0' ) * 16;

			if ( isalpha( (unsigned char) *(p+4) ) )
				*value += ( tolower( (unsigned char) *(p+4) ) - 'a' + 10 );
			else
				*value += ( *(p+4) - '0' );

			return p+6;
		}
	}

	// Now try to match it.
	for( i=0; i<NUM_ENTITY; ++i )
	{
		if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
		{
			assert( strlen( entity[i].str ) == entity[i].strLength );
			*value = entity[i].chr;
			return ( p + entity[i].strLength );
		}
	}

	// So it wasn't an entity, its unrecognized, or something like that.
	*value = *p;	// Don't put back the last one, since we return it!
	return p+1;
}


// Returns true when the text at p begins with 'tag'. With ignoreCase set
// the comparison ignores letter case; otherwise it is exact.
// p must be non-null and non-empty (asserted).
bool TiXmlBaseA::StringEqual( const char* p,
							 const char* tag,
							 bool ignoreCase )
{
	assert( p );
	if ( !p || !*p )
	{
		assert( 0 );
		return false;
	}

	// Cheap pre-check on the first character.
	if ( tolower( (unsigned char) *p ) == tolower( (unsigned char) *tag ) )
	{
		const char* q = p;

		if ( ignoreCase )
		{
			// Case-insensitive walk. (These two branches used to be swapped,
			// so ignoreCase == true compared exactly and vice versa.)
			while (    *q && *tag
					&& tolower( (unsigned char) *q ) == tolower( (unsigned char) *tag ) )
			{
				++q;
				++tag;
			}

			if ( *tag == 0 )
			{
				return true;
			}
		}
		else
		{
			while ( *q && *tag && *q == *tag )
			{
				++q;
				++tag;
			}

			if ( *tag == 0 )		// Have we found the end of the tag, and everything equal?
			{
				return true;
			}
		}
	}
	return false;
}

// Copies text starting at p into *text until endTag is seen, decoding
// characters through GetChar(). When trimWhiteSpace is set and
// condenseWhiteSpace is enabled, leading whitespace is dropped and every
// interior run of whitespace becomes one space; trailing whitespace is
// dropped as well. Otherwise all whitespace is kept.
// Returns a pointer just past endTag when it was found. If the input ran
// out first, returns the pointer as it stands (null or at the terminator)
// instead of stepping past the end of the buffer.
const char* TiXmlBaseA::ReadText(	const char* p, 
									TIXMLA_STRING * text, 
									bool trimWhiteSpace, 
									const char* endTag, 
									bool caseInsensitive )
{
    *text = "";
	if (    !trimWhiteSpace			// certain tags always keep whitespace
		 || !condenseWhiteSpace )	// if true, whitespace is always kept
	{
		// Keep all the white space.
		while (	   p && *p
				&& !StringEqual( p, endTag, caseInsensitive )
			  )
		{
			char c;
			p = GetChar( p, &c );
			(* text) += c;
		}
	}
	else
	{
		bool whitespace = false;

		// Remove leading white space:
		p = SkipWhiteSpace( p );
		while (	   p && *p
				&& !StringEqual( p, endTag, caseInsensitive ) )
		{
			if ( *p == '\r' || *p == '\n' )
			{
				whitespace = true;
				++p;
			}
			else if ( isspace( (unsigned char) *p ) )
			{
				whitespace = true;
				++p;
			}
			else
			{
				// If we've found whitespace, add it before the
				// new character. Any whitespace just becomes a space.
				if ( whitespace )
				{
					(* text) += ' ';
					whitespace = false;
				}
				char c;
				p = GetChar( p, &c );
				(* text) += c;
			}
		}
	}

	// Only step over the end tag if we actually stopped on it.
	if ( p && *p )
	{
		p += strlen( endTag );
	}
	return p;
}

#ifdef TIXMLA_USE_STL

// Streams nodes from 'in', accumulating the raw text in *tag, until an
// element has been streamed. Sets an error if nothing that looks like a
// tag is found or a node cannot be identified.
void TiXmlDocumentA::StreamIn( TIXMLA_ISTREAM * in, TIXMLA_STRING * tag )
{
	// The basic issue with a document is that we don't know what we're
	// streaming. Read something presumed to be a tag (and hope), then
	// identify it, and call the appropriate stream method on the tag.
	//
	// This "pre-streaming" will never read the closing ">" so the
	// sub-tag can orient itself.

	if ( !StreamTo( in, '<', tag ) ) 
	{
		SetError( TIXMLA_ERROR_PARSING_EMPTY, 0, 0 );
		return;
	}

	while ( in->good() )
	{
		// Remember where this tag starts inside the accumulated text.
		int tagIndex = tag->length();
		while ( in->good() && in->peek() != '>' )
		{
			int c = in->get();
			(*tag) += (char) c;
		}

		if ( in->good() )
		{
			// We now have something we presume to be a node of 
			// some sort. Identify it, and call the node to
			// continue streaming.
			TiXmlNodeA* node = Identify( tag->c_str() + tagIndex );

			if ( node )
			{
				node->StreamIn( in, tag );
				bool isElement = node->ToElement() != 0;
				delete node;
				node = 0;

				// If this is the root element, we're done. Parsing will be
				// done by the >> operator.
				if ( isElement )
				{
					return;
				}
			}
			else
			{
				SetError( TIXMLA_ERROR, 0, 0 );
				return;
			}
		}
	}
	// We should have returned sooner.
	SetError( TIXMLA_ERROR, 0, 0 );
}

#endif

// Parses a whole document from the null-terminated buffer p, linking each
// identified node as a child of the document. The starting row/column is
// taken from prevData when given, otherwise 0/0. Returns the position
// where parsing stopped, or 0 with an error set if the input is empty.
const char* TiXmlDocumentA::Parse( const char* p, TiXmlParsingDataA* prevData )
{
	ClearError();

	// Parse away, at the document level. Since a document
	// contains nothing but other tags, most of what happens
	// here is skipping white space.
	if ( !p || !*p )
	{
		SetError( TIXMLA_ERROR_DOCUMENT_EMPTY, 0, 0 );
		return 0;
	}

	// Note that, for a document, this needs to come
	// before the while space skip, so that parsing
	// starts from the pointer we are given.
	location.Clear();
	if ( prevData )
	{
		location.row = prevData->cursor.row;
		location.col = prevData->cursor.col;
	}
	else
	{
		location.row = 0;
		location.col = 0;
	}
	TiXmlParsingDataA data( p, TabSize(), location.row, location.col );
	location = data.Cursor();

    p = SkipWhiteSpace( p );
	if ( !p )
	{
		SetError( TIXMLA_ERROR_DOCUMENT_EMPTY, 0, 0 );
		return 0;
	}

	while ( p && *p )
	{
		TiXmlNodeA* node = Identify( p );
		if ( node )
		{
			p = node->Parse( p, &data );
			LinkEndChild( node );
		}
		else
		{
			break;
		}
		p = SkipWhiteSpace( p );
	}

	// All is well.
	return p;
}

// Records an error on the document. Only the first error is kept; later
// calls are ignored. When both pError and data are given, the error
// location is computed by stamping data at pError.
void TiXmlDocumentA::SetError( int err, const char* pError, TiXmlParsingDataA* data )
{	
	// The first error in a chain is more accurate - don't set again!
	if ( error )
		return;

	assert( err > 0 && err < TIXMLA_ERROR_STRING_COUNT );
	error   = true;
	errorId = err;
	errorDesc = errorString[ errorId ];

	errorLocation.Clear();
	if ( pError && data )
	{
		//TiXmlParsingDataA data( pError, prevData );
		data->Stamp( pError );
		errorLocation = data->Cursor();
	}
}


// Looks at the text at p and allocates the matching kind of node
// (declaration, element, comment or unknown). Returns 0 when p does not
// point at a '<'.
// NOTE(review): this definition continues past the end of this excerpt;
// only its visible part is reproduced here, with the code unchanged.
TiXmlNodeA* TiXmlNodeA::Identify( const char* p )
{
	TiXmlNodeA* returnNode = 0;

	p = SkipWhiteSpace( p );
	if( !p || !*p || *p != '<' )
	{
		return 0;
	}

	TiXmlDocumentA* doc = GetDocument();
	p = SkipWhiteSpace( p );

	if ( !p || !*p )
	{
		return 0;
	}

	// What is this thing? 
	// - Elements start with a letter or underscore, but xml is reserved.
	// - Comments: <!--
	// - Decleration: <?xml
	// - Everthing else is unknown to tinyxml.
	//

	const char* xmlHeader = { "<?xml" };
	const char* commentHeader = { "<!--" };

	if ( StringEqual( p, xmlHeader, true ) )
	{
		#ifdef DEBUG_PARSER
			TIXMLA_LOG( "XML parsing Declaration\n" );
		#endif
		returnNode = new TiXmlDeclarationA();
	}
	else if (    isalpha( *(p+1) )
			  || *(p+1) == '_' )
	{
		#ifdef DEBUG_PARSER
			TIXMLA_LOG( "XML parsing Element\n" );
		#endif
		returnNode = new TiXmlElementA( "" );
	}
	else if ( StringEqual( p, commentHeader, false ) )
	{
		#ifdef DEBUG_PARSER
			TIXMLA_LOG( "XML parsing Comment\n" );
		#endif
		returnNode = new TiXmlCommentA();
	}
	else
	{
		#ifdef DEBUG_PARSER
			TIXMLA_LOG( "XML parsing Unknown\n" );
		#endif
		returnNode = new TiXmlUnknownA();
	}

	if ( returnNode )
	{
		// Set the parent, so it can report errors
		returnNode->parent = this;
	}
	else
	{
tinyxmlparsera.cpp - 源码说明

本页面展示了「文字編輯器源碼 Text editor source code」中的 tinyxmlparsera.cpp 源码文件，采用 C++ 编程语言编写，共 1,158 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫开发者社区收录了大量与文本编辑器相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?