📄 tinyxmlparser.cpp
字号:
if ( !p || !*p )
{
assert( 0 );
return false;
}
const char* q = p;
if ( ignoreCase )
{
while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
{
++q;
++tag;
}
if ( *tag == 0 )
return true;
}
else
{
while ( *q && *tag && *q == *tag )
{
++q;
++tag;
}
if ( *tag == 0 ) // Have we found the end of the tag, and everything equal?
return true;
}
return false;
}
const char* TiXmlBase::ReadText( const char* p,
TIXML_STRING * text,
bool trimWhiteSpace,
const char* endTag,
bool caseInsensitive,
TiXmlEncoding encoding )
{
*text = "";
if ( !trimWhiteSpace // certain tags always keep whitespace
|| !condenseWhiteSpace ) // if true, whitespace is always kept
{
// Keep all the white space.
while ( p && *p
&& !StringEqual( p, endTag, caseInsensitive, encoding )
)
{
int len;
char cArr[4] = { 0, 0, 0, 0 };
p = GetChar( p, cArr, &len, encoding );
text->append( cArr, len );
}
}
else
{
bool whitespace = false;
// Remove leading white space:
p = SkipWhiteSpace( p, encoding );
while ( p && *p
&& !StringEqual( p, endTag, caseInsensitive, encoding ) )
{
if ( *p == '\r' || *p == '\n' )
{
whitespace = true;
++p;
}
else if ( IsWhiteSpace( *p ) )
{
whitespace = true;
++p;
}
else
{
// If we've found whitespace, add it before the
// new character. Any whitespace just becomes a space.
if ( whitespace )
{
(*text) += ' ';
whitespace = false;
}
int len;
char cArr[4] = { 0, 0, 0, 0 };
p = GetChar( p, cArr, &len, encoding );
if ( len == 1 )
(*text) += cArr[0]; // more efficient
else
text->append( cArr, len );
}
}
}
return p + strlen( endTag );
}
#ifdef TIXML_USE_STL
void TiXmlDocument::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
{
// The basic issue with a document is that we don't know what we're
// streaming. Read something presumed to be a tag (and hope), then
// identify it, and call the appropriate stream method on the tag.
//
// This "pre-streaming" will never read the closing ">" so the
// sub-tag can orient itself.
if ( !StreamTo( in, '<', tag ) )
{
SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
return;
}
while ( in->good() )
{
int tagIndex = (int) tag->length();
while ( in->good() && in->peek() != '>' )
{
int c = in->get();
if ( c <= 0 )
{
SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
break;
}
(*tag) += (char) c;
}
if ( in->good() )
{
// We now have something we presume to be a node of
// some sort. Identify it, and call the node to
// continue streaming.
TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
if ( node )
{
node->StreamIn( in, tag );
bool isElement = node->ToElement() != 0;
delete node;
node = 0;
// If this is the root element, we're done. Parsing will be
// done by the >> operator.
if ( isElement )
{
return;
}
}
else
{
SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
return;
}
}
}
// We should have returned sooner.
SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
}
#endif
const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
{
ClearError();
// Parse away, at the document level. Since a document
// contains nothing but other tags, most of what happens
// here is skipping white space.
if ( !p || !*p )
{
SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
return 0;
}
// Note that, for a document, this needs to come
// before the while space skip, so that parsing
// starts from the pointer we are given.
location.Clear();
if ( prevData )
{
location.row = prevData->cursor.row;
location.col = prevData->cursor.col;
}
else
{
location.row = 0;
location.col = 0;
}
TiXmlParsingData data( p, TabSize(), location.row, location.col );
location = data.Cursor();
if ( encoding == TIXML_ENCODING_UNKNOWN )
{
// Check for the Microsoft UTF-8 lead bytes.
const unsigned char* pU = (const unsigned char*)p;
if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
&& *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
&& *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
{
encoding = TIXML_ENCODING_UTF8;
useMicrosoftBOM = true;
}
}
p = SkipWhiteSpace( p, encoding );
if ( !p )
{
SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
return 0;
}
while ( p && *p )
{
TiXmlNode* node = Identify( p, encoding );
if ( node )
{
p = node->Parse( p, &data, encoding );
LinkEndChild( node );
}
else
{
break;
}
// Did we get encoding info?
if ( encoding == TIXML_ENCODING_UNKNOWN
&& node->ToDeclaration() )
{
TiXmlDeclaration* dec = node->ToDeclaration();
const char* enc = dec->Encoding();
assert( enc );
if ( *enc == 0 )
encoding = TIXML_ENCODING_UTF8;
else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
encoding = TIXML_ENCODING_UTF8;
else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
else
encoding = TIXML_ENCODING_LEGACY;
}
p = SkipWhiteSpace( p, encoding );
}
// Was this empty?
if ( !firstChild ) {
SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
return 0;
}
// All is well.
return p;
}
void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
{
// The first error in a chain is more accurate - don't set again!
if ( error )
return;
assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
error = true;
errorId = err;
errorDesc = errorString[ errorId ];
errorLocation.Clear();
if ( pError && data )
{
data->Stamp( pError, encoding );
errorLocation = data->Cursor();
}
}
TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
{
TiXmlNode* returnNode = 0;
p = SkipWhiteSpace( p, encoding );
if( !p || !*p || *p != '<' )
{
return 0;
}
TiXmlDocument* doc = GetDocument();
p = SkipWhiteSpace( p, encoding );
if ( !p || !*p )
{
return 0;
}
// What is this thing?
// - Elements start with a letter or underscore, but xml is reserved.
// - Comments: <!--
// - Decleration: <?xml
// - Everthing else is unknown to tinyxml.
//
const char* xmlHeader = { "<?xml" };
const char* commentHeader = { "<!--" };
const char* dtdHeader = { "<!" };
const char* cdataHeader = { "<![CDATA[" };
if ( StringEqual( p, xmlHeader, true, encoding ) )
{
#ifdef DEBUG_PARSER
TIXML_LOG( "XML parsing Declaration\n" );
#endif
returnNode = new TiXmlDeclaration();
}
else if ( StringEqual( p, commentHeader, false, encoding ) )
{
#ifdef DEBUG_PARSER
TIXML_LOG( "XML parsing Comment\n" );
#endif
returnNode = new TiXmlComment();
}
else if ( StringEqual( p, cdataHeader, false, encoding ) )
{
#ifdef DEBUG_PARSER
TIXML_LOG( "XML parsing CDATA\n" );
#endif
TiXmlText* text = new TiXmlText( "" );
text->SetCDATA( true );
returnNode = text;
}
else if ( StringEqual( p, dtdHeader, false, encoding ) )
{
#ifdef DEBUG_PARSER
TIXML_LOG( "XML parsing Unknown(1)\n" );
#endif
returnNode = new TiXmlUnknown();
}
else if ( IsAlpha( *(p+1), encoding )
|| *(p+1) == '_' )
{
#ifdef DEBUG_PARSER
TIXML_LOG( "XML parsing Element\n" );
#endif
returnNode = new TiXmlElement( "" );
}
else
{
#ifdef DEBUG_PARSER
TIXML_LOG( "XML parsing Unknown(2)\n" );
#endif
returnNode = new TiXmlUnknown();
}
if ( returnNode )
{
// Set the parent, so it can report errors
returnNode->parent = this;
}
else
{
if ( doc )
doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
}
return returnNode;
}
#ifdef TIXML_USE_STL
void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag)
{
// We're called with some amount of pre-parsing. That is, some of "this"
// element is in "tag". Go ahead and stream to the closing ">"
while( in->good() )
{
int c = in->get();
if ( c <= 0 )
{
TiXmlDocument* document = GetDocument();
if ( document )
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
return;
}
(*tag) += (char) c ;
if ( c == '>' )
break;
}
if ( tag->length() < 3 ) return;
// Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
// If not, identify and stream.
if ( tag->at( tag->length() - 1 ) == '>'
&& tag->at( tag->length() - 2 ) == '/' )
{
// All good!
return;
}
else if ( tag->at( tag->length() - 1 ) == '>' )
{
// There is more. Could be:
// text
// closing tag
// another node.
for ( ;; )
{
StreamWhiteSpace( in, tag );
// Do we have text?
if ( in->good() && in->peek() != '<' )
{
// Yep, text.
TiXmlText text( "" );
text.StreamIn( in, tag );
// What follows text is a closing tag or another node.
// Go around again and figure it out.
continue;
}
// We now have either a closing tag...or another node.
// We should be at a "<", regardless.
if ( !in->good() ) return;
assert( in->peek() == '<' );
int tagIndex = (int) tag->length();
bool closingTag = false;
bool firstCharFound = false;
for( ;; )
{
if ( !in->good() )
return;
int c = in->peek();
if ( c <= 0 )
{
TiXmlDocument* document = GetDocument();
if ( document )
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
return;
}
if ( c == '>' )
break;
*tag += (char) c;
in->get();
if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
{
firstCharFound = true;
if ( c == '/' )
closingTag = true;
}
}
// If it was a closing tag, then read in the closing '>' to clean up the input stream.
// If it was not, the streaming will be done by the tag.
if ( closingTag )
{
if ( !in->good() )
return;
int c = in->get();
if ( c <= 0 )
{
TiXmlDocument* document = GetDocument();
if ( document )
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
return;
}
assert( c == '>' );
*tag += (char) c;
// We are done, once we've found our closing tag.
return;
}
else
{
// If not a closing tag, id it, and stream.
const char* tagloc = tag->c_str() + tagIndex;
TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
if ( !node )
return;
node->StreamIn( in, tag );
delete node;
node = 0;
// No return: go around from the beginning: text, closing tag, or node.
}
}
}
}
#endif
const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
{
p = SkipWhiteSpace( p, encoding );
TiXmlDocument* document = GetDocument();
if ( !p || !*p )
{
if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
return 0;
}
if ( data )
{
data->Stamp( p, encoding );
location = data->Cursor();
}
if ( *p != '<' )
{
if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
return 0;
}
p = SkipWhiteSpace( p+1, encoding );
// Read the name.
const char* pErr = p;
p = ReadName( p, &value, encoding );
if ( !p || !*p )
{
if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
return 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -