📄 tinyxmlparser.cpp
字号:
// So it wasn't an entity, its unrecognized, or something like that. *value = *p; // Don't put back the last one, since we return it! return p+1;}bool TiXmlBase::StringEqual( const char* p, const char* tag, bool ignoreCase, TiXmlEncoding encoding ){ assert( p ); assert( tag ); if ( !p || !*p ) { assert( 0 ); return false; } const char* q = p; if ( ignoreCase ) { while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) ) { ++q; ++tag; } if ( *tag == 0 ) return true; } else { while ( *q && *tag && *q == *tag ) { ++q; ++tag; } if ( *tag == 0 ) // Have we found the end of the tag, and everything equal? return true; } return false;}const char* TiXmlBase::ReadText( const char* p, TIXML_STRING * text, bool trimWhiteSpace, const char* endTag, bool caseInsensitive, TiXmlEncoding encoding ){ *text = ""; if ( !trimWhiteSpace // certain tags always keep whitespace || !condenseWhiteSpace ) // if true, whitespace is always kept { // Keep all the white space. while ( p && *p && !StringEqual( p, endTag, caseInsensitive, encoding ) ) { int len; char cArr[4] = { 0, 0, 0, 0 }; p = GetChar( p, cArr, &len, encoding ); text->append( cArr, len ); } } else { bool whitespace = false; // Remove leading white space: p = SkipWhiteSpace( p, encoding ); while ( p && *p && !StringEqual( p, endTag, caseInsensitive, encoding ) ) { if ( *p == '\r' || *p == '\n' ) { whitespace = true; ++p; } else if ( IsWhiteSpace( *p ) ) { whitespace = true; ++p; } else { // If we've found whitespace, add it before the // new character. Any whitespace just becomes a space. if ( whitespace ) { (*text) += ' '; whitespace = false; } int len; char cArr[4] = { 0, 0, 0, 0 }; p = GetChar( p, cArr, &len, encoding ); if ( len == 1 ) (*text) += cArr[0]; // more efficient else text->append( cArr, len ); } } } return p + strlen( endTag );}#ifdef TIXML_USE_STLvoid TiXmlDocument::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag ){ // The basic issue with a document is that we don't know what we're // streaming. Read something presumed to be a tag (and hope), then // identify it, and call the appropriate stream method on the tag. // // This "pre-streaming" will never read the closing ">" so the // sub-tag can orient itself. if ( !StreamTo( in, '<', tag ) ) { SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); return; } while ( in->good() ) { int tagIndex = (int) tag->length(); while ( in->good() && in->peek() != '>' ) { int c = in->get(); if ( c <= 0 ) { SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); break; } (*tag) += (char) c; } if ( in->good() ) { // We now have something we presume to be a node of // some sort. Identify it, and call the node to // continue streaming. TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING ); if ( node ) { node->StreamIn( in, tag ); bool isElement = node->ToElement() != 0; delete node; node = 0; // If this is the root element, we're done. Parsing will be // done by the >> operator. if ( isElement ) { return; } } else { SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN ); return; } } } // We should have returned sooner. SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );}#endifconst char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding ){ ClearError(); // Parse away, at the document level. Since a document // contains nothing but other tags, most of what happens // here is skipping white space. if ( !p || !*p ) { SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); return 0; } // Note that, for a document, this needs to come // before the while space skip, so that parsing // starts from the pointer we are given. location.Clear(); if ( prevData ) { location.row = prevData->cursor.row; location.col = prevData->cursor.col; } else { location.row = 0; location.col = 0; } TiXmlParsingData data( p, TabSize(), location.row, location.col ); location = data.Cursor(); if ( encoding == TIXML_ENCODING_UNKNOWN ) { // Check for the Microsoft UTF-8 lead bytes. if ( *(p+0) && *(p+0) == (char)(0xef) && *(p+1) && *(p+1) == (char)(0xbb) && *(p+2) && *(p+2) == (char)(0xbf) ) { encoding = TIXML_ENCODING_UTF8; } } p = SkipWhiteSpace( p, encoding ); if ( !p ) { SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); return 0; } while ( p && *p ) { TiXmlNode* node = Identify( p, encoding ); if ( node ) { p = node->Parse( p, &data, encoding ); LinkEndChild( node ); } else { break; } // Did we get encoding info? if ( encoding == TIXML_ENCODING_UNKNOWN && node->ToDeclaration() ) { TiXmlDeclaration* dec = node->ToDeclaration(); const char* enc = dec->Encoding(); assert( enc ); if ( *enc == 0 ) encoding = TIXML_ENCODING_UTF8; else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) ) encoding = TIXML_ENCODING_UTF8; else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) ) encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice else encoding = TIXML_ENCODING_LEGACY; } p = SkipWhiteSpace( p, encoding ); } // All is well. return p;}void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding ){ // The first error in a chain is more accurate - don't set again! if ( error ) return; assert( err > 0 && err < TIXML_ERROR_STRING_COUNT ); error = true; errorId = err; errorDesc = errorString[ errorId ]; errorLocation.Clear(); if ( pError && data ) { //TiXmlParsingData data( pError, prevData ); data->Stamp( pError, encoding ); errorLocation = data->Cursor(); }}TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding ){ TiXmlNode* returnNode = 0; p = SkipWhiteSpace( p, encoding ); if( !p || !*p || *p != '<' ) { return 0; } TiXmlDocument* doc = GetDocument(); p = SkipWhiteSpace( p, encoding ); if ( !p || !*p ) { return 0; } // What is this thing? // - Elements start with a letter or underscore, but xml is reserved. // - Comments: <!-- // - Decleration: <?xml // - Everthing else is unknown to tinyxml. // const char* xmlHeader = { "<?xml" }; const char* commentHeader = { "<!--" }; const char* dtdHeader = { "<!" }; if ( StringEqual( p, xmlHeader, true, encoding ) ) { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing Declaration\n" ); #endif returnNode = new TiXmlDeclaration(); } else if ( StringEqual( p, commentHeader, false, encoding ) ) { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing Comment\n" ); #endif returnNode = new TiXmlComment(); } else if ( StringEqual( p, dtdHeader, false, encoding ) ) { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing Unknown(1)\n" ); #endif returnNode = new TiXmlUnknown(); } else if ( IsAlpha( *(p+1), encoding ) || *(p+1) == '_' ) { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing Element\n" ); #endif returnNode = new TiXmlElement( "" ); } else { #ifdef DEBUG_PARSER TIXML_LOG( "XML parsing Unknown(2)\n" ); #endif returnNode = new TiXmlUnknown(); } if ( returnNode ) { // Set the parent, so it can report errors returnNode->parent = this; } else { if ( doc ) doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN ); } return returnNode;}#ifdef TIXML_USE_STLvoid TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag){ // We're called with some amount of pre-parsing. That is, some of "this" // element is in "tag". Go ahead and stream to the closing ">" while( in->good() ) { int c = in->get(); if ( c <= 0 ) { TiXmlDocument* document = GetDocument(); if ( document ) document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); return; } (*tag) += (char) c ; if ( c == '>' ) break; } if ( tag->length() < 3 ) return; // Okay...if we are a "/>" tag, then we're done. We've read a complete tag. // If not, identify and stream. if ( tag->at( tag->length() - 1 ) == '>' && tag->at( tag->length() - 2 ) == '/' ) { // All good! return; } else if ( tag->at( tag->length() - 1 ) == '>' ) { // There is more. Could be: // text // closing tag // another node. for ( ;; ) { StreamWhiteSpace( in, tag ); // Do we have text? if ( in->good() && in->peek() != '<' ) { // Yep, text. TiXmlText text( "" ); text.StreamIn( in, tag ); // What follows text is a closing tag or another node. // Go around again and figure it out. continue; } // We now have either a closing tag...or another node. // We should be at a "<", regardless. if ( !in->good() ) return; assert( in->peek() == '<' ); int tagIndex = tag->length(); bool closingTag = false; bool firstCharFound = false; for( ;; ) { if ( !in->good() ) return; int c = in->peek(); if ( c <= 0 ) { TiXmlDocument* document = GetDocument(); if ( document ) document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); return; } if ( c == '>' ) break; *tag += (char) c; in->get(); if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) ) { firstCharFound = true; if ( c == '/' ) closingTag = true; } } // If it was a closing tag, then read in the closing '>' to clean up the input stream. // If it was not, the streaming will be done by the tag. if ( closingTag ) { if ( !in->good() ) return; int c = in->get(); if ( c <= 0 ) { TiXmlDocument* document = GetDocument(); if ( document ) document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); return; } assert( c == '>' ); *tag += (char) c; // We are done, once we've found our closing tag. return; } else { // If not a closing tag, id it, and stream. const char* tagloc = tag->c_str() + tagIndex; TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING ); if ( !node ) return; node->StreamIn( in, tag ); delete node; node = 0; // No return: go around from the beginning: text, closing tag, or node. } } }}#endifconst char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ){ p = SkipWhiteSpace( p, encoding ); TiXmlDocument* document = GetDocument(); if ( !p || !*p ) { if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -