📄 txml_parser.cpp
字号:
if ( !p || !*p )
{
if ( document_ ) document_->set_error( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding_ );
return 0;
}
if ( *p == '/' )
{
++p;
// empty tag.
if ( *p != '>' )
{
if ( document_ ) document_->set_error( TIXML_ERROR_PARSING_EMPTY, p, data, encoding_ );
return 0;
}
return (p+1);
}
else if ( *p == '>' )
{
// Done with attributes_ (if there were any.)
// read the value_ -- which can include other
// elements -- read_ the end tag, and return.
++p;
p = read_value( p, data, encoding_ ); // Note this is an element method, and will set the error_ if one happens.
if ( !p || !*p ) {
// We were looking for the end tag, but found nothing.
// Fix for [ 1663758 ] Failure to report error_ on bad XML
if ( document_ ) document_->set_error( TIXML_ERROR_READING_END_TAG, p, data, encoding_ );
return 0;
}
// We should find the end tag now
if ( string_equal( p, endTag.c_str(), false, encoding_ ) )
{
p += endTag.length();
return p;
}
else
{
if ( document_ ) document_->set_error( TIXML_ERROR_READING_END_TAG, p, data, encoding_ );
return 0;
}
}
else
{
// Try to read_ an attribute_:
attribute* attrib = new attribute();
if ( !attrib )
{
if ( document_ ) document_->set_error( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding_ );
return 0;
}
attrib->set_document( document_ );
pErr = p;
p = attrib->parse( p, data, encoding_ );
if ( !p || !*p )
{
if ( document_ ) document_->set_error( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding_ );
delete attrib;
return 0;
}
// Handle the strange case of double attributes_:
#ifdef TIXML_USE_STL
attribute* node_ = attributeSet.find( attrib->name_t_str() );
#else
attribute* node_ = attributeSet.find( attrib->name() );
#endif
if ( node_ )
{
node_->set_value( attrib->value() );
delete attrib;
return 0;
}
attributeSet.add( attrib );
}
}
return p;
}
const char* element::read_value( const char* p, parsing_data* data, encoding encoding_ )
{
document* document_ = get_document();
// read in text_ and elements in any order.
const char* pWithWhiteSpace = p;
p = skip_white_space( p, encoding_ );
while ( p && *p )
{
if ( *p != '<' )
{
// Take what we have, make a text_ element_.
text* textNode = new text( "" );
if ( !textNode )
{
if ( document_ ) document_->set_error( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding_ );
return 0;
}
if ( base::is_white_space_condensed() )
{
p = textNode->parse( p, data, encoding_ );
}
else
{
// Special case: we want to keep the white_ space
// so that leading spaces aren't removed.
p = textNode->parse( pWithWhiteSpace, data, encoding_ );
}
if ( !textNode->blank() )
link_end_child( textNode );
else
delete textNode;
}
else
{
// We hit a '<'
// Have we hit a new element_ or an end tag? this could_ also be
// a text in the "CDATA" style.
if ( string_equal( p, "</", false, encoding_ ) )
{
return p;
}
else
{
node* node_ = identify( p, encoding_ );
if ( node_ )
{
p = node_->parse( p, data, encoding_ );
link_end_child( node_ );
}
else
{
return 0;
}
}
}
pWithWhiteSpace = p;
p = skip_white_space( p, encoding_ );
}
if ( !p )
{
if ( document_ ) document_->set_error( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding_ );
}
return p;
}
#ifdef TIXML_USE_STL
void unknown::stream_in( std::istream * in, TIXML_STRING * tag )
{
while ( in->good() )
{
int c = in->get();
if ( c <= 0 )
{
document* document_ = get_document();
if ( document_ )
document_->set_error( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
return;
}
(*tag) += (char) c;
if ( c == '>' )
{
// All is well.
return;
}
}
}
#endif
// Parses an unrecognised "<...>" construct, storing everything between the
// angle brackets in value_.
//
// p         - input position; leading white space is skipped first.
// data      - optional position tracker; when given, the node's location_
//             is recorded from it.
// encoding_ - encoding forwarded to the helpers.
//
// Returns the position just past the closing '>' on success. Returns 0 and
// records TIXML_ERROR_PARSING_UNKNOWN when the input does not start with '<'.
// When the input ends before a '>' is seen, the position of the terminating
// NUL is returned.
const char* unknown::parse( const char* p, parsing_data* data, encoding encoding_ )
{
    document* document_ = get_document();
    p = skip_white_space( p, encoding_ );
    if ( data )
    {
        data->stamp( p, encoding_ );
        location_ = data->get_cursor();
    }
    if ( !p || !*p || *p != '<' )
    {
        if ( document_ ) document_->set_error( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding_ );
        return 0;
    }
    ++p;    // step over '<'

    // Copy everything up to (not including) the closing '>'.
    value_ = "";
    while ( p && *p && *p != '>' )
    {
        value_ += *p;
        ++p;
    }
    if ( !p )
    {
        if ( document_ ) document_->set_error( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding_ );
        // Bail out here: falling through would dereference the null pointer.
        return 0;
    }
    if ( *p == '>' )
        return p+1;
    return p;
}
#ifdef TIXML_USE_STL
void comment::stream_in( std::istream * in, TIXML_STRING * tag )
{
while ( in->good() )
{
int c = in->get();
if ( c <= 0 )
{
document* document_ = get_document();
if ( document_ )
document_->set_error( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
return;
}
(*tag) += (char) c;
if ( c == '>'
&& tag->at( tag->length() - 2 ) == '-'
&& tag->at( tag->length() - 3 ) == '-' )
{
// All is well.
return;
}
}
}
#endif
// Parses an XML comment ("<!-- ... -->"), storing the text between the
// delimiters verbatim in value_.
//
// p         - input position; leading white space is skipped first.
// data      - optional position tracker; when given, the node's location_
//             is recorded from it.
// encoding_ - encoding forwarded to the helpers.
//
// Returns the position just past "-->" on success. Returns 0 and records
// TIXML_ERROR_PARSING_COMMENT (when a document is attached) if the input does
// not start with "<!--". If the input ends before "-->" is found, the
// position of the terminating NUL is returned.
const char* comment::parse( const char* p, parsing_data* data, encoding encoding_ )
{
    document* document_ = get_document();
    value_ = "";
    p = skip_white_space( p, encoding_ );
    if ( data )
    {
        data->stamp( p, encoding_ );
        location_ = data->get_cursor();
    }
    const char* startTag = "<!--";
    const char* endTag   = "-->";
    if ( !string_equal( p, startTag, false, encoding_ ) )
    {
        // The node may not be attached to a document, so guard the call.
        if ( document_ ) document_->set_error( TIXML_ERROR_PARSING_COMMENT, p, data, encoding_ );
        return 0;
    }
    p += strlen( startTag );

    // [ 1475201 ] TinyXML parses entities in comments
    // read_text cannot be used here, because entities must not be parsed.
    // p = read_text( p, &value_, false, endTag, false, encoding_ );
    //
    // From the XML spec:
    /*
     [Definition: Comments may appear anywhere in a document outside other markup; in addition,
                  they may appear within the document type declaration at places allowed by the grammar.
                  They are not part of the document's character data; an XML processor MAY, but need not,
                  make it possible for an application to retrieve the text of comments. For compatibility,
                  the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
                  references MUST NOT be recognized within comments.
                  An example of a comment:
                  <!-- declarations for <head> & <body> -->
    */

    // Keep all the white space.
    while ( p && *p && !string_equal( p, endTag, false, encoding_ ) )
    {
        value_.append( p, 1 );
        ++p;
    }

    // Step over the end tag only if it was actually found. When the input
    // ran out first, p sits on the terminating NUL and must not be advanced
    // past the end of the buffer.
    if ( p && *p )
        p += strlen( endTag );
    return p;
}
// Parses a single attribute of the form  name = value  into name_ and value_.
// The value may be enclosed in single or double quotes, or be unquoted.
//
// p         - input position; leading white space is skipped first.
// data      - optional position tracker; when given, the attribute's
//             location_ is recorded from it.
// encoding_ - encoding forwarded to the helpers.
//
// Returns the position following the attribute as reported by read_text()
// (quoted values) or the position of the character that ended an unquoted
// value. Returns 0 on failure; except for empty input,
// TIXML_ERROR_READING_ATTRIBUTES is recorded when a document is attached.
const char* attribute::parse( const char* p, parsing_data* data, encoding encoding_ )
{
    p = skip_white_space( p, encoding_ );
    if ( !p || !*p )
        return 0;

    if ( data )
    {
        data->stamp( p, encoding_ );
        location_ = data->get_cursor();
    }

    // --- name ---
    const char* nameStart = p;
    p = read_name( p, &name_, encoding_ );
    if ( !p || !*p )
    {
        if ( document_ ) document_->set_error( TIXML_ERROR_READING_ATTRIBUTES, nameStart, data, encoding_ );
        return 0;
    }

    // --- '=' ---
    p = skip_white_space( p, encoding_ );
    if ( !p || !*p || *p != '=' )
    {
        if ( document_ ) document_->set_error( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding_ );
        return 0;
    }
    ++p;

    // --- value ---
    p = skip_white_space( p, encoding_ );
    if ( !p || !*p )
    {
        if ( document_ ) document_->set_error( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding_ );
        return 0;
    }

    const char SINGLE_QUOTE = '\'';
    const char DOUBLE_QUOTE = '\"';
    const char opener = *p;

    if ( opener == SINGLE_QUOTE || opener == DOUBLE_QUOTE )
    {
        // Quoted value: read up to the matching closing quote.
        const char* closer = ( opener == SINGLE_QUOTE ) ? "\'" : "\"";
        return read_text( p + 1, &value_, false, closer, false, encoding_ );
    }

    // Attribute values should always be quoted, but leaving the quotes out
    // is such a common mistake that the parser does its best without them:
    // the value runs until white space or the end of the tag.
    value_ = "";
    for ( ; p && *p; ++p )
    {
        const char ch = *p;
        if ( is_white_space( ch ) || ch == '\n' || ch == '\r'   // white space
             || ch == '/' || ch == '>' )                        // tag end
        {
            break;
        }
        if ( ch == SINGLE_QUOTE || ch == DOUBLE_QUOTE )
        {
            // [ 1451649 ] Attribute values with trailing quotes not handled correctly
            // There was no opening quote, yet a closing one showed up.
            // Give up and report an error.
            if ( document_ ) document_->set_error( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding_ );
            return 0;
        }
        value_ += ch;
    }
    return p;
}
#ifdef TIXML_USE_STL
void text::stream_in( std::istream * in, TIXML_STRING * tag )
{
while ( in->good() )
{
int c = in->peek();
if ( !cdata && (c == '<' ) )
{
return;
}
if ( c <= 0 )
{
document* document_ = get_document();
if ( document_ )
document_->set_error( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
return;
}
(*tag) += (char) c;
in->get(); // "commits" the peek made above
if ( cdata && c == '>' && tag->size() >= 3 ) {
size_t len = tag->size();
if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
// terminator of cdata.
return;
}
}
}
}
#endif
// Parses a text node, either a CDATA section or ordinary character data,
// into value_.
//
// p         - input position at which the text starts.
// data      - optional position tracker; when given, the node's location_
//             is recorded from it.
// encoding_ - encoding forwarded to the helpers.
//
// CDATA (the cdata flag is already set, or the input starts with
// "<![CDATA["): the content up to "]]>" is copied verbatim and the result of
// read_text() consuming the end tag is returned. If the flag is set but the
// input does not start with "<![CDATA[", 0 is returned and
// TIXML_ERROR_PARSING_CDATA is recorded when a document is attached.
//
// Ordinary text: the text up to the next '<' is read via read_text(), and the
// returned position is moved back by one so that the caller still sees the
// '<'. Returns 0 if read_text() fails.
const char* text::parse( const char* p, parsing_data* data, encoding encoding_ )
{
    value_ = "";
    document* document_ = get_document();
    if ( data )
    {
        data->stamp( p, encoding_ );
        location_ = data->get_cursor();
    }
    const char* const startTag = "<![CDATA[";
    const char* const endTag   = "]]>";
    if ( cdata || string_equal( p, startTag, false, encoding_ ) )
    {
        cdata = true;
        if ( !string_equal( p, startTag, false, encoding_ ) )
        {
            // The node may not be attached to a document, so guard the call.
            if ( document_ ) document_->set_error( TIXML_ERROR_PARSING_CDATA, p, data, encoding_ );
            return 0;
        }
        p += strlen( startTag );

        // Keep all the white space, ignore the encoding, etc.
        while ( p && *p
                && !string_equal( p, endTag, false, encoding_ )
              )
        {
            value_ += *p;
            ++p;
        }

        // Let read_text() step over the end tag; its output is discarded.
        TIXML_STRING dummy;
        p = read_text( p, &dummy, false, endTag, false, encoding_ );
        return p;
    }
    else
    {
        bool ignoreWhite = true;
        const char* end = "<";
        p = read_text( p, &value_, ignoreWhite, end, false, encoding_ );
        if ( p )
            return p-1; // don't truncate the '<'
        return 0;
    }
}
#ifdef TIXML_USE_STL
void declaration::stream_in( std::istream * in, TIXML_STRING * tag )
{
while ( in->good() )
{
int c = in->get();
if ( c <= 0 )
{
document* document_ = get_document();
if ( document_ )
document_->set_error( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
return;
}
(*tag) += (char) c;
if ( c == '>' )
{
// All is well.
return;
}
}
}
#endif
const char* declaration::parse( const char* p, parsing_data* data, encoding _encoding )
{
p = skip_white_space( p, _encoding );
// find the beginning, find the end, and look_ for
// the stuff in-between.
document* document_ = get_document();
if ( !p || !*p || !string_equal( p, "<?xml", true, _encoding ) )
{
if ( document_ ) document_->set_error( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
return 0;
}
if ( data )
{
data->stamp( p, _encoding );
location_ = data->get_cursor();
}
p += 5;
version_ = "";
encoding_ = "";
standalone_ = "";
while ( p && *p )
{
if ( *p == '>' )
{
++p;
return p;
}
p = skip_white_space( p, _encoding );
if ( string_equal( p, "version", true, _encoding ) )
{
attribute attrib;
p = attrib.parse( p, data, _encoding );
version_ = attrib.value();
}
else if ( string_equal( p, "encoding", true, _encoding ) )
{
attribute attrib;
p = attrib.parse( p, data, _encoding );
encoding_ = attrib.value();
}
else if ( string_equal( p, "standalone", true, _encoding ) )
{
attribute attrib;
p = attrib.parse( p, data, _encoding );
standalone_ = attrib.value();
}
else
{
// read over whatever it_ is.
while( p && *p && *p != '>' && !is_white_space( *p ) )
++p;
}
}
return 0;
}
// Returns true when value_ contains nothing but white space characters
// (an empty value_ therefore counts as blank), false otherwise.
bool text::blank() const
{
    unsigned idx = 0;
    while ( idx < value_.length() )
    {
        if ( !is_white_space( value_[idx] ) )
            return false;   // found a character that is not white space
        idx++;
    }
    return true;
}
} }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -