📄 txml_parser.cpp

📁 j2me is based on j2mepolish, client & server for mobile application.
💻 CPP
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
		if ( !p || !*p )
		{
			if ( document_ ) document_->set_error( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding_ );
			return 0;
		}
		if ( *p == '/' )
		{
			++p;
			// empty tag.
			if ( *p  != '>' )
			{
				if ( document_ ) document_->set_error( TIXML_ERROR_PARSING_EMPTY, p, data, encoding_ );		
				return 0;
			}
			return (p+1);
		}
		else if ( *p == '>' )
		{
			// Done with attributes_ (if there were any.)
			// read the value_ -- which can include other
			// elements -- read_ the end tag, and return.
			++p;
			p = read_value( p, data, encoding_ );		// Note this is an element method, and will set the error_ if one happens.
			if ( !p || !*p ) {
				// We were looking for the end tag, but found nothing.
				// Fix for [ 1663758 ] Failure to report error_ on bad XML
				if ( document_ ) document_->set_error( TIXML_ERROR_READING_END_TAG, p, data, encoding_ );
				return 0;
			}

			// We should find the end tag now
			if ( string_equal( p, endTag.c_str(), false, encoding_ ) )
			{
				p += endTag.length();
				return p;
			}
			else
			{
				if ( document_ ) document_->set_error( TIXML_ERROR_READING_END_TAG, p, data, encoding_ );
				return 0;
			}
		}
		else
		{
			// Try to read_ an attribute_:
			attribute* attrib = new attribute();
			if ( !attrib )
			{
				if ( document_ ) document_->set_error( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding_ );
				return 0;
			}

			attrib->set_document( document_ );
			pErr = p;
			p = attrib->parse( p, data, encoding_ );

			if ( !p || !*p )
			{
				if ( document_ ) document_->set_error( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding_ );
				delete attrib;
				return 0;
			}

			// Handle the strange case of double attributes_:
			#ifdef TIXML_USE_STL
			attribute* node_ = attributeSet.find( attrib->name_t_str() );
			#else
			attribute* node_ = attributeSet.find( attrib->name() );
			#endif
			if ( node_ )
			{
				node_->set_value( attrib->value() );
				delete attrib;
				return 0;
			}

			attributeSet.add( attrib );
		}
	}
	return p;
}


const char* element::read_value( const char* p, parsing_data* data, encoding encoding_ )
{
	document* document_ = get_document();

	// read in text_ and elements in any order.
	const char* pWithWhiteSpace = p;
	p = skip_white_space( p, encoding_ );

	while ( p && *p )
	{
		if ( *p != '<' )
		{
			// Take what we have, make a text_ element_.
			text* textNode = new text( "" );

			if ( !textNode )
			{
				if ( document_ ) document_->set_error( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding_ );
				    return 0;
			}

			if ( base::is_white_space_condensed() )
			{
				p = textNode->parse( p, data, encoding_ );
			}
			else
			{
				// Special case: we want to keep the white_ space
				// so that leading spaces aren't removed.
				p = textNode->parse( pWithWhiteSpace, data, encoding_ );
			}

			if ( !textNode->blank() )
				link_end_child( textNode );
			else
				delete textNode;
		} 
		else 
		{
			// We hit a '<'
			// Have we hit a new element_ or an end tag? this could_ also be
			// a text in the "CDATA" style.
			if ( string_equal( p, "</", false, encoding_ ) )
			{
				return p;
			}
			else
			{
				node* node_ = identify( p, encoding_ );
				if ( node_ )
				{
					p = node_->parse( p, data, encoding_ );
					link_end_child( node_ );
				}				
				else
				{
					return 0;
				}
			}
		}
		pWithWhiteSpace = p;
		p = skip_white_space( p, encoding_ );
	}

	if ( !p )
	{
		if ( document_ ) document_->set_error( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding_ );
	}	
	return p;
}


#ifdef TIXML_USE_STL
void unknown::stream_in( std::istream * in, TIXML_STRING * tag )
{
	while ( in->good() )
	{
		int c = in->get();	
		if ( c <= 0 )
		{
			document* document_ = get_document();
			if ( document_ )
				document_->set_error( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
			return;
		}
		(*tag) += (char) c;

		if ( c == '>' )
		{
			// All is well.
			return;		
		}
	}
}
#endif


// Parses an unrecognised "<...>" construct, storing everything between the
// angle brackets in value_.
//
// p         - input text; leading whitespace is skipped first.
// data      - optional position tracker; when present, the node location is
//             stamped from it.
// encoding_ - encoding forwarded to the helpers.
//
// Returns a pointer just past the closing '>', or a pointer to the
// terminating NUL when the input ends before a '>' is seen. Returns 0, after
// recording TIXML_ERROR_PARSING_UNKNOWN on the owning document, when the
// input does not start with '<'.
const char* unknown::parse( const char* p, parsing_data* data, encoding encoding_ )
{
	document* document_ = get_document();
	p = skip_white_space( p, encoding_ );

	if ( data )
	{
		data->stamp( p, encoding_ );
		location_ = data->get_cursor();
	}
	if ( !p || !*p || *p != '<' )
	{
		if ( document_ ) document_->set_error( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding_ );
		return 0;
	}
	++p;
	value_ = "";

	while ( p && *p && *p != '>' )
	{
		value_ += *p;
		++p;
	}

	if ( !p )
	{
		if ( document_ )	document_->set_error( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding_ );
	}
	// Guard the dereference: the test above treats p == 0 as possible, so the
	// check for '>' must not read through a null pointer.
	if ( p && *p == '>' )
		return p+1;
	return p;
}

#ifdef TIXML_USE_STL
// Appends characters from `in` to `*tag` until the accumulated text ends
// with "-->" (the comment terminator). Stops silently when the stream is no
// longer good(). If get() yields a value <= 0, TIXML_ERROR_EMBEDDED_NULL is
// recorded on the owning document (when there is one) and reading stops.
void comment::stream_in( std::istream * in, TIXML_STRING * tag )
{
	while ( in->good() )
	{
		int c = in->get();	
		if ( c <= 0 )
		{
			document* document_ = get_document();
			if ( document_ )
				document_->set_error( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
			return;
		}

		(*tag) += (char) c;

		// Only look back for "--" when at least three characters are present.
		// Without the length guard, length() - 2 / - 3 wrap around on a short
		// tag and at() throws std::out_of_range.
		const size_t len = tag->length();
		if ( c == '>' 
			 && len >= 3
			 && tag->at( len - 2 ) == '-'
			 && tag->at( len - 3 ) == '-' )
		{
			// All is well.
			return;		
		}
	}
}
#endif


// Parses an XML comment ("<!-- ... -->") and stores its body, verbatim, in
// value_.
//
// p         - input text; leading whitespace is skipped first.
// data      - optional position tracker; when present, the node location is
//             stamped from it.
// encoding_ - encoding forwarded to the helpers.
//
// Returns a pointer just past the closing "-->", or a pointer to the
// terminating NUL when the input ends before the terminator is found.
// Returns 0, after recording TIXML_ERROR_PARSING_COMMENT on the owning
// document (if any), when the input does not start with "<!--".
const char* comment::parse( const char* p, parsing_data* data, encoding encoding_ )
{
	document* document_ = get_document();
	value_ = "";

	p = skip_white_space( p, encoding_ );

	if ( data )
	{
		data->stamp( p, encoding_ );
		location_ = data->get_cursor();
	}
	const char* startTag = "<!--";
	const char* endTag   = "-->";

	if ( !string_equal( p, startTag, false, encoding_ ) )
	{
		// A comment node may not be attached to a document; check before
		// dereferencing, as every other error path in this file does.
		if ( document_ ) document_->set_error( TIXML_ERROR_PARSING_COMMENT, p, data, encoding_ );
		return 0;
	}
	p += strlen( startTag );

	// [ 1475201 ] TinyXML parses entities in comments
	// read_text() cannot be used here because it would expand entities. The
	// XML spec says entity references MUST NOT be recognized inside comments,
	// e.g.
	//
	//     <!-- declarations for <head> & <body> -->
	//
	// so the body is copied character by character, keeping all whitespace.
	while (	p && *p && !string_equal( p, endTag, false, encoding_ ) )
	{
		value_.append( p, 1 );
		++p;
	}

	// Step over the terminator only if it was actually found. When the input
	// ended first, p sits on the NUL and advancing it by strlen( endTag )
	// would move it past the end of the buffer.
	if ( p && *p ) 
		p += strlen( endTag );

	return p;
}


// Parses one attribute of the form  name = value  into name_ and value_.
//
// p         - input text; leading whitespace is skipped first.
// data      - optional position tracker; when present, the attribute
//             location is stamped from it.
// encoding_ - encoding forwarded to the helpers.
//
// The value may be wrapped in single or double quotes. An unquoted value is
// tolerated and runs until whitespace, '/', '>' or the end of input.
//
// Returns the position reached after the value (whatever read_text() returns
// for quoted values), or 0 on failure. Except for empty input, failures
// record TIXML_ERROR_READING_ATTRIBUTES on document_ when it is set.
const char* attribute::parse( const char* p, parsing_data* data, encoding encoding_ )
{
	p = skip_white_space( p, encoding_ );
	if ( !( p && *p ) )
		return 0;

	if ( data )
	{
		data->stamp( p, encoding_ );
		location_ = data->get_cursor();
	}

	// --- name ---------------------------------------------------------
	const char* const nameStart = p;
	p = read_name( p, &name_, encoding_ );
	if ( !( p && *p ) )
	{
		if ( document_ ) document_->set_error( TIXML_ERROR_READING_ATTRIBUTES, nameStart, data, encoding_ );
		return 0;
	}

	// --- '=' ----------------------------------------------------------
	p = skip_white_space( p, encoding_ );
	if ( !p || *p != '=' )		// a NUL also fails the '=' comparison
	{
		if ( document_ ) document_->set_error( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding_ );
		return 0;
	}
	++p;

	// --- value --------------------------------------------------------
	p = skip_white_space( p, encoding_ );
	if ( !( p && *p ) )
	{
		if ( document_ ) document_->set_error( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding_ );
		return 0;
	}

	const char opener = *p;
	if ( opener == '\'' || opener == '\"' )
	{
		// Quoted value: read up to the matching quote character.
		const char* closer = ( opener == '\'' ) ? "\'" : "\"";
		++p;
		return read_text( p, &value_, false, closer, false, encoding_ );
	}

	// All attribute values should be quoted, but leaving the quotes out is
	// such a common mistake that the parser does its best without them.
	value_ = "";
	for ( ; p && *p; ++p )
	{
		const char ch = *p;

		// Whitespace or the end of the tag finishes the value.
		if ( is_white_space( ch ) || ch == '\n' || ch == '\r' || ch == '/' || ch == '>' )
			break;

		if ( ch == '\'' || ch == '\"' )
		{
			// [ 1451649 ] get_attribute values with trailing quotes not handled correctly
			// A closing quote without an opening one: give up with an error.
			if ( document_ ) document_->set_error( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding_ );
			return 0;
		}

		value_ += ch;
	}
	return p;
}

#ifdef TIXML_USE_STL
void text::stream_in( std::istream * in, TIXML_STRING * tag )
{
	while ( in->good() )
	{
		int c = in->peek();	
		if ( !cdata && (c == '<' ) ) 
		{
			return;
		}
		if ( c <= 0 )
		{
			document* document_ = get_document();
			if ( document_ )
				document_->set_error( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
			return;
		}

		(*tag) += (char) c;
		in->get();	// "commits" the peek made above

		if ( cdata && c == '>' && tag->size() >= 3 ) {
			size_t len = tag->size();
			if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
				// terminator of cdata.
				return;
			}
		}    
	}
}
#endif

// Parses a text node, either a CDATA section or ordinary character data,
// into value_.
//
// p         - start of the text.
// data      - optional position tracker; when present, the node location is
//             stamped from it.
// encoding_ - encoding forwarded to the helpers.
//
// CDATA: taken when cdata is already set or the input starts with
// "<![CDATA[". The body is copied verbatim up to "]]>" and the result of
// read_text() over the terminator is returned. If the node is flagged as
// CDATA but the input lacks the opening marker, TIXML_ERROR_PARSING_CDATA is
// recorded on the owning document (if any) and 0 is returned.
//
// Ordinary text: read with read_text() up to '<'. On success the returned
// pointer is backed up by one so it points at that '<'; otherwise 0.
const char* text::parse( const char* p, parsing_data* data, encoding encoding_ )
{
	value_ = "";
	document* document_ = get_document();

	if ( data )
	{
		data->stamp( p, encoding_ );
		location_ = data->get_cursor();
	}

	const char* const startTag = "<![CDATA[";
	const char* const endTag   = "]]>";

	if ( cdata || string_equal( p, startTag, false, encoding_ ) )
	{
		cdata = true;

		if ( !string_equal( p, startTag, false, encoding_ ) )
		{
			// A text node may not be attached to a document; check before
			// dereferencing, as the other error paths in this file do.
			if ( document_ ) document_->set_error( TIXML_ERROR_PARSING_CDATA, p, data, encoding_ );
			return 0;
		}
		p += strlen( startTag );

		// Keep all the white space, ignore the encoding, etc.
		while (	   p && *p
				&& !string_equal( p, endTag, false, encoding_ )
			  )
		{
			value_ += *p;
			++p;
		}

		// Let read_text() consume the "]]>" terminator; the text it collects
		// is not needed.
		TIXML_STRING dummy; 
		p = read_text( p, &dummy, false, endTag, false, encoding_ );
		return p;
	}
	else
	{
		bool ignoreWhite = true;

		const char* end = "<";
		p = read_text( p, &value_, ignoreWhite, end, false, encoding_ );
		if ( p )
			return p-1;	// don't truncate the '<'
		return 0;
	}
}

#ifdef TIXML_USE_STL
void declaration::stream_in( std::istream * in, TIXML_STRING * tag )
{
	while ( in->good() )
	{
		int c = in->get();
		if ( c <= 0 )
		{
			document* document_ = get_document();
			if ( document_ )
				document_->set_error( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
			return;
		}
		(*tag) += (char) c;

		if ( c == '>' )
		{
			// All is well.
			return;
		}
	}
}
#endif

const char* declaration::parse( const char* p, parsing_data* data, encoding _encoding )
{
	p = skip_white_space( p, _encoding );
	// find the beginning, find the end, and look_ for
	// the stuff in-between.
	document* document_ = get_document();
	if ( !p || !*p || !string_equal( p, "<?xml", true, _encoding ) )
	{
		if ( document_ ) document_->set_error( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
		return 0;
	}
	if ( data )
	{
		data->stamp( p, _encoding );
		location_ = data->get_cursor();
	}
	p += 5;

	version_ = "";
	encoding_ = "";
	standalone_ = "";

	while ( p && *p )
	{
		if ( *p == '>' )
		{
			++p;
			return p;
		}

		p = skip_white_space( p, _encoding );
		if ( string_equal( p, "version", true, _encoding ) )
		{
			attribute attrib;
			p = attrib.parse( p, data, _encoding );		
			version_ = attrib.value();
		}
		else if ( string_equal( p, "encoding", true, _encoding ) )
		{
			attribute attrib;
			p = attrib.parse( p, data, _encoding );		
			encoding_ = attrib.value();
		}
		else if ( string_equal( p, "standalone", true, _encoding ) )
		{
			attribute attrib;
			p = attrib.parse( p, data, _encoding );		
			standalone_ = attrib.value();
		}
		else
		{
			// read over whatever it_ is.
			while( p && *p && *p != '>' && !is_white_space( *p ) )
				++p;
		}
	}
	return 0;
}

// Returns true when every character of value_ satisfies is_white_space()
// (which includes the empty string), false otherwise.
bool text::blank() const
{
	unsigned pos = 0;
	while ( pos < value_.length() )
	{
		if ( !is_white_space( value_[pos] ) )
			return false;
		++pos;
	}
	return true;
}

} }
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -