📄 parser.cpp
字号:
// Parser.cpp,v 1.29 2004/01/09 00:50:35 kitty Exp
#include "ACEXML/parser/parser/Parser.h"
#if !defined (__ACEXML_INLINE__)
# include "ACEXML/parser/parser/Parser.i"
#endif /* __ACEXML_INLINE__ */
#include "ace/ACE.h"
#include "ACEXML/common/Transcode.h"
#include "ACEXML/common/AttributesImpl.h"
#include "ACEXML/common/StrCharStream.h"
#include "ACEXML/common/StreamFactory.h"
#include "ACEXML/parser/parser/ParserInternals.h"
#include "ace/OS_NS_string.h"
#include "ace/OS_NS_strings.h"
// File-local string constant "CDATA".
// NOTE(review): presumably the type reported for attributes that have no
// declared type; its use is outside this part of the file -- confirm.
static const ACEXML_Char default_attribute_type[] = ACE_TEXT ("CDATA");
// File-local empty string: a single terminating 0 character.
static const ACEXML_Char empty_string[] = { 0 };
// Feature-name strings defined as static members of ACEXML_Parser.
// "Simple" is a parser-specific name; the remaining three are URIs under
// http://xml.org/sax/features/ (namespaces, namespace-prefixes, validation).
const ACEXML_Char
ACEXML_Parser::simple_parsing_feature_[] = ACE_TEXT ("Simple");
const ACEXML_Char
ACEXML_Parser::namespaces_feature_[] = ACE_TEXT ("http://xml.org/sax/features/namespaces");
const ACEXML_Char
ACEXML_Parser::namespace_prefixes_feature_[] = ACE_TEXT ("http://xml.org/sax/features/namespace-prefixes");
const ACEXML_Char
ACEXML_Parser::validation_feature_[] = ACE_TEXT ("http://xml.org/sax/features/validation");
// Default constructor.
//
// Every handler/entity-resolver member, doctype_ and current_ start at 0.
// validate_ and namespaces_ start at 1; every other flag-like member starts
// at 0. ref_state_ starts as ACEXML_ParserInt::INVALID.
// No handlers are installed here; parse() reports a fatal error when
// content_handler_ is still 0 (and when dtd_handler_ is 0 while validate_
// is set).
ACEXML_Parser::ACEXML_Parser (void)
: dtd_handler_ (0),
entity_resolver_ (0),
content_handler_ (0),
error_handler_ (0),
doctype_ (0),
current_ (0),
// NOTE(review): MAXPATHLEN is presumably the initial capacity of the
// stack -- confirm against the alt_stack_ declaration.
alt_stack_ (MAXPATHLEN),
nested_namespace_ (0),
ref_state_ (ACEXML_ParserInt::INVALID),
external_subset_ (0),
external_entity_ (0),
has_pe_refs_ (0),
standalone_ (0),
external_dtd_ (0),
internal_dtd_ (0),
simple_parsing_ (0),
validate_ (1), // checked by parse() and parse_doctypedecl()
namespaces_(1),
namespace_prefixes_ (0)
{
}
// Destructor. The body is empty: no explicit cleanup is performed here.
ACEXML_Parser::~ACEXML_Parser (void)
{
}
/**
 * Prepare the parser for a new document.
 *
 * Initializes namespace support, registers the predefined entities taken
 * from ACEXML_ParserInt::predef_ent_ / predef_val_, and finally switches
 * the parser input to @a input using the input's system id.
 *
 * @param input  Input source to read from; a null pointer is rejected.
 * @return -1 if @a input is null, if namespace support cannot be
 *         initialized, or if a predefined entity cannot be added;
 *         otherwise the value returned by switch_input().
 */
int
ACEXML_Parser::initialize(ACEXML_InputSource* input)
{
  // input is dereferenced at the end of this function; reject a null
  // source up front instead of crashing there.
  if (input == 0)
    {
      ACE_ERROR ((LM_ERROR,
                  ACE_TEXT ("Invalid input source\n")));
      return -1;
    }

  // Initialize namespace support
  if (this->xml_namespace_.init() == -1)
    {
      ACE_ERROR ((LM_ERROR,
                  ACE_TEXT ("Error initializing namespace support\n")));
      return -1;
    }

  // Register the predefined entities.
  // NOTE(review): the bound 5 presumably equals the number of entries in
  // predef_ent_ / predef_val_ -- confirm against their definition.
  for (int i = 0; i < 5; ++i)
    {
      if (this->predef_entities_.add_entity (ACEXML_ParserInt::predef_ent_[i],
                                             ACEXML_ParserInt::predef_val_[i])
          != 0)
        {
          // This is a failure path, so it is logged at LM_ERROR, the same
          // priority as the namespace initialization failure above.
          ACE_ERROR ((LM_ERROR,
                      ACE_TEXT ("Error adding entity %s to Manager\n"),
                      ACEXML_ParserInt::predef_ent_[i]));
          return -1;
        }
    }

  return this->switch_input (input, input->getSystemId());
}
/**
 * Parse the document identified by @a systemId.
 *
 * Convenience overload: wraps @a systemId in a newly allocated
 * ACEXML_InputSource and forwards it to the ACEXML_InputSource overload
 * of parse().
 *
 * NOTE(review): the allocated input source is not deleted in this
 * function; presumably ownership passes to the parser through the
 * forwarded call -- confirm.
 */
void
ACEXML_Parser::parse (const ACEXML_Char *systemId ACEXML_ENV_ARG_DECL)
  ACE_THROW_SPEC ((ACEXML_SAXException))
{
  // Allocate the wrapper through ACE_NEW, then delegate.
  ACEXML_InputSource* source = 0;
  ACE_NEW (source, ACEXML_InputSource (systemId));

  this->parse (source ACEXML_ENV_ARG_PARAMETER);
}
// Parse the XML document supplied by <input>.
//
// Steps, in order:
//  1. Report a fatal error if <input> is null, if no content handler is
//     set, if validate_ is set without a DTD handler, or if initialize()
//     fails.
//  2. Hand the locator obtained from current_ to the content handler.
//  3. Parse an XMLDecl if the document starts with "<?x"; a missing
//     XMLDecl is a fatal error when validate_ is set. Then call
//     startDocument() on the content handler.
//  4. Loop over the prolog: processing instructions, comments and at most
//     one DOCTYPE declaration, until something else is seen.
//  5. Parse the root element, call endDocument() and reset() the parser.
void
ACEXML_Parser::parse (ACEXML_InputSource *input ACEXML_ENV_ARG_DECL)
ACE_THROW_SPEC ((ACEXML_SAXException))
{
// --- Precondition checks -------------------------------------------------
if (input == 0)
{
this->fatal_error(ACE_TEXT ("Invalid input source")
ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK;
}
if (this->content_handler_ == 0)
{
this->fatal_error (ACE_TEXT ("No content handlers defined. Exiting..")
ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK;
}
// A DTD handler is only mandatory when validating.
if (this->validate_ && this->dtd_handler_ == 0)
{
this->fatal_error (ACE_TEXT ("No DTD handlers defined. Exiting..")
ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK;
}
if (this->initialize(input) == -1)
{
this->fatal_error (ACE_TEXT ("Failed to initialize parser state")
ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK;
}
// Set up Locator.
this->content_handler_->setDocumentLocator (this->current_->getLocator());
// --- Optional XMLDecl ----------------------------------------------------
int xmldecl_defined = 0;
ACEXML_Char fwd = this->get(); // Consume '<'
if (fwd == '<' && this->peek() == '?')
{
this->get(); // Consume '?'
fwd = this->peek();
// NOTE(review): xmldecl_defined is always 0 at this point, so only the
// 'x' test decides. Any "<?x..." opening is handed to parse_xml_decl().
if (fwd == 'x' && !xmldecl_defined)
{
this->parse_xml_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
ACEXML_CHECK;
xmldecl_defined = 1;
}
}
// We need a XMLDecl in a Valid XML document
if (this->validate_ && !xmldecl_defined)
{
this->fatal_error (ACE_TEXT ("Expecting an XMLDecl at the beginning of")
ACE_TEXT (" a valid document")
ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK;
}
this->content_handler_->startDocument (ACEXML_ENV_SINGLE_ARG_PARAMETER);
ACEXML_CHECK;
// --- Prolog: Misc* (doctypedecl Misc*)? ----------------------------------
// xmldecl_defined doubles as "a prolog item has already been consumed":
// it is set to 1 after every PI, comment and DOCTYPE handled below.
int doctype_defined = 0;
for (int prolog_done = 0; prolog_done == 0; )
{
// Expect a '<' only if we have encountered a XMLDecl, or we are
// looping through Misc blocks.
// NOTE(review): when xmldecl_defined is still 0, fwd is not refreshed
// here; the switch below acts on the value left by the code before the
// loop (the first character read, or the character after "<?").
// Confirm this is intended for documents that start with a comment,
// PI or DOCTYPE but have no XMLDecl.
if (xmldecl_defined)
{
if (this->skip_whitespace () != '<')
{
this->fatal_error (ACE_TEXT ("Expecting '<' at the beginning of ")
ACE_TEXT ("Misc section")
ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK;
}
fwd = this->peek();
}
switch (fwd)
{
case '?': // Processing instruction
this->get();
this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER);
ACEXML_CHECK;
xmldecl_defined = 1;
break;
case '!': // DOCTYPE declaration or comment
this->get();
fwd = this->peek ();
if (fwd == 'D' && !doctype_defined) // DOCTYPE
{
// This will also take care of the trailing MISC block if any.
this->parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
ACEXML_CHECK;
doctype_defined = 1;
// Now that we have a DOCTYPE Decl defined, we shouldn't
// accept XML Decl any longer
xmldecl_defined = 1;
}
else if (fwd == 'D') // A second DOCTYPE is not allowed
{
this->fatal_error (ACE_TEXT ("Duplicate DOCTYPE declaration")
ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK;
}
else if (fwd == '-') // COMMENT
{
if (this->parse_comment () < 0)
{
this->fatal_error(ACE_TEXT ("Invalid comment in document")
ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK;
}
xmldecl_defined = 1;
}
// NOTE(review): any other character after "<!" is not handled by a
// branch here; the loop simply continues.
break;
case 0:
this->fatal_error (ACE_TEXT ("Unexpected end-of-file")
ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK;
// NOTE(review): there is no break here, so control reaches the default
// label below unless the two statements above leave the function --
// confirm.
default: // Root element begins
prolog_done = 1;
break;
}
}
// A missing DOCTYPE only produces a warning, even when validating.
if (this->validate_ && !doctype_defined)
{
this->warning (ACE_TEXT ("No doctypeDecl in valid document")
ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK;
}
// Now parse root element.
this->parse_element (1 ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK;
this->content_handler_->endDocument (ACEXML_ENV_SINGLE_ARG_PARAMETER);
ACEXML_CHECK;
// Reset the parser state
this->reset();
}
/**
 * Parse a document type declaration, starting at the DOCTYPE keyword.
 *
 * Layout handled here (XML 1.0, production [28]):
 *   "DOCTYPE" S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
 *
 * Side effects: stores the parsed name in doctype_, sets external_dtd_
 * when an ExternalID is present and internal_dtd_ when an internal subset
 * is present, and delegates to parse_external_dtd() and
 * parse_internal_dtd() respectively.
 *
 * @return 0 when the declaration was consumed up to its closing '>'.
 *         Error paths report through fatal_error() and leave via
 *         ACEXML_CHECK_RETURN (-1).
 */
int
ACEXML_Parser::parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_DECL)
  ACE_THROW_SPEC ((ACEXML_SAXException))
{
  if (this->parse_token (ACE_TEXT ("DOCTYPE")) < 0)
    {
      this->fatal_error(ACE_TEXT ("Expecting keyword DOCTYPE in a doctypedecl")
                        ACEXML_ENV_ARG_PARAMETER);
      ACEXML_CHECK_RETURN (-1);
    }

  // At least one whitespace character must separate the keyword and name.
  ACEXML_Char nextch = 0;
  if (this->skip_whitespace_count (&nextch) == 0)
    {
      this->fatal_error(ACE_TEXT ("Expecting a space between DOCTYPE keyword ")
                        ACE_TEXT ("and name") ACEXML_ENV_ARG_PARAMETER);
      ACEXML_CHECK_RETURN (-1);
    }

  this->doctype_ = this->parse_name ();
  if (this->doctype_ == 0)
    {
      this->fatal_error(ACE_TEXT ("Invalid DOCTYPE name")
                        ACEXML_ENV_ARG_PARAMETER);
      ACEXML_CHECK_RETURN (-1);
    }

  // An 'S' or 'P' after the name is taken as the start of an ExternalID
  // (the SYSTEM / PUBLIC keywords in the grammar).
  int count = this->skip_whitespace_count (&nextch);
  if (nextch == 'S' || nextch == 'P') // ExternalID defined
    {
      if (count == 0)
        {
          // This gap is between the name and the ExternalID, so the
          // message names those two parts (with a space between words).
          this->fatal_error(ACE_TEXT ("Expecting a space between DOCTYPE ")
                            ACE_TEXT ("name and ExternalID")
                            ACEXML_ENV_ARG_PARAMETER);
          ACEXML_CHECK_RETURN (-1);
        }
      this->external_dtd_ = 1;
      this->parse_external_dtd (ACEXML_ENV_SINGLE_ARG_PARAMETER);
      ACEXML_CHECK_RETURN (-1);
    }

  nextch = this->skip_whitespace ();
  switch (nextch)
    {
    case '[':
      this->internal_dtd_ = 1; // Internal DTD definition
      this->parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_PARAMETER);
      ACEXML_CHECK_RETURN (-1);
      break;
    case '>': // End of DTD definition
      // The declaration ends without an internal subset. When validating,
      // that is only acceptable if an external DTD was given.
      if (this->validate_ && !this->external_dtd_)
        {
          this->fatal_error (ACE_TEXT ("No DTD defined")
                             ACEXML_ENV_ARG_PARAMETER);
          ACEXML_CHECK_RETURN (-1);
        }
      return 0;
    case 0:
      // End of input is signalled by the value 0 (the same test the other
      // parse routines use), not by the digit character '0'.
      this->fatal_error (ACE_TEXT ("Unexpected end-of-file")
                         ACEXML_ENV_ARG_PARAMETER);
      ACEXML_CHECK_RETURN (-1);
      break;
    default:
      // NOTE(review): any other character has already been consumed by
      // skip_whitespace() and is dropped before the '>' check below; the
      // grammar allows only '[' or '>' here -- consider reporting it.
      break;
    }

  if (this->skip_whitespace() != '>')
    {
      this->fatal_error(ACE_TEXT ("Expecting '>' at end of doctypedecl")
                        ACEXML_ENV_ARG_PARAMETER);
      ACEXML_CHECK_RETURN (-1);
    }
  return 0;
}
int
ACEXML_Parser::parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_DECL)
ACE_THROW_SPEC ((ACEXML_SAXException))
{
this->ref_state_ = ACEXML_ParserInt::IN_INT_DTD;
ACEXML_Char nextch = this->skip_whitespace ();
do {
switch (nextch)
{
case '<':
nextch = this->get();
switch (nextch)
{
case '!':
this->parse_markup_decl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
ACEXML_CHECK_RETURN (-1);
break;
case '?':
this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER);
ACEXML_CHECK_RETURN (-1);
break;
default:
this->fatal_error (ACE_TEXT ("Invalid internal subset")
ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK_RETURN (-1);
break;
}
break;
case '%':
this->has_pe_refs_ = 1;
this->parse_PE_reference (ACEXML_ENV_SINGLE_ARG_PARAMETER);
ACEXML_CHECK_RETURN (-1);
break;
case ']': // End of internal definitions.
return 0;
case '&':
this->fatal_error (ACE_TEXT ("Invalid Reference in internal DTD")
ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK_RETURN (-1);
break;
case 0:
this->pop_context (0 ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK_RETURN (-1);
break;
default:
this->fatal_error (ACE_TEXT ("Invalid content in internal subset")
ACEXML_ENV_ARG_PARAMETER);
ACEXML_CHECK_RETURN (-1);
};
nextch = this->skip_whitespace ();
} while (1);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -