📄 parser.cpp
字号:
// Parser.cpp,v 1.21 2002/10/15 22:21:36 kitty Exp
#include "ACEXML/parser/parser/Parser.h"
#include "ACEXML/common/Transcode.h"
#include "ACEXML/common/AttributesImpl.h"
#include "ace/ACE.h"
// String constants are spelled out one character at a time and explicitly
// zero-terminated (presumably so the same initializer works whatever
// character width ACEXML_Char has -- TODO confirm).

// The string "CDATA"; file-local.
static const ACEXML_Char default_attribute_type[] = {'C', 'D', 'A', 'T', 'A', 0};
// A zero-length string (terminator only); file-local.
static const ACEXML_Char empty_string[] = { 0 };
// Feature name "Simple".  getFeature()/setFeature() map it onto
// simple_parsing_, which parse() tests before calling parse_xml_prolog().
const ACEXML_Char
ACEXML_Parser::simple_parsing_feature_[] = { 'S', 'i', 'm', 'p', 'l', 'e', 0 };
// Feature name "http://xml.org/sax/features/namespaces"; mapped onto
// namespaces_ by getFeature()/setFeature().
const ACEXML_Char
ACEXML_Parser::namespaces_feature_[] = {'h', 't', 't', 'p', ':', '/', '/', 'x', 'm', 'l', '.', 'o', 'r', 'g', '/', 's', 'a', 'x', '/', 'f', 'e', 'a', 't', 'u', 'r', 'e', 's', '/', 'n', 'a', 'm', 'e', 's', 'p', 'a', 'c', 'e', 's', 0 };
// Feature name "http://xml.org/sax/features/namespace-prefixes"; mapped
// onto namespace_prefixes_ by getFeature()/setFeature().
const ACEXML_Char
ACEXML_Parser::namespace_prefixes_feature_[] = {'h', 't', 't', 'p', ':', '/', '/', 'x', 'm', 'l', '.', 'o', 'r', 'g', '/', 's', 'a', 'x', '/', 'f', 'e', 'a', 't', 'u', 'r', 'e', 's', '/', 'n', 'a', 'm', 'e', 's', 'p', 'a', 'c', 'e', '-', 'p', 'r', 'e', 'f', 'i', 'x', 'e', 's', 0 };
#if !defined (__ACEXML_INLINE__)
# include "ACEXML/parser/parser/Parser.i"
#endif /* __ACEXML_INLINE__ */
// Default constructor.  Every handler pointer, the input stream pointer and
// the doctype_/dtd_system_/dtd_public_ members start out as 0, the locator
// is default-constructed, and of the three feature flags only namespaces_
// is switched on (1); simple_parsing_ and namespace_prefixes_ start at 0.
ACEXML_Parser::ACEXML_Parser (void)
: dtd_handler_ (0),
entity_resolver_ (0),
content_handler_ (0),
error_handler_ (0),
instream_ (0),
doctype_ (0),
dtd_system_ (0),
dtd_public_ (0),
locator_(),
simple_parsing_ (0),
namespaces_(1),
namespace_prefixes_ (0)
{
}
// Destructor.  The body is empty: no explicit cleanup is performed here, so
// none of the pointers held by the parser are released by this function.
ACEXML_Parser::~ACEXML_Parser (void)
{
}
/**
 * Report the current setting of a parser feature.
 *
 * @param name  Feature name; compared with ACE_OS::strcmp against the three
 *              feature-name constants of this class.
 * @return The stored flag for the matching feature.  For any other name an
 *         ACEXML_SAXNotRecognizedException carrying @a name is raised via
 *         ACEXML_THROW_RETURN, with -1 as the accompanying return value.
 */
int
ACEXML_Parser::getFeature (const ACEXML_Char *name ACEXML_ENV_ARG_DECL)
  ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
                   ACEXML_SAXNotSupportedException))
{
  // Names are tested in the same order as before; the first match wins.
  if (ACE_OS::strcmp (name, ACEXML_Parser::simple_parsing_feature_) == 0)
    return this->simple_parsing_;

  if (ACE_OS::strcmp (name, ACEXML_Parser::namespaces_feature_) == 0)
    return this->namespaces_;

  if (ACE_OS::strcmp (name, ACEXML_Parser::namespace_prefixes_feature_) == 0)
    return this->namespace_prefixes_;

  // Nothing matched: the feature is unknown to this parser.
  ACEXML_THROW_RETURN (ACEXML_SAXNotRecognizedException (name), -1);
}
/**
 * Switch a parser feature on or off.
 *
 * @param name           Feature name; compared with ACE_OS::strcmp against
 *                       the three feature-name constants of this class.
 * @param boolean_value  Zero turns the feature off; any other value turns
 *                       it on (the stored flag is always exactly 0 or 1).
 *
 * An unrecognised @a name raises ACEXML_SAXNotRecognizedException via
 * ACEXML_THROW.
 */
void
ACEXML_Parser::setFeature (const ACEXML_Char *name,
                           int boolean_value ACEXML_ENV_ARG_DECL)
  ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
                   ACEXML_SAXNotSupportedException))
{
  // Collapse the caller's value to 0/1 once instead of in every branch.
  const int flag = (boolean_value != 0) ? 1 : 0;

  if (ACE_OS::strcmp (name, ACEXML_Parser::simple_parsing_feature_) == 0)
    {
      this->simple_parsing_ = flag;
      return;
    }

  if (ACE_OS::strcmp (name, ACEXML_Parser::namespaces_feature_) == 0)
    {
      this->namespaces_ = flag;
      return;
    }

  if (ACE_OS::strcmp (name, ACEXML_Parser::namespace_prefixes_feature_) == 0)
    {
      this->namespace_prefixes_ = flag;
      return;
    }

  // Nothing matched: the feature is unknown to this parser.
  ACEXML_THROW (ACEXML_SAXNotRecognizedException (name));
}
// Property lookup is not implemented: whatever NAME is given, an
// ACEXML_SAXNotSupportedException constructed from it is raised through
// ACEXML_THROW_RETURN, with 0 supplied as the return value.  No property
// name is compared against anything here.
void *
ACEXML_Parser::getProperty (const ACEXML_Char *name ACEXML_ENV_ARG_DECL)
ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
ACEXML_SAXNotSupportedException))
{
ACEXML_THROW_RETURN (ACEXML_SAXNotSupportedException (name), 0);
}
// Property assignment is not implemented: VALUE is ignored and, whatever
// NAME is given, an ACEXML_SAXNotSupportedException constructed from it is
// raised through ACEXML_THROW.
void
ACEXML_Parser::setProperty (const ACEXML_Char *name,
void *value ACEXML_ENV_ARG_DECL)
ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
ACEXML_SAXNotSupportedException))
{
// VALUE is deliberately unused; the macro marks it as such.
ACE_UNUSED_ARG (value);
ACEXML_THROW (ACEXML_SAXNotSupportedException (name));
}
void
ACEXML_Parser::report_error (const ACEXML_Char* message ACEXML_ENV_ARG_DECL)
{
ACEXML_SAXParseException* exception = 0;
ACE_NEW_NORETURN (exception,
ACEXML_SAXParseException (message));
if (this->error_handler_)
this->error_handler_->error (*exception ACEXML_ENV_ARG_PARAMETER);
else
ACEXML_ENV_RAISE (exception);
return;
}
/**
 * Report a parse warning.
 *
 * Warnings are delivered only through the installed error handler's
 * warning() callback; without a handler this function does nothing.
 *
 * @param message  Text used to construct the ACEXML_SAXParseException.
 */
void
ACEXML_Parser::report_warning (const ACEXML_Char* message ACEXML_ENV_ARG_DECL)
{
  // Nobody to notify: skip building the exception altogether.
  if (this->error_handler_ == 0)
    return;

  // Nothing raises or stores the exception after the callback, so an
  // automatic object is sufficient.  The previous heap allocation was never
  // released and was dereferenced without checking that it had succeeded.
  ACEXML_SAXParseException exception (message);
  this->error_handler_->warning (exception ACEXML_ENV_ARG_PARAMETER);
}
void
ACEXML_Parser::report_fatal_error (const ACEXML_Char* message ACEXML_ENV_ARG_DECL)
{
ACEXML_SAXParseException* exception = 0;
ACE_NEW_NORETURN (exception,
ACEXML_SAXParseException (message));
if (this->error_handler_)
this->error_handler_->fatalError (*exception ACEXML_ENV_ARG_PARAMETER);
ACEXML_ENV_RAISE (exception);
return;
}
/**
 * Parse a complete document from @a input.
 *
 * Steps, in order:
 *   1. Take the character stream from @a input; a null source or stream is
 *      a fatal error.
 *   2. Hand the locator to the content handler (if one is installed).
 *   3. Unless simple parsing is enabled, parse the XML declaration.
 *   4. Signal startDocument, then consume prolog items (DOCTYPE, comments,
 *      processing instructions) until the root element's start is seen.
 *   5. Parse the root element, signal endDocument and reset the locator.
 *
 * Any failure is reported through report_fatal_error() and ends the parse;
 * note that the locator is only reset on the successful path.
 *
 * @param input  Source of the document; must provide a character stream.
 */
void
ACEXML_Parser::parse (ACEXML_InputSource *input ACEXML_ENV_ARG_DECL)
  ACE_THROW_SPEC ((ACEXML_SAXException))
{
  if (input == 0 || (this->instream_ = input->getCharStream ()) == 0)
    {
      this->report_fatal_error(ACE_TEXT ("Invalid input source") ACEXML_ENV_ARG_PARAMETER);
      return;
    }

  // Set up Locator. At this point, the systemId and publicId are null. We
  // can't do better, as we don't know anything about the InputSource
  // currently, and according to the SAX spec, the parser should set up the
  // locator before reporting any document events.
  if (this->content_handler_)
    this->content_handler_->setDocumentLocator (&this->locator_);

  if (this->simple_parsing_ == 0)
    {
      this->parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_PARAMETER);
      ACEXML_CHECK;
    }

  // Guarded like the setDocumentLocator call above; the handler pointer
  // used to be dereferenced here unconditionally.  (Helpers called further
  // down are not visible from here and may still require a handler.)
  if (this->content_handler_)
    {
      this->content_handler_->startDocument (ACEXML_ENV_SINGLE_ARG_PARAMETER);
      ACEXML_CHECK;
    }

  int doctype_defined = 0;

  for (int prolog_done = 0; prolog_done == 0; )
    {
      if (this->skip_whitespace (0) != '<')
        {
          this->report_fatal_error (ACE_TEXT ("Expecting '<'") ACEXML_ENV_ARG_PARAMETER);
          return;
        }

      ACEXML_Char fwd = this->peek ();
      switch (fwd)
        {
        case '!':
          this->get ();         // consume the '!'
          fwd = this->peek ();
          if (fwd == 'D')       // DOCTYPE
            {
              // Only one DOCTYPE declaration is accepted per document.
              if (doctype_defined)
                {
                  this->report_fatal_error (ACE_TEXT ("Duplicate DOCTYPE definitions") ACEXML_ENV_ARG_PARAMETER);
                  return;
                }
              // This will also take care of the trailing MISC block if any.
              this->parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
              ACEXML_CHECK;
              doctype_defined = 1;
            }
          else if (fwd == '-')  // COMMENT
            {
              if (this->grok_comment () < 0)
                {
                  this->report_fatal_error(ACE_TEXT ("Invalid comment") ACEXML_ENV_ARG_PARAMETER);
                  return;
                }
            }
          else
            {
              // Neither a DOCTYPE nor a comment.  This case used to be
              // reported as a duplicate DOCTYPE, which it is not.
              this->report_fatal_error (ACE_TEXT ("Expecting DOCTYPE declaration or comment after '<!'") ACEXML_ENV_ARG_PARAMETER);
              return;
            }
          break;
        case '?':
          this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER);
          ACEXML_CHECK;
          break;
        case 0:
          this->report_fatal_error (ACE_TEXT ("Unexpected EOF") ACEXML_ENV_ARG_PARAMETER);
          return;
        default:                // Root element begins
          prolog_done = 1;
          break;
        }
    }

  // Now parse root element.
  this->parse_element (1 ACEXML_ENV_ARG_PARAMETER);
  ACEXML_CHECK;

  if (this->content_handler_)
    {
      this->content_handler_->endDocument (ACEXML_ENV_SINGLE_ARG_PARAMETER);
      ACEXML_CHECK;
    }

  // Reset the Locator held within the parser
  this->locator_.reset();
}
// Parsing directly from a system identifier is not implemented: SYSTEMID is
// ignored and a default-constructed ACEXML_SAXNotSupportedException is
// raised through ACEXML_THROW.
void
ACEXML_Parser::parse (const ACEXML_Char *systemId ACEXML_ENV_ARG_DECL)
ACE_THROW_SPEC ((ACEXML_SAXException))
{
// @@ Not implemented.
ACE_UNUSED_ARG (systemId);
ACEXML_THROW (ACEXML_SAXNotSupportedException ());
}
/**
 * Parse the XML declaration: "<?xml version=... [encoding=...]
 * [standalone=...] ?>".
 *
 * The version pseudo-attribute is mandatory (its value is read but not yet
 * interpreted).  An encoding declaration is accepted only before
 * standalone; standalone must be "yes" or "no".  Every malformed
 * declaration is reported through report_fatal_error().
 */
void
ACEXML_Parser::parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_DECL)
  ACE_THROW_SPEC ((ACEXML_SAXException))
{
  if (this->parse_token (ACE_TEXT("<?xml")) < 0)
    {
      this->report_fatal_error(ACE_TEXT ("Invalid XMLDecl ('<?xml' ?)") ACEXML_ENV_ARG_PARAMETER);
      return;
    }

  // Receives each quoted attribute value; starts out null rather than
  // indeterminate.
  ACEXML_Char *astring = 0;

  if (this->skip_whitespace (0) != 'v' // Discard whitespace
      || (this->parse_token (ACE_TEXT("ersion")) < 0)
      || this->skip_equal () != 0
      || this->get_quoted_string (astring) != 0)
    {
      this->report_fatal_error (ACE_TEXT ("Unrecognized XMLDecl ('version'?)") ACEXML_ENV_ARG_PARAMETER);
      return;
    }
  // @@ Handle version number here.

  // 0: encoding or standalone may still follow; 2: standalone has been
  // seen, so only the closing "?>" is acceptable.
  int xmldecl_state = 0;
  int seen_encoding = 0;

  while (1)
    {
      ACEXML_Char fwd = this->peek ();
      if (fwd != '?')
        {
          fwd = this->skip_whitespace (0); // Discard whitespace
          if (fwd == '?')
            {
              // Fall down to consume the '?' and wrap up the XML Decl parsing.
            }
          else if (xmldecl_state == 0 && fwd == 'e')
            {
              if ((this->parse_token (ACE_TEXT("ncoding")) == 0) &&
                  this->skip_equal () == 0 &&
                  this->get_quoted_string (astring) == 0)
                {
                  if (seen_encoding)
                    {
                      this->report_fatal_error (ACE_TEXT ("Duplicate encoding defined") ACEXML_ENV_ARG_PARAMETER);
                      return;
                    }

                  seen_encoding = 1;

                  // Ask the stream once and skip the comparison when it has
                  // no encoding to report; a null pointer must not reach
                  // strcmp/strstr.
                  const ACEXML_Char *detected =
                    this->instream_->getEncoding ();

                  // NOTE(review): as written, a mismatch is reported only
                  // when the declared name differs from the detected name
                  // yet contains it as a substring.  The strstr test looks
                  // inverted; it is left unchanged so documents accepted
                  // today keep parsing -- confirm the intended rule.
                  if (detected != 0
                      && ACE_OS::strcmp (astring, detected) != 0
                      && ACE_OS::strstr (astring, detected) != 0)
                    {
                      ACE_ERROR ((LM_ERROR,
                                  ACE_TEXT ("Detected Encoding is %s : Declared Encoding is %s"),
                                  detected, astring));
                      this->report_fatal_error (ACE_TEXT ("Encoding declaration doesn't match detected encoding") ACEXML_ENV_ARG_PARAMETER);
                      return;
                    }
                  continue;
                }
              else
                break;
            }
          else if (xmldecl_state < 2 && fwd == 's')
            {
              if ((this->parse_token (ACE_TEXT("tandalone")) == 0) &&
                  this->skip_equal () == 0 &&
                  this->get_quoted_string (astring) == 0)
                {
                  xmldecl_state = 2;
                  if (ACE_OS::strcmp (astring, ACE_TEXT ("yes")) == 0)
                    {
                      // @@ This is a standalone XML file.
                      continue;
                    }
                  else if (ACE_OS::strcmp (astring, ACE_TEXT ("no")) == 0)
                    {
                      // @@ This is not a stand alone XML file.
                      continue;
                    }
                }
              // Malformed standalone declaration or a value other than
              // "yes"/"no".
              break;
            }
          else
            break;
        }

      if (this->parse_token (ACE_TEXT ("?>")) < 0)
        break;
      return;
    } // End parsing XML Decl.

  // Every 'break' above lands here.
  this->report_fatal_error (ACE_TEXT ("Unrecognized XML Decl ('standalone'?)") ACEXML_ENV_ARG_PARAMETER);
  return;
}
/**
 * Consume and discard a comment.
 *
 * Called with the input positioned just after "<!"; reads the two opening
 * dashes, then everything up to and including the closing "-->".
 *
 * @retval 0   The comment was consumed.
 * @retval -1  The opening was not "--" followed by a non-dash character, or
 *             the input ended before the closing sequence was found.
 */
int
ACEXML_Parser::grok_comment (void)
{
  // Skip the opening "<!--" completely.
  if (this->get () != '-' || this->get () != '-')
    return -1;

  // There must be at least something that is not '-' after the opening.  A
  // 0 is treated as end of input, the same convention parse() applies to
  // the value returned by peek().
  ACEXML_Char fwd = this->get ();
  if (fwd == '-' || fwd == 0)
    return -1;

  // state counts how much of the trailing "-->" has been matched so far.
  int state = 0;
  while (state < 3)
    {
      fwd = this->get ();

      // Input exhausted inside the comment: fail.  Previously this case
      // kept resetting the state and would never leave the loop if get()
      // went on returning 0.
      if (fwd == 0)
        return -1;

      if (fwd == '-')
        {
          // According to the spec, '--->' is not a valid closing sequence,
          // but we let it pass anyway: a further '-' after "--" keeps the
          // match alive instead of discarding it.
          if (state < 2)
            ++state;
        }
      else if (fwd == '>' && state == 2)
        state = 3;
      else
        state = 0;              // Reset parse state.
    }
  return 0;
}
int
ACEXML_Parser::parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_DECL)
{
if (this->get () != '?')
{ // How did we get here?
this->report_fatal_error(ACE_TEXT ("Internal error") ACEXML_ENV_ARG_PARAMETER);
return -1;
}
const ACEXML_Char *pitarget = this->read_name ();
ACEXML_Char *instruction = 0;
if (ACE_OS::strcasecmp (ACE_TEXT ("xml"), pitarget) != 0)
{
// Invalid PITarget name.
this->report_fatal_error(ACE_TEXT ("PITarget name cannot start with 'xml'") ACEXML_ENV_ARG_PARAMETER);
return -1;
}
int state = 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -