⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 parser.cpp

📁 一个开源的网络开发库ACE
💻 CPP
📖 第 1 页 / 共 5 页
字号:
// Parser.cpp,v 1.21 2002/10/15 22:21:36 kitty Exp

#include "ACEXML/parser/parser/Parser.h"
#include "ACEXML/common/Transcode.h"
#include "ACEXML/common/AttributesImpl.h"
#include "ace/ACE.h"

// The string constants below are spelled out one character at a time and
// explicitly terminated with 0.
// NOTE(review): presumably so the same initializer works whether
// ACEXML_Char is a narrow or a wide character type -- confirm.

// The string "CDATA".
static const ACEXML_Char default_attribute_type[] = {'C', 'D', 'A', 'T', 'A', 0};
// The empty string (terminator only).
static const ACEXML_Char empty_string[] = { 0 };

// Feature name "Simple".  getFeature()/setFeature() map it to the
// simple_parsing_ flag.
const ACEXML_Char
ACEXML_Parser::simple_parsing_feature_[] = { 'S', 'i', 'm', 'p', 'l', 'e', 0 };

// Feature name "http://xml.org/sax/features/namespaces".
// getFeature()/setFeature() map it to the namespaces_ flag.
const ACEXML_Char
ACEXML_Parser::namespaces_feature_[] = {'h', 't', 't', 'p', ':', '/', '/', 'x', 'm', 'l', '.', 'o', 'r', 'g', '/', 's', 'a', 'x', '/', 'f', 'e', 'a', 't', 'u', 'r', 'e', 's', '/', 'n', 'a', 'm', 'e', 's', 'p', 'a', 'c', 'e', 's', 0 };

// Feature name "http://xml.org/sax/features/namespace-prefixes".
// getFeature()/setFeature() map it to the namespace_prefixes_ flag.
const ACEXML_Char
ACEXML_Parser::namespace_prefixes_feature_[] = {'h', 't', 't', 'p', ':', '/', '/', 'x', 'm', 'l', '.', 'o', 'r', 'g', '/', 's', 'a', 'x', '/', 'f', 'e', 'a', 't', 'u', 'r', 'e', 's', '/', 'n', 'a', 'm', 'e', 's', 'p', 'a', 'c', 'e', '-', 'p', 'r', 'e', 'f', 'i', 'x', 'e', 's', 0 };

#if !defined (__ACEXML_INLINE__)
# include "ACEXML/parser/parser/Parser.i"
#endif /* __ACEXML_INLINE__ */

// Default constructor.
//
// All handler pointers, the input stream pointer and the DTD related
// pointers start out null, and the locator is default-constructed.
// Of the three feature flags only namespaces_ starts enabled (1);
// simple_parsing_ and namespace_prefixes_ start disabled (0).
ACEXML_Parser::ACEXML_Parser (void)
  :   dtd_handler_ (0),
      entity_resolver_ (0),
      content_handler_ (0),
      error_handler_ (0),
      instream_ (0),
      doctype_ (0),
      dtd_system_ (0),
      dtd_public_ (0),
      locator_(),
      simple_parsing_ (0),
      namespaces_(1),
      namespace_prefixes_ (0)
{
}

// Destructor.  The body is empty: nothing is released explicitly here.
ACEXML_Parser::~ACEXML_Parser (void)
{
}

// Return the current value of the feature flag called @a name.
//
// @a name is compared, in order, against simple_parsing_feature_,
// namespaces_feature_ and namespace_prefixes_feature_; the first match
// yields the corresponding member flag.  Any other name raises
// ACEXML_SAXNotRecognizedException (with -1 as the return value).
int
ACEXML_Parser::getFeature (const ACEXML_Char *name ACEXML_ENV_ARG_DECL)
   ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
                    ACEXML_SAXNotSupportedException))
{
  // Guard clauses: every recognized name returns straight away.
  if (ACE_OS::strcmp (name, ACEXML_Parser::simple_parsing_feature_) == 0)
    return this->simple_parsing_;

  if (ACE_OS::strcmp (name, ACEXML_Parser::namespaces_feature_) == 0)
    return this->namespaces_;

  if (ACE_OS::strcmp (name, ACEXML_Parser::namespace_prefixes_feature_) == 0)
    return this->namespace_prefixes_;

  // Nothing matched: the feature is unknown to this parser.
  ACEXML_THROW_RETURN (ACEXML_SAXNotRecognizedException (name), -1);
}



// Switch the feature flag called @a name on or off.
//
// Any non-zero @a boolean_value is stored as 1, zero is stored as 0.
// The recognized names are simple_parsing_feature_, namespaces_feature_
// and namespace_prefixes_feature_; any other name raises
// ACEXML_SAXNotRecognizedException and changes nothing.
void
ACEXML_Parser::setFeature (const ACEXML_Char *name,
                           int boolean_value ACEXML_ENV_ARG_DECL)
        ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
                         ACEXML_SAXNotSupportedException))
{
  // Normalize once so that every flag only ever holds 0 or 1.
  const int normalized = (boolean_value != 0) ? 1 : 0;

  if (ACE_OS::strcmp (name, ACEXML_Parser::simple_parsing_feature_) == 0)
    {
      this->simple_parsing_ = normalized;
    }
  else if (ACE_OS::strcmp (name, ACEXML_Parser::namespaces_feature_) == 0)
    {
      this->namespaces_ = normalized;
    }
  else if (ACE_OS::strcmp (name,
                           ACEXML_Parser::namespace_prefixes_feature_) == 0)
    {
      this->namespace_prefixes_ = normalized;
    }
  else
    {
      // Unknown feature name.
      ACEXML_THROW (ACEXML_SAXNotRecognizedException (name));
    }
}

// Property lookup.  No property is implemented: every call raises
// ACEXML_SAXNotSupportedException constructed from @a name, with 0 as
// the return value.
void *
ACEXML_Parser::getProperty (const ACEXML_Char *name ACEXML_ENV_ARG_DECL)
  ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
                   ACEXML_SAXNotSupportedException))
{
  ACEXML_THROW_RETURN (ACEXML_SAXNotSupportedException (name), 0);
}

// Property assignment.  No property is implemented: @a value is ignored
// and every call raises ACEXML_SAXNotSupportedException constructed
// from @a name.
void
ACEXML_Parser::setProperty (const ACEXML_Char *name,
                            void *value ACEXML_ENV_ARG_DECL)
  ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
                   ACEXML_SAXNotSupportedException))
{
  // Silence the unused-parameter warning; value is never read.
  ACE_UNUSED_ARG (value);

  ACEXML_THROW (ACEXML_SAXNotSupportedException (name));
}

void
ACEXML_Parser::report_error (const ACEXML_Char* message ACEXML_ENV_ARG_DECL)
{
  ACEXML_SAXParseException* exception = 0;
  ACE_NEW_NORETURN (exception,
                    ACEXML_SAXParseException (message));
  if (this->error_handler_)
    this->error_handler_->error (*exception ACEXML_ENV_ARG_PARAMETER);
  else
    ACEXML_ENV_RAISE (exception);
  return;
}

// Report a warning described by @a message.
//
// If an error handler is installed, an ACEXML_SAXParseException built
// from @a message is passed to its warning() callback.  Without a
// handler the warning is dropped.  Nothing is ever raised from here.
void
ACEXML_Parser::report_warning (const ACEXML_Char* message ACEXML_ENV_ARG_DECL)
{
  // No handler means nobody to tell; do not build an exception at all.
  if (this->error_handler_ == 0)
    return;

  // The exception is only lent to the handler for the duration of the
  // call, so an automatic object replaces the heap allocation that was
  // never released (and was dereferenced without a null check).
  // NOTE(review): assumes the handler does not keep the address of the
  // exception beyond the call -- confirm against handlers in use.
  ACEXML_SAXParseException exception (message);
  this->error_handler_->warning (exception ACEXML_ENV_ARG_PARAMETER);
  return;
}

// Report a fatal parse error described by @a message.
//
// An ACEXML_SAXParseException built from @a message is first passed to
// the fatalError() callback of the error handler, if one is installed,
// and is then always raised through ACEXML_ENV_RAISE.
//
// If the exception cannot be allocated, neither the handler nor the
// raise happens and the function simply returns.
void
ACEXML_Parser::report_fatal_error (const ACEXML_Char* message ACEXML_ENV_ARG_DECL)
{
  ACEXML_SAXParseException* exception = 0;
  ACE_NEW_NORETURN (exception,
                    ACEXML_SAXParseException (message));
  // ACE_NEW_NORETURN leaves the pointer null when allocation fails;
  // dereferencing or raising a null exception must be avoided.
  if (exception == 0)
    return;

  if (this->error_handler_)
    this->error_handler_->fatalError (*exception ACEXML_ENV_ARG_PARAMETER);
  ACEXML_ENV_RAISE (exception);
  return;
}

// Parse the XML document supplied by @a input.
//
// Steps, in order:
//   1. Take the character stream from @a input; a null @a input or a
//      null stream is a fatal error.
//   2. Hand the parser's locator to the content handler.
//   3. Unless simple parsing is enabled, parse the XML declaration.
//   4. Signal startDocument.
//   5. Consume the prolog: at most one DOCTYPE declaration plus any
//      number of comments and processing instructions.
//   6. Parse the root element.
//   7. Signal endDocument and reset the locator.
//
// A content handler is optional for steps 2, 4 and 7: each of those
// calls is skipped when none is installed.
// NOTE(review): parse_element() and the other helpers are not visible
// here and may still require a content handler -- confirm.
//
// Every failure detected here is reported through report_fatal_error()
// and ends the parse immediately; the locator is not reset in that case.
void
ACEXML_Parser::parse (ACEXML_InputSource *input ACEXML_ENV_ARG_DECL)
  ACE_THROW_SPEC ((ACEXML_SAXException))
{
  if (input == 0 || (this->instream_ = input->getCharStream ())  == 0)
    {
      this->report_fatal_error(ACE_TEXT ("Invalid input source") ACEXML_ENV_ARG_PARAMETER);
      return;
    }

  // Set up Locator. At this point, the systemId and publicId are null. We
  // can't do better, as we don't know anything about the InputSource
  // currently, and according to the SAX spec, the parser should set up the
  // locator before reporting any document events.
  if (this->content_handler_)
    this->content_handler_->setDocumentLocator (&this->locator_);

  if (this->simple_parsing_ == 0)
    {
      this->parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_PARAMETER);
      ACEXML_CHECK;
    }

  // Guarded like setDocumentLocator above: the handler may be null.
  if (this->content_handler_)
    {
      this->content_handler_->startDocument (ACEXML_ENV_SINGLE_ARG_PARAMETER);
      ACEXML_CHECK;
    }

  int doctype_defined = 0;

  for (int prolog_done = 0; prolog_done == 0; )
    {
      if (this->skip_whitespace (0) != '<')
        {
          this->report_fatal_error (ACE_TEXT ("Expecting '<'") ACEXML_ENV_ARG_PARAMETER);
          return;
        }
      ACEXML_Char fwd = this->peek ();
      switch (fwd)
        {
        case '!':
          this->get ();         // consume the '!'
          fwd = this->peek ();
          if (fwd == 'D' && !doctype_defined)       // DOCTYPE
            {
              // This will also take care of the trailing MISC block if any.
              this->parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_PARAMETER);
              ACEXML_CHECK;
              doctype_defined = 1;
            }
          else if (fwd == '-')  // COMMENT
            {
              if (this->grok_comment () < 0)
                {
                  this->report_fatal_error(ACE_TEXT ("Invalid comment") ACEXML_ENV_ARG_PARAMETER);
                  return;
                }
            }
          else if (fwd == 'D')  // a second "<!D..." after a DOCTYPE
            {
              this->report_fatal_error (ACE_TEXT ("Duplicate DOCTYPE definitions") ACEXML_ENV_ARG_PARAMETER);
              return;
            }
          else
            {
              // Anything else after "<!" is neither a DOCTYPE nor a
              // comment, so do not blame it on a duplicate DOCTYPE.
              this->report_fatal_error (ACE_TEXT ("Unexpected markup declaration in prolog") ACEXML_ENV_ARG_PARAMETER);
              return;
            }
          break;
        case '?':
          this->parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_PARAMETER);
          ACEXML_CHECK;
          break;
        case 0:
          this->report_fatal_error (ACE_TEXT ("Unexpected EOF") ACEXML_ENV_ARG_PARAMETER);
          return;
        default:                // Root element begins
          prolog_done = 1;
          break;
        }
    }

  // Now parse root element.
  this->parse_element (1 ACEXML_ENV_ARG_PARAMETER);
  ACEXML_CHECK;

  // Guarded for the same reason as startDocument.
  if (this->content_handler_)
    {
      this->content_handler_->endDocument (ACEXML_ENV_SINGLE_ARG_PARAMETER);
      ACEXML_CHECK;
    }

  // Reset the Locator held within the parser
  this->locator_.reset();
}

// Parse the document identified by @a systemId.
//
// Not implemented: @a systemId is ignored and every call raises
// ACEXML_SAXNotSupportedException.
void
ACEXML_Parser::parse (const ACEXML_Char *systemId ACEXML_ENV_ARG_DECL)
  ACE_THROW_SPEC ((ACEXML_SAXException))
{
  // @@ Not implemented.
  ACE_UNUSED_ARG (systemId);

  ACEXML_THROW (ACEXML_SAXNotSupportedException ());
}


// Parse the XML declaration ("<?xml version=... ?>") at the current
// input position.
//
// The declaration has to start with "<?xml" followed by a quoted
// "version" value.  After that an "encoding" and a "standalone"
// pseudo-attribute are accepted, and the declaration has to be closed
// with "?>".  Every failure is reported through report_fatal_error().
void
ACEXML_Parser::parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_DECL)
      ACE_THROW_SPEC ((ACEXML_SAXException))
{
  if (this->parse_token (ACE_TEXT("<?xml")) < 0)
    {
      this->report_fatal_error(ACE_TEXT ("Invalid XMLDecl ('<?xml' ?)") ACEXML_ENV_ARG_PARAMETER);
      return;
    }

  // Receives each quoted value read by get_quoted_string().
  ACEXML_Char *astring;

  // "version" is matched as 'v' (returned by skip_whitespace) followed by
  // the token "ersion", then '=' and the quoted value.
  if (this->skip_whitespace (0) != 'v' // Discard whitespace
      || (this->parse_token (ACE_TEXT("ersion")) < 0)
      || this->skip_equal () != 0
      || this->get_quoted_string (astring) != 0)
    {
      this->report_fatal_error (ACE_TEXT ("Unrecognized XMLDecl ('version'?)") ACEXML_ENV_ARG_PARAMETER);
      return;
    }
  // @@ Handle version number here.
  // The version value in astring is not examined.

  // xmldecl_state is 0 until "standalone" has been parsed and 2
  // afterwards.  "encoding" is only accepted in state 0, i.e. before
  // "standalone".
  int xmldecl_state = 0;
  // Set once an "encoding" has been parsed, to detect a second one.
  int seen_encoding = 0;

  // Loop exits: "return" on success or on an error that was already
  // reported, "break" to fall out to the generic error report at the end.
  while (1)
    {
      ACEXML_Char fwd = this->peek ();
      if (fwd != '?')
        {
          fwd = this->skip_whitespace (0); // Discard whitespace
          if (fwd == '?')
            {
              // Fall down to consume the '?' and wrap up the XML Decl parsing.
            }
          else if (xmldecl_state == 0 && fwd == 'e')
            {
              // "encoding" is matched as 'e' + "ncoding".
              if ((this->parse_token (ACE_TEXT("ncoding")) == 0) &&
                  this->skip_equal () == 0 &&
                  this->get_quoted_string (astring) == 0)
                {
                  if (seen_encoding)
                    {
                      this->report_fatal_error (ACE_TEXT ("Duplicate encoding defined") ACEXML_ENV_ARG_PARAMETER);
                      return;
                    }
                  else
                    {
                    seen_encoding = 1;
                      // Compare the declared encoding with the one the
                      // input stream reports.
                      // NOTE(review): as written, the mismatch error is
                      // only raised when the declared name differs from
                      // the stream's encoding but still CONTAINS it as a
                      // substring; a completely different declared name
                      // passes silently.  This looks inverted -- confirm
                      // the intended rule before changing it.
                      // NOTE(review): getEncoding() is assumed never to
                      // return null here -- confirm.
                      if (ACE_OS::strcmp (astring,
                                          this->instream_->getEncoding()) != 0)
                        {
                          if (ACE_OS::strstr (astring,
                                              this->instream_->getEncoding()) != 0)
                            {
                              ACE_ERROR ((LM_ERROR,
                                          ACE_TEXT ("Detected Encoding is %s : Declared Encoding is %s"),
                                          this->instream_->getEncoding(), astring));
                              this->report_fatal_error (ACE_TEXT ("Encoding declaration doesn't match detected encoding") ACEXML_ENV_ARG_PARAMETER);
                              return;
                            }
                        }
                    }
                  continue;
                }
              else
                break;
            }
          else if (xmldecl_state < 2 && fwd == 's')
            {
              // "standalone" is matched as 's' + "tandalone"; only the
              // values "yes" and "no" are accepted.
              if ((this->parse_token (ACE_TEXT("tandalone")) == 0) &&
                  this->skip_equal () == 0 &&
                  this->get_quoted_string (astring) == 0)
                {
                  xmldecl_state = 2;
                  if (ACE_OS::strcmp (astring, ACE_TEXT ("yes")) == 0)
                    {
                      // @@ This is a standalone XML file.
                      continue;
                    }
                  else if (ACE_OS::strcmp (astring, ACE_TEXT ("no")) == 0)
                    {
                      // @@ This is not a stand alone XML file.
                      continue;
                    }
                }
              break;
            }
          else
            break;
        }
      // Reached when the next character is '?': the declaration must end
      // with "?>" here.
      if (this->parse_token (ACE_TEXT ("?>")) < 0)
        break;
      return;
    }   // End parsing XML Decl.
  // Every "break" above ends up here, whatever the actual cause was.
  this->report_fatal_error (ACE_TEXT ("Unrecognized XML Decl ('standalone'?)") ACEXML_ENV_ARG_PARAMETER);
  return;
}

int
ACEXML_Parser::grok_comment (void)
{
  /// Simply filter out all the comment
  int state = 0;

  if (this->get () != '-' ||    // Skip the opening "<!--"
      this->get () != '-' ||    // completely.
      this->get () == '-')      // and at least something not '-'.
    return -1;

  while (state < 3)
    // Waiting for the trailing three character '-->'. Notice that
    // according to the spec, '--->' is not a valid closing comment
    // sequence. But we'll let it pass anyway.
    {
      ACEXML_Char fwd = this->get ();
      if ((fwd == '-' && state < 2) ||
          (fwd == '>' && state == 2))
        state += 1;
      else
        state = 0;              // Reset parse state.
    }
  return 0;
}

int
ACEXML_Parser::parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_DECL)
{
  if (this->get () != '?')
    {                           // How did we get here?
      this->report_fatal_error(ACE_TEXT ("Internal error") ACEXML_ENV_ARG_PARAMETER);
      return -1;
    }
  const ACEXML_Char *pitarget = this->read_name ();
  ACEXML_Char *instruction = 0;

  if (ACE_OS::strcasecmp (ACE_TEXT ("xml"), pitarget) != 0)
    {
      // Invalid PITarget name.
      this->report_fatal_error(ACE_TEXT ("PITarget name cannot start with 'xml'") ACEXML_ENV_ARG_PARAMETER);
      return -1;
    }

  int state = 0;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -