📄 xmlparser.cpp

📁 Pegasus is an open-source implementation of the DMTF CIM and WBEM standards. It is designed to be por
💻 CPP
📖 第 1 页 / 共 2 页
字号:
12 下一页
//%2006////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
// Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
// Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
// IBM Corp.; EMC Corporation, The Open Group.
// Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
// IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
// Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
// EMC Corporation; VERITAS Software Corporation; The Open Group.
// Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
// EMC Corporation; Symantec Corporation; The Open Group.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
// ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
// "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
// LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
//==============================================================================
//
//%/////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
//
// XmlParser
//
//      This file contains a simple non-validating XML parser. Here are
//      several rules for well-formed XML:
//
//          1.  Documents must begin with an XML declaration:
//
//              <?xml version="1.0" standalone="yes"?>
//
//          2.  Comments have the form:
//
//              <!-- blah blah blah -->
//
//          3. The following entity references are supported:
//
//              &amp; - ampersand
//              &lt; - less-than
//              &gt; - greater-than
//              &quot; - full quote
//              &apos; - apostrophe
//
//             as well as character (numeric) references:
//
//              &#49; - decimal reference for character '1'
//              &#x31; - hexadecimal reference for character '1'
//
//          4. Element names and attribute names take the following form:
//
//              [A-Za-z_][A-Za-z_0-9-.:]
//
//          5.  Arbitrary data (CDATA) can be enclosed like this:
//
//                  <![CDATA[
//                  ...
//                  ]]>
//
//          6.  Element names and attribute names are case-sensitive.
//
//          7.  XmlAttribute values must be delimited by full or half quotes.
//              XmlAttribute values must be delimited.
//
//          8.  <!DOCTYPE...>
//
// TODO:
//
//      ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
//      work.
Handle <!DOCTYPE...> sections which are complicated (containing//        rules rather than references to files).////      Remove newlines from string literals:////          Example: <xyz x="hello//              world">//////////////////////////////////////////////////////////////////////////////////#include <Pegasus/Common/Config.h>#include <cctype>#include <cstdio>#include <cstdlib>#include <cstring>#include "XmlParser.h"#include "Logger.h"#include "ExceptionRep.h"#include "CharSet.h"PEGASUS_NAMESPACE_BEGIN//////////////////////////////////////////////////////////////////////////////////// Static helper functions//////////////////////////////////////////////////////////////////////////////////static void _printValue(const char* p){    for (; *p; p++)    {        if (*p == '\n')            PEGASUS_STD(cout) << "\\n";        else if (*p == '\r')            PEGASUS_STD(cout) << "\\r";        else if (*p == '\t')            PEGASUS_STD(cout) << "\\t";        else            PEGASUS_STD(cout) << *p;    }}struct EntityReference{    const char* match;    Uint32 length;    char replacement;};// ATTN: Add support for more entity referencesstatic EntityReference _references[] ={    { "&amp;", 5, '&' },    { "&lt;", 4, '<' },    { "&gt;", 4, '>' },    { "&quot;", 6, '"' },    { "&apos;", 6, '\'' }};// Implements a check for a whitespace character, without calling// isspace( ).  The isspace( ) function is locale-sensitive,// and incorrectly flags some chars above 0x7f as whitespace.  
This// causes the XmlParser to incorrectly parse UTF-8 data.//// Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)// defines white space as:// S    ::=    (#x20 | #x9 | #xD | #xA)+static inline int _isspace(char c){    return CharSet::isXmlWhiteSpace((Uint8)c);}static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));//////////////////////////////////////////////////////////////////////////////////// XmlException//////////////////////////////////////////////////////////////////////////////////static const char* _xmlMessages[] ={    "Bad opening element",    "Bad closing element",    "Bad attribute name",    "Exepected equal sign",    "Bad attribute value",    "A \"--\" sequence found within comment",    "Unterminated comment",    "Unterminated CDATA block",    "Unterminated DOCTYPE",    "Too many attributes: parser only handles 10",    "Malformed reference",    "Expected a comment or CDATA following \"<!\" sequence",    "Closing element does not match opening element",    "One or more tags are still open",    "More than one root element was encountered",    "Validation error",    "Semantic error"};static const char* _xmlKeys[] ={    "Common.XmlParser.BAD_START_TAG",    "Common.XmlParser.BAD_END_TAG",    "Common.XmlParser.BAD_ATTRIBUTE_NAME",    "Common.XmlParser.EXPECTED_EQUAL_SIGN",    "Common.XmlParser.BAD_ATTRIBUTE_VALUE",    "Common.XmlParser.MINUS_MINUS_IN_COMMENT",    "Common.XmlParser.UNTERMINATED_COMMENT",    "Common.XmlParser.UNTERMINATED_CDATA",    "Common.XmlParser.UNTERMINATED_DOCTYPE",    "Common.XmlParser.TOO_MANY_ATTRIBUTES",    "Common.XmlParser.MALFORMED_REFERENCE",    "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA",    "Common.XmlParser.START_END_MISMATCH",    "Common.XmlParser.UNCLOSED_TAGS",    "Common.XmlParser.MULTIPLE_ROOTS",    "Common.XmlParser.VALIDATION_ERROR",    "Common.XmlParser.SEMANTIC_ERROR"};// l10n replace _formMessage (comment out the old one)/*static String _formMessage(Uint32 code, Uint32 line, 
const String& message){    String result = _xmlMessages[Uint32(code) - 1];    char buffer[32];    sprintf(buffer, "%d", line);    result.append(": on line ");    result.append(buffer);    if (message.size())    {        result.append(": ");        result.append(message);    }    return result;}*/static MessageLoaderParms _formMessage(    Uint32 code,    Uint32 line,    const String& message){    String dftMsg = _xmlMessages[Uint32(code) - 1];    String key = _xmlKeys[Uint32(code) - 1];        String msg = message;    dftMsg.append(": on line $0");    if (message.size())    {        msg = ": " + msg;        dftMsg.append("$1");    }    return MessageLoaderParms(key, dftMsg, line ,msg);}static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line){    String dftMsg = _xmlMessages[Uint32(code) - 1];    String key = _xmlKeys[Uint32(code) - 1];    dftMsg.append(": on line $0");    return MessageLoaderParms(key, dftMsg, line);}XmlException::XmlException(    XmlException::Code code,    Uint32 lineNumber,    const String& message)    : Exception(_formMessage(code, lineNumber, message)){}XmlException::XmlException(    XmlException::Code code,    Uint32 lineNumber,    MessageLoaderParms& msgParms)    : Exception(_formPartialMessage(code, lineNumber)){        if (msgParms.default_msg.size())    {        msgParms.default_msg = ": " + msgParms.default_msg;    }        _rep->message.append(MessageLoader::getMessage(msgParms));}//////////////////////////////////////////////////////////////////////////////////// XmlValidationError//////////////////////////////////////////////////////////////////////////////////XmlValidationError::XmlValidationError(    Uint32 lineNumber,    const String& message)    : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message){}XmlValidationError::XmlValidationError(    Uint32 lineNumber,    MessageLoaderParms& msgParms)    : XmlException(XmlException::VALIDATION_ERROR, lineNumber, 
msgParms){}//////////////////////////////////////////////////////////////////////////////////// XmlSemanticError//////////////////////////////////////////////////////////////////////////////////XmlSemanticError::XmlSemanticError(    Uint32 lineNumber,    const String& message)    : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, message){}XmlSemanticError::XmlSemanticError(    Uint32 lineNumber,    MessageLoaderParms& msgParms)    : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms){}//////////////////////////////////////////////////////////////////////////////////// XmlParser//////////////////////////////////////////////////////////////////////////////////XmlParser::XmlParser(char* text)    : _line(1),      _text(text),      _current(text),      _restoreChar('\0'),      _foundRoot(false){}inline void _skipWhitespace(Uint32& line, char*& p){    while (*p && _isspace(*p))    {        if (*p == '\n')            line++;        p++;    }}static int _getEntityRef(char*& p){    if ((p[0] == 'g') && (p[1] == 't') && (p[2] == ';'))    {        p += 3;        return '>';    }    if ((p[0] == 'l') && (p[1] == 't') && (p[2] == ';'))    {        p += 3;        return '<';    }    if ((p[0] == 'a') && (p[1] == 'p') && (p[2] == 'o') && (p[3] == 's') &&        (p[4] == ';'))    {        p += 5;        return '\'';    }    if ((p[0] == 'q') && (p[1] == 'u') && (p[2] == 'o') && (p[3] == 't') &&        (p[4] == ';'))    {        p += 5;        return '"';    }    if ((p[0] == 'a') && (p[1] == 'm') && (p[2] == 'p') && (p[3] == ';'))    {        p += 4;        return '&';    }    return -1;}static inline int _getCharRef(char*& p, bool hex){    char* end;    unsigned long ch;    if (hex)    {        ch = strtoul(p, &end, 16);    }    else    {        ch = strtoul(p, &end, 10);    }    if ((end == p) || (*end != ';') || (ch > 255))    {        return -1;    }    if ((hex && (end - p > 4)) || (!hex && (end - p > 5)))    {        return -1;    }    p = end + 1;    
return ch;}static void _normalize(Uint32& line, char*& p, char end_char, char*& start){    // Skip over leading whitespace:    _skipWhitespace(line, p);    start = p;    // Process one character at a time:    char* q = p;    while (*p && (*p != end_char))    {        if (_isspace(*p))        {            // Compress sequences of whitespace characters to a single space            // character. Update line number when newlines encountered.            if (*p++ == '\n')            {                line++;            }            *q++ = ' ';            _skipWhitespace(line, p);        }        else if (*p == '&')        {            // Process entity characters and entity references:            p++;            int ch;            if (*p == '#')            {                *p++;                if (*p == 'x')                {                    p++;                    ch = _getCharRef(p, true);                }                else                {                    ch = _getCharRef(p, false);                }            }            else            {                ch = _getEntityRef(p);            }            if (ch == -1)            {                throw XmlException(XmlException::MALFORMED_REFERENCE, line);            }            *q++ = ch;        }        else        {            *q++ = *p++;        }    }    // We encountered a the end_char or a zero-terminator.    *q = *p;    // Remove single trailing whitespace (consecutive whitespaces already    // compressed above).  Since p >= q, we can tell if we need to strip a    // trailing space from q by looking at the end of p.  We must not look at    // the last character of p, though, if p is an empty string.    
if ((p != start) && _isspace(p[-1]))    {        q--;    }    // If q got behind p, it is safe and necessary to null-terminate q    if (q != p)    {        *q = '\0';    }}Boolean XmlParser::next(XmlEntry& entry){    if (!_putBackStack.isEmpty())    {        entry = _putBackStack.top();        _putBackStack.pop();        return true;    }    // If a character was overwritten with a null-terminator the last    // time this routine was called, then put back that character. Before    // exiting of course, restore the null-terminator.    char* nullTerminator = 0;    if (_restoreChar && !*_current)    {        nullTerminator = _current;        *_current = _restoreChar;        _restoreChar = '\0';    }    // Skip over any whitespace:    _skipWhitespace(_line, _current);    if (!*_current)    {        if (nullTerminator)            *nullTerminator = '\0';        if (!_stack.isEmpty())            throw XmlException(XmlException::UNCLOSED_TAGS, _line);        return false;    }
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -