📄 xmlparser.cpp
字号:
//%2006//////////////////////////////////////////////////////////////////////////// Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development// Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.// Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;// IBM Corp.; EMC Corporation, The Open Group.// Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;// IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.// Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;// EMC Corporation; VERITAS Software Corporation; The Open Group.// Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;// EMC Corporation; Symantec Corporation; The Open Group.//// Permission is hereby granted, free of charge, to any person obtaining a copy// of this software and associated documentation files (the "Software"), to// deal in the Software without restriction, including without limitation the// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or// sell copies of the Software, and to permit persons to whom the Software is// furnished to do so, subject to the following conditions:// // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN// ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED// "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT// LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR// PURPOSE AND NONINFRINGEMENT. 
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
//==============================================================================
//
//%/////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
//
// XmlParser
//
//      This file contains a simple non-validating XML parser. Here are
//      several rules for well-formed XML:
//
//          1.  Documents must begin with an XML declaration:
//
//              <?xml version="1.0" standalone="yes"?>
//
//          2.  Comments have the form:
//
//              <!-- blah blah blah -->
//
//          3.  The following entity references are supported:
//
//              &amp;  - ampersand
//              &lt;   - less-than
//              &gt;   - greater-than
//              &quot; - full quote
//              &apos; - apostrophe
//
//              as well as character (numeric) references:
//
//              &#49;  - decimal reference for character '1'
//              &#x31; - hexadecimal reference for character '1'
//
//          4.  Element names and attribute names take the following form:
//
//              [A-Za-z_][A-Za-z_0-9-.:]
//
//          5.  Arbitrary data (CDATA) can be enclosed like this:
//
//                  <![CDATA[
//                  ...
//                  ]]>
//
//          6.  Element names and attribute names are case-sensitive.
//
//          7.  XmlAttribute values must be delimited by full or half quotes.
//              XmlAttribute values must be delimited.
//
//          8.  <!DOCTYPE...>
//
// TODO:
//
//      ATTN: KS P1 4 Mar 2002. Review the following TODOs to see if there is
//      work.
Handle <!DOCTYPE...> sections which are complicated (containing// rules rather than references to files).//// Remove newlines from string literals://// Example: <xyz x="hello// world">//////////////////////////////////////////////////////////////////////////////////#include <Pegasus/Common/Config.h>#include <cctype>#include <cstdio>#include <cstdlib>#include <cstring>#include "XmlParser.h"#include "Logger.h"#include "ExceptionRep.h"#include "CharSet.h"PEGASUS_NAMESPACE_BEGIN//////////////////////////////////////////////////////////////////////////////////// Static helper functions//////////////////////////////////////////////////////////////////////////////////static void _printValue(const char* p){ for (; *p; p++) { if (*p == '\n') PEGASUS_STD(cout) << "\\n"; else if (*p == '\r') PEGASUS_STD(cout) << "\\r"; else if (*p == '\t') PEGASUS_STD(cout) << "\\t"; else PEGASUS_STD(cout) << *p; }}struct EntityReference{ const char* match; Uint32 length; char replacement;};// ATTN: Add support for more entity referencesstatic EntityReference _references[] ={ { "&", 5, '&' }, { "<", 4, '<' }, { ">", 4, '>' }, { """, 6, '"' }, { "'", 6, '\'' }};// Implements a check for a whitespace character, without calling// isspace( ). The isspace( ) function is locale-sensitive,// and incorrectly flags some chars above 0x7f as whitespace. 
This// causes the XmlParser to incorrectly parse UTF-8 data.//// Section 2.3 of XML 1.0 Standard (http://www.w3.org/TR/REC-xml)// defines white space as:// S ::= (#x20 | #x9 | #xD | #xA)+static inline int _isspace(char c){ return CharSet::isXmlWhiteSpace((Uint8)c);}static Uint32 _REFERENCES_SIZE = (sizeof(_references) / sizeof(_references[0]));//////////////////////////////////////////////////////////////////////////////////// XmlException//////////////////////////////////////////////////////////////////////////////////static const char* _xmlMessages[] ={ "Bad opening element", "Bad closing element", "Bad attribute name", "Exepected equal sign", "Bad attribute value", "A \"--\" sequence found within comment", "Unterminated comment", "Unterminated CDATA block", "Unterminated DOCTYPE", "Too many attributes: parser only handles 10", "Malformed reference", "Expected a comment or CDATA following \"<!\" sequence", "Closing element does not match opening element", "One or more tags are still open", "More than one root element was encountered", "Validation error", "Semantic error"};static const char* _xmlKeys[] ={ "Common.XmlParser.BAD_START_TAG", "Common.XmlParser.BAD_END_TAG", "Common.XmlParser.BAD_ATTRIBUTE_NAME", "Common.XmlParser.EXPECTED_EQUAL_SIGN", "Common.XmlParser.BAD_ATTRIBUTE_VALUE", "Common.XmlParser.MINUS_MINUS_IN_COMMENT", "Common.XmlParser.UNTERMINATED_COMMENT", "Common.XmlParser.UNTERMINATED_CDATA", "Common.XmlParser.UNTERMINATED_DOCTYPE", "Common.XmlParser.TOO_MANY_ATTRIBUTES", "Common.XmlParser.MALFORMED_REFERENCE", "Common.XmlParser.EXPECTED_COMMENT_OR_CDATA", "Common.XmlParser.START_END_MISMATCH", "Common.XmlParser.UNCLOSED_TAGS", "Common.XmlParser.MULTIPLE_ROOTS", "Common.XmlParser.VALIDATION_ERROR", "Common.XmlParser.SEMANTIC_ERROR"};// l10n replace _formMessage (comment out the old one)/*static String _formMessage(Uint32 code, Uint32 line, const String& message){ String result = _xmlMessages[Uint32(code) - 1]; char buffer[32]; sprintf(buffer, "%d", 
line); result.append(": on line "); result.append(buffer); if (message.size()) { result.append(": "); result.append(message); } return result;}*/static MessageLoaderParms _formMessage( Uint32 code, Uint32 line, const String& message){ String dftMsg = _xmlMessages[Uint32(code) - 1]; String key = _xmlKeys[Uint32(code) - 1]; String msg = message; dftMsg.append(": on line $0"); if (message.size()) { msg = ": " + msg; dftMsg.append("$1"); } return MessageLoaderParms(key, dftMsg, line ,msg);}static MessageLoaderParms _formPartialMessage(Uint32 code, Uint32 line){ String dftMsg = _xmlMessages[Uint32(code) - 1]; String key = _xmlKeys[Uint32(code) - 1]; dftMsg.append(": on line $0"); return MessageLoaderParms(key, dftMsg, line);}XmlException::XmlException( XmlException::Code code, Uint32 lineNumber, const String& message) : Exception(_formMessage(code, lineNumber, message)){}XmlException::XmlException( XmlException::Code code, Uint32 lineNumber, MessageLoaderParms& msgParms) : Exception(_formPartialMessage(code, lineNumber)){ if (msgParms.default_msg.size()) { msgParms.default_msg = ": " + msgParms.default_msg; } _rep->message.append(MessageLoader::getMessage(msgParms));}//////////////////////////////////////////////////////////////////////////////////// XmlValidationError//////////////////////////////////////////////////////////////////////////////////XmlValidationError::XmlValidationError( Uint32 lineNumber, const String& message) : XmlException(XmlException::VALIDATION_ERROR, lineNumber, message){}XmlValidationError::XmlValidationError( Uint32 lineNumber, MessageLoaderParms& msgParms) : XmlException(XmlException::VALIDATION_ERROR, lineNumber, msgParms){}//////////////////////////////////////////////////////////////////////////////////// XmlSemanticError//////////////////////////////////////////////////////////////////////////////////XmlSemanticError::XmlSemanticError( Uint32 lineNumber, const String& message) : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, 
message){}XmlSemanticError::XmlSemanticError( Uint32 lineNumber, MessageLoaderParms& msgParms) : XmlException(XmlException::SEMANTIC_ERROR, lineNumber, msgParms){}//////////////////////////////////////////////////////////////////////////////////// XmlParser//////////////////////////////////////////////////////////////////////////////////XmlParser::XmlParser(char* text) : _line(1), _text(text), _current(text), _restoreChar('\0'), _foundRoot(false){}inline void _skipWhitespace(Uint32& line, char*& p){ while (*p && _isspace(*p)) { if (*p == '\n') line++; p++; }}static int _getEntityRef(char*& p){ if ((p[0] == 'g') && (p[1] == 't') && (p[2] == ';')) { p += 3; return '>'; } if ((p[0] == 'l') && (p[1] == 't') && (p[2] == ';')) { p += 3; return '<'; } if ((p[0] == 'a') && (p[1] == 'p') && (p[2] == 'o') && (p[3] == 's') && (p[4] == ';')) { p += 5; return '\''; } if ((p[0] == 'q') && (p[1] == 'u') && (p[2] == 'o') && (p[3] == 't') && (p[4] == ';')) { p += 5; return '"'; } if ((p[0] == 'a') && (p[1] == 'm') && (p[2] == 'p') && (p[3] == ';')) { p += 4; return '&'; } return -1;}static inline int _getCharRef(char*& p, bool hex){ char* end; unsigned long ch; if (hex) { ch = strtoul(p, &end, 16); } else { ch = strtoul(p, &end, 10); } if ((end == p) || (*end != ';') || (ch > 255)) { return -1; } if ((hex && (end - p > 4)) || (!hex && (end - p > 5))) { return -1; } p = end + 1; return ch;}static void _normalize(Uint32& line, char*& p, char end_char, char*& start){ // Skip over leading whitespace: _skipWhitespace(line, p); start = p; // Process one character at a time: char* q = p; while (*p && (*p != end_char)) { if (_isspace(*p)) { // Compress sequences of whitespace characters to a single space // character. Update line number when newlines encountered. 
if (*p++ == '\n') { line++; } *q++ = ' '; _skipWhitespace(line, p); } else if (*p == '&') { // Process entity characters and entity references: p++; int ch; if (*p == '#') { *p++; if (*p == 'x') { p++; ch = _getCharRef(p, true); } else { ch = _getCharRef(p, false); } } else { ch = _getEntityRef(p); } if (ch == -1) { throw XmlException(XmlException::MALFORMED_REFERENCE, line); } *q++ = ch; } else { *q++ = *p++; } } // We encountered a the end_char or a zero-terminator. *q = *p; // Remove single trailing whitespace (consecutive whitespaces already // compressed above). Since p >= q, we can tell if we need to strip a // trailing space from q by looking at the end of p. We must not look at // the last character of p, though, if p is an empty string. if ((p != start) && _isspace(p[-1])) { q--; } // If q got behind p, it is safe and necessary to null-terminate q if (q != p) { *q = '\0'; }}Boolean XmlParser::next(XmlEntry& entry){ if (!_putBackStack.isEmpty()) { entry = _putBackStack.top(); _putBackStack.pop(); return true; } // If a character was overwritten with a null-terminator the last // time this routine was called, then put back that character. Before // exiting of course, restore the null-terminator. char* nullTerminator = 0; if (_restoreChar && !*_current) { nullTerminator = _current; *_current = _restoreChar; _restoreChar = '\0'; } // Skip over any whitespace: _skipWhitespace(_line, _current); if (!*_current) { if (nullTerminator) *nullTerminator = '\0'; if (!_stack.isEmpty()) throw XmlException(XmlException::UNCLOSED_TAGS, _line); return false; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -