📄 xmlparser.cpp
字号:
/**
 ****************************************************************************
 * <P> XML.c - implementation file for basic XML parser written in ANSI C++
 * for portability. It works by using recursion and a node tree for breaking
 * down the elements of an XML document. </P>
 *
 * @version     V2.31
 * @author      Frank Vanden Berghen
 *
 * NOTE:
 *
 *   If you add "#define STRICT_PARSING", on the first line of this file
 *   the parser will see the following XML-stream:
 *      <a><b>some text</b><b>other text    </a>
 *   as an error. Otherwise, this string will be equivalent to:
 *      <a><b>some text</b><b>other text</b></a>
 *
 * NOTE:
 *
 *   If you add "#define APPROXIMATE_PARSING" on the first line of this file
 *   the parser will see the following XML-stream:
 *     <data name="n1">
 *     <data name="n2">
 *     <data name="n3" />
 *   as equivalent to the following XML-stream:
 *     <data name="n1" />
 *     <data name="n2" />
 *     <data name="n3" />
 *   This can be useful for badly-formed XML-streams but prevent the use
 *   of the following XML-stream (problem is: tags at contiguous levels
 *   have the same names):
 *     <data name="n1">
 *        <data name="n2">
 *            <data name="n3" />
 *        </data>
 *     </data>
 *
 * NOTE:
 *
 *   If you add "#define _XMLPARSER_NO_MESSAGEBOX_" on the first line of this file
 *   the "openFileHelper" function will always display error messages inside the
 *   console instead of inside a message-box-window. Message-box-windows are
 *   available on windows 9x/NT/2000/XP/Vista only.
 *
 * BSD license:
 * Copyright (c) 2002, Frank Vanden Berghen
 * All rights reserved.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the Frank Vanden Berghen nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* **************************************************************************** */#ifndef _CRT_SECURE_NO_DEPRECATE#define _CRT_SECURE_NO_DEPRECATE#endif#include "xmlParser.h"#ifdef _XMLWINDOWS//#ifdef _DEBUG//#define _CRTDBG_MAP_ALLOC//#include <crtdbg.h>//#endif#define WIN32_LEAN_AND_MEAN#include <Windows.h> // to have IsTextUnicode, MultiByteToWideChar, WideCharToMultiByte to handle unicode files // to have "MessageBoxA" to display error messages for openFilHelper#endif#include <memory.h>#include <assert.h>#include <stdio.h>#include <string.h>#include <stdlib.h>XMLCSTR XMLNode::getVersion() { return _X("v2.30"); }void freeXMLString(XMLSTR t){free(t);}static XMLNode::XMLCharEncoding characterEncoding=XMLNode::encoding_UTF8;static char guessWideCharChars=1, dropWhiteSpace=1;inline int mmin( const int t1, const int t2 ) { return t1 < t2 ? t1 : t2; }// You can modify the initialization of the variable "XMLClearTags" below// to change the clearTags that are currently recognized by the library.// The number on the second columns is the length of the string inside the// first column. The "<!DOCTYPE" declaration must be the second in the list.typedef struct { XMLCSTR lpszOpen; int openTagLen; XMLCSTR lpszClose;} ALLXMLClearTag;static ALLXMLClearTag XMLClearTags[] ={ { _X("<![CDATA["),9, _X("]]>") }, { _X("<!DOCTYPE"),9, _X(">") }, { _X("<PRE>") ,5, _X("</PRE>") }, { _X("<Script>") ,8, _X("</Script>")}, { _X("<!--") ,4, _X("-->") }, { NULL ,0, NULL }};// You can modify the initialization of the variable "XMLEntities" below// to change the character entities that are currently recognized by the library.// The number on the second columns is the length of the string inside the// first column. 
Additionally, the syntaxes " " and " " are recognized.typedef struct { XMLCSTR s; int l; XMLCHAR c;} XMLCharacterEntity;static XMLCharacterEntity XMLEntities[] ={ { _X("&" ), 5, _X('&' )}, { _X("<" ), 4, _X('<' )}, { _X(">" ), 4, _X('>' )}, { _X("""), 6, _X('\"')}, { _X("'"), 6, _X('\'')}, { NULL , 0, '\0' }};// When rendering the XMLNode to a string (using the "createXMLString" function),// you can ask for a beautiful formatting. This formatting is using the// following indentation character:#define INDENTCHAR _X('\t')// The following function parses the XML errors into a user friendly string.// You can edit this to change the output language of the library to something else.XMLCSTR XMLNode::getError(XMLError xerror){ switch (xerror) { case eXMLErrorNone: return _X("No error"); case eXMLErrorMissingEndTag: return _X("Warning: Unmatched end tag"); case eXMLErrorNoXMLTagFound: return _X("Warning: No XML tag found"); case eXMLErrorEmpty: return _X("Error: No XML data"); case eXMLErrorMissingTagName: return _X("Error: Missing start tag name"); case eXMLErrorMissingEndTagName: return _X("Error: Missing end tag name"); case eXMLErrorUnmatchedEndTag: return _X("Error: Unmatched end tag"); case eXMLErrorUnmatchedEndClearTag: return _X("Error: Unmatched clear tag end"); case eXMLErrorUnexpectedToken: return _X("Error: Unexpected token found"); case eXMLErrorNoElements: return _X("Error: No elements found"); case eXMLErrorFileNotFound: return _X("Error: File not found"); case eXMLErrorFirstTagNotFound: return _X("Error: First Tag not found"); case eXMLErrorUnknownCharacterEntity:return _X("Error: Unknown character entity"); case eXMLErrorCharConversionError: return _X("Error: unable to convert between WideChar and MultiByte chars"); case eXMLErrorCannotOpenWriteFile: return _X("Error: unable to open file for writing"); case eXMLErrorCannotWriteFile: return _X("Error: cannot write into file"); case eXMLErrorBase64DataSizeIsNotMultipleOf4: return _X("Warning: Base64-string 
length is not a multiple of 4"); case eXMLErrorBase64DecodeTruncatedData: return _X("Warning: Base64-string is truncated"); case eXMLErrorBase64DecodeIllegalCharacter: return _X("Error: Base64-string contains an illegal character"); case eXMLErrorBase64DecodeBufferTooSmall: return _X("Error: Base64 decode output buffer is too small"); }; return _X("Unknown");}/////////////////////////////////////////////////////////////////////////// Here start the abstraction layer to be OS-independent ///////////////////////////////////////////////////////////////////////////// Here is an abstraction layer to access some common string manipulation functions.// The abstraction layer is currently working for gcc, Microsoft Visual Studio 6.0,// Microsoft Visual Studio .NET, CC (sun compiler) and Borland C++.// If you plan to "port" the library to a new system/compiler, all you have to do is// to edit the following lines.#ifdef XML_NO_WIDE_CHARchar myIsTextWideChar(const void *b, int len) { return FALSE; }#else #if defined (UNDER_CE) || !defined(_XMLWINDOWS) char myIsTextWideChar(const void *b, int len) // inspired by the Wine API: RtlIsTextUnicode {#ifdef sun // for SPARC processors: wchar_t* buffers must always be alligned, otherwise it's a char* buffer. 
if ((((unsigned long)b)%sizeof(wchar_t))!=0) return FALSE;#endif const wchar_t *s=(const wchar_t*)b; // buffer too small: if (len<(int)sizeof(wchar_t)) return FALSE; // odd length test if (len&1) return FALSE; /* only checks the first 256 characters */ len=mmin(256,len/sizeof(wchar_t)); // Check for the special byte order: if (*((unsigned short*)s) == 0xFFFE) return TRUE; // IS_TEXT_UNICODE_REVERSE_SIGNATURE; if (*((unsigned short*)s) == 0xFEFF) return TRUE; // IS_TEXT_UNICODE_SIGNATURE // checks for ASCII characters in the UNICODE stream int i,stats=0; for (i=0; i<len; i++) if (s[i]<=(unsigned short)255) stats++; if (stats>len/2) return TRUE; // Check for UNICODE NULL chars for (i=0; i<len; i++) if (!s[i]) return TRUE; return FALSE; } #else char myIsTextWideChar(const void *b,int l) { return (char)IsTextUnicode((CONST LPVOID)b,l,NULL); }; #endif#endif#ifdef _XMLWINDOWS// for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET, #ifdef _XMLWIDECHAR wchar_t *myMultiByteToWideChar(const char *s) { int i; if (characterEncoding==XMLNode::encoding_UTF8) i=(int)MultiByteToWideChar(CP_UTF8,0 ,s,-1,NULL,0); else i=(int)MultiByteToWideChar(CP_ACP ,MB_PRECOMPOSED,s,-1,NULL,0); if (i<0) return NULL; wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(XMLCHAR)); if (characterEncoding==XMLNode::encoding_UTF8) i=(int)MultiByteToWideChar(CP_UTF8,0 ,s,-1,d,i); else i=(int)MultiByteToWideChar(CP_ACP ,MB_PRECOMPOSED,s,-1,d,i); d[i]=0; return d; } static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return _wfopen(filename,mode); } static inline int xstrlen(XMLCSTR c) { return (int)wcslen(c); } static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return _wcsnicmp(c1,c2,l);} static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);} static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return _wcsicmp(c1,c2); } static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); } static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR 
c2) { return (XMLSTR)wcscpy(c1,c2); } #else char *myWideCharToMultiByte(const wchar_t *s) { UINT codePage=CP_ACP; if (characterEncoding==XMLNode::encoding_UTF8) codePage=CP_UTF8; int i=(int)WideCharToMultiByte(codePage, // code page 0, // performance and mapping flags s, // wide-character string -1, // number of chars in string NULL, // buffer for new string 0, // size of buffer NULL, // default for unmappable chars NULL // set when default char used ); if (i<0) return NULL; char *d=(char*)malloc(i+1); WideCharToMultiByte(codePage, // code page 0, // performance and mapping flags s, // wide-character string -1, // number of chars in string d, // buffer for new string i, // size of buffer NULL, // default for unmappable chars NULL // set when default char used ); d[i]=0; return d; } static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); } static inline int xstrlen(XMLCSTR c) { return (int)strlen(c); } static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return _strnicmp(c1,c2,l);} static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);} static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return _stricmp(c1,c2); } static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); } static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); } #endif #ifdef __BORLANDC__ static inline int _strnicmp(char *c1, char *c2, int l){ return strnicmp(c1,c2,l);} #endif#else// for gcc and CC #ifdef XML_NO_WIDE_CHAR char *myWideCharToMultiByte(const wchar_t *s) { return NULL; } #else char *myWideCharToMultiByte(const wchar_t *s) { const wchar_t *ss=s; int i=(int)wcsrtombs(NULL,&ss,0,NULL); if (i<0) return NULL; char *d=(char *)malloc(i+1); wcsrtombs(d,&s,i,NULL); d[i]=0; return d; } #endif #ifdef _XMLWIDECHAR wchar_t *myMultiByteToWideChar(const char *s) { const char *ss=s; int i=(int)mbsrtowcs(NULL,&ss,0,NULL); if (i<0) return NULL; wchar_t *d=(wchar_t 
*)malloc((i+1)*sizeof(wchar_t)); mbsrtowcs(d,&s,i,NULL); d[i]=0; return d; } int xstrlen(XMLCSTR c) { return wcslen(c); } #ifdef sun // for CC #include <widec.h> static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncasecmp(c1,c2,l);} static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncmp(c1,c2,l);} static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wscasecmp(c1,c2); } #else // for gcc static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncasecmp(c1,c2,l);} static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);} static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wcscasecmp(c1,c2); } #endif static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); } static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); } static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { char *filenameAscii=myWideCharToMultiByte(filename); FILE *f; if (mode[0]==_X('r')) f=fopen(filenameAscii,"rb"); else f=fopen(filenameAscii,"wb"); free(filenameAscii); return f; } #else static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); } static inline int xstrlen(XMLCSTR c) { return strlen(c); } static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncasecmp(c1,c2,l);} static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);} static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return strcasecmp(c1,c2); } static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); } static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); } #endif static inline int _strnicmp(const char *c1,const char *c2, int l) { return strncasecmp(c1,c2,l);}#endif/////////////////////////////////////////////////////////////////////////// the "openFileHelper" function ///////////////////////////////////////////////////////////////////////////// Since 
each application has its own way to report and deal with errors, you should modify & rewrite// the following "openFileHelper" function to get an "error reporting mechanism" tailored to your needs.XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag){ // guess the value of the global parameter "characterEncoding" // (the guess is based on the first 200 bytes of the file).
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -