📄 xmlparser.cpp

📁 upnpForWindows by microsoft
💻 CPP
📖 第 1 页 / 共 4 页
字号:
12 3 4 下一页
/** **************************************************************************** * <P> XML.c - implementation file for basic XML parser written in ANSI C++ * for portability. It works by using recursion and a node tree for breaking * down the elements of an XML document.  </P> * * @version     V1.11 * * @author      Frank Vanden Berghen * based on original implementation by Martyn C Brown * * NOTE: * *   If you add "#define STRICT_PARSING", on the first line of this file *   the parser will see the following XML-stream: *      <a><b>some text</b><b>other text    </a> *   as an error. Otherwise, this string will be equivalent to: *      <a><b>some text</b><b>other text</b></a> * * NOTE: * *   If you add "#define APPROXIMATE_PARSING", on the first line of this file *   the parser will see the following XML-stream: *     <data name="n1"> *     <data name="n2"> *     <data name="n3" /> *   as equivalent to the following XML-stream: *     <data name="n1" /> *     <data name="n2" /> *     <data name="n3" /> *   This can be useful for badly-formed XML-streams but prevents the use *   of the following XML-stream: *     <data name="n1"> *        <data name="n2"> *            <data name="n3" /> *        </data> *     </data> * * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License version 2.1 as published by the Free Software Foundation * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * **************************************************************************** */#ifdef WIN32#define WIN32_LEAN_AND_MEAN
#include <Windows.h> // to have IsTextUnicode, MultiByteToWideChar, WideCharToMultiByte                     // to handle unicode files#endif#include <memory.h>#include <assert.h>#include <stdio.h>#include <string.h>#include <stdlib.h>#include "xmlParser.h"//#ifdef WIN32
//#ifdef _DEBUG
//#define _CRTDBG_MAP_ALLOC
//#include <crtdbg.h>
//#endif
//#endif
XMLNode XMLNode::emptyXMLNode;XMLClear XMLNode::emptyXMLClear={ NULL, NULL, NULL};XMLAttribute XMLNode::emptyXMLAttribute={ NULL, NULL};#ifndef WIN32int _tcslen(const char *c)   { return strlen(c); }int _tcsnicmp(const char *c1, const char *c2, int l) { return strncasecmp(c1,c2,l); }int _tcsicmp(const char *c1, const char *c2) { return strcasecmp(c1,c2); }char *_tcsstr(const char *c1, const char *c2) { return (char*)strstr(c1,c2); }char *_tcschr(const char *c1, int c2) { return (char*)strchr(c1,c2); }char *_tcscpy(char *c1, const char *c2) { return (char*)strcpy(c1,c2); }#endifinline int mmin( const int t1, const int t2 ) { return t1 < t2 ? t1 : t2; }// Enumeration used to decipher what type a token istypedef enum TokenTypeTag{    eTokenText = 0,    eTokenQuotedText,    eTokenTagStart,         /* "<"            */    eTokenTagEnd,           /* "</"           */    eTokenCloseTag,         /* ">"            */    eTokenEquals,           /* "="            */    eTokenDeclaration,      /* "<?"           
*/    eTokenShortHandClose,   /* "/>"           */    eTokenClear,    eTokenError};#define INDENTCHAR    _T('\t')typedef struct ClearTag{    LPCTSTR lpszOpen;    LPCTSTR lpszClose;} ClearTag;// Main structure used for parsing XMLtypedef struct XML{    LPCTSTR                lpXML;    int                    nIndex;    enum XMLError          error;    LPCTSTR                lpEndTag;    int                    cbEndTag;    LPCTSTR                lpNewElement;    int                    cbNewElement;    int                    nFirst;    ClearTag               *pClrTags;} XML;typedef struct{    ClearTag    *pClr;    LPCTSTR     pStr;} NextToken;// Enumeration used when parsing attributestypedef enum Attrib{    eAttribName = 0,    eAttribEquals,    eAttribValue} Attrib;// Enumeration used when parsing elements to dictate whether we are currently// inside a tagtypedef enum Status{    eInsideTag = 0,    eOutsideTag} Status;// private:LPTSTR toXMLString(LPTSTR dest,LPCTSTR source){    LPTSTR dd=dest;    while (*source)    {        switch (*source)        {        case '<' : _tcscpy(dest,_T("&lt;"  )); dest+=4; break;        case '>' : _tcscpy(dest,_T("&gt;"  )); dest+=4; break;        case '&' : _tcscpy(dest,_T("&amp;" )); dest+=5; break;        case '\'': _tcscpy(dest,_T("&apos;")); dest+=6; break;        case '"' : _tcscpy(dest,_T("&quot;")); dest+=6; break;        default:  *dest=*source; dest++; break;        }        source++;    }    *dest=0;    return dd;}// private:int lengthXMLString(LPCTSTR source){    int r=0;    while (*source)    {        switch (*source)        {        case '<':  r+=3; break;        case '>' : r+=3; break;        case '&' : r+=4; break;        case '\'': r+=5; break;        case '"' : r+=5; break;        }        source++; r++;    }    return r;}LPTSTR toXMLString(LPCTSTR source){    LPTSTR dest=(LPTSTR)malloc((lengthXMLString(source)+1)*sizeof(TCHAR));    return toXMLString(dest,source);}LPTSTR toXMLStringFast(LPTSTR *dest,int *destSz, 
LPCTSTR source){    int l=lengthXMLString(source)+1;    if (l>*destSz) { *destSz=l; *dest=(LPTSTR)realloc(*dest,l*sizeof(TCHAR)); }    return toXMLString(*dest,source);}// private:LPTSTR fromXMLString(LPCTSTR s, int lo){    // This function is the opposite of the function "toXMLString". It decodes the escape    // sequences &amp;, &quot;, &apos;, &lt;, &gt; and replace them by the characters    // &,",',<,>. This function is used internally by the XML Parser. All the calls to    // the XML library will always gives you back "decoded" strings.    //    // in: string (s) and length (lo) of string    // out:  new allocated string converted from xml    if (!s) return NULL;    int ll=0;    LPTSTR d;    LPCTSTR ss=s;    while (((lo--)>0)&&(*s))    {        if (*s==_T('&'))        {            s++;                 if (_tcsnicmp(s,_T("lt;"  ),3)==0) { s+=2; lo-=3; }            else if (_tcsnicmp(s,_T("gt;"  ),3)==0) { s+=2; lo-=3; }            else if (_tcsnicmp(s,_T("amp;" ),4)==0) { s+=3; lo-=4; }            else if (_tcsnicmp(s,_T("apos;"),5)==0) { s+=4; lo-=5; }            else if (_tcsnicmp(s,_T("quot;"),5)==0) { s+=4; lo-=5; }            else            {                ll=0; while (s[ll]&&(s[ll]!=_T(';'))&&(ll<10)) ll++; ll++;                d=(LPTSTR)malloc((ll+1)*sizeof(TCHAR));                d[ll]=0;                while(ll--) d[ll]=s[ll];#ifdef _UNICODE                    printf("unknown escape character: '&%S'",d);#else                    printf("unknown escape character: '&%s'",d);#endif                free(d);                exit(255);            }        };        ll++; s++;    }    d=(LPTSTR)malloc((ll+1)*sizeof(TCHAR));    s=d;    while (ll--)    {        if (*ss==_T('&'))        {            ss++;                 if (_tcsnicmp(ss,_T("lt;"  ),3)==0) { *(d++)=_T('<' ); ss+=3; }            else if (_tcsnicmp(ss,_T("gt;"  ),3)==0) { *(d++)=_T('>' ); ss+=3; }            else if (_tcsnicmp(ss,_T("amp;" ),4)==0) { *(d++)=_T('&' ); ss+=4; }            else if 
(_tcsnicmp(ss,_T("apos;"),5)==0) { *(d++)=_T('\''); ss+=5; }            else                                     {                *(d++)=_T('"' ); ss+=5; }        } else { *(d++)=*ss; ss++; }    }    *d=0;    return (LPTSTR)s;}// private:char myTagCompare(LPCTSTR cclose, LPCTSTR copen)// !!!! WARNING strange convention&:// return 0 if equals// return 1 if different{    if (!cclose) return 1;    int l=(int)_tcslen(cclose);    if (_tcsnicmp(cclose, copen, l)!=0) return 1;    const TCHAR c=copen[l];    if ((c==_T('\n'))||        (c==_T(' ' ))||        (c==_T('\t'))||        (c==_T('\r'))||        (c==_T('/' ))||        (c==_T('<' ))||        (c==_T('>' ))||        (c==_T('=' ))) return 0;    return 1;}// private:// update "order" information when deleting a content of a XMLNodevoid XMLNode::removeOrderElement(XMLNodeData *d, XMLElementType t, int index){    int j=(int)((index<<2)+t),i=0,n=nElement(d)+1, *o=d->pOrder;    while ((o[i]!=j)&&(i<n)) i++;    n--;    memmove(o+i, o+i+1, (n-i)*sizeof(int));    for (;i<n;i++)        if ((o[i]&3)==(int)t) o[i]-=4;// We should normally do:// d->pOrder=(int)realloc(d->pOrder,n*sizeof(int));// but we skip reallocation because it's too time consuming.// Anyway, at the end, it will be free'd completely at once.}// Obtain the next character from the string.static inline TCHAR getNextChar(XML *pXML){    TCHAR ch = pXML->lpXML[pXML->nIndex];    if (ch!=0) pXML->nIndex++;    return ch;}// Find next non-white space character.static TCHAR FindNonWhiteSpace(XML *pXML){    TCHAR ch;    int nExit = FALSE;    assert(pXML);    // Iterate through characters in the string until we find a NULL or a    // non-white space character    while((nExit == FALSE) && (ch = getNextChar(pXML)))    {        switch(ch)        {        // Ignore white space        case _T('\n'):        case _T(' '):        case _T('\t'):        case _T('\r'): continue;        default: nExit = TRUE;        }    }    return ch;}// Find the next token in a string.// pcbToken 
contains the number of characters that have been read.static NextToken GetNextToken(XML *pXML, int *pcbToken, enum TokenTypeTag *pType){    NextToken        result;    LPCTSTR          lpXML;    TCHAR            ch;    TCHAR            chTemp;    int              nSize;    int              nFoundMatch;    int              nExit;    int              n;    LPCTSTR          lpszOpen;    int              cbOpen;    int              nIsText = FALSE;    // Find next non-white space character    ch = FindNonWhiteSpace(pXML);    if (ch)    {        // Cache the current string pointer        lpXML = pXML->lpXML;        result.pStr = &lpXML[pXML->nIndex-1];        // First check whether the token is in the clear tag list (meaning it        // does not need formatting).        n = 0;        while(TRUE)        {            // Obtain the name of the open part of the clear tag            lpszOpen = pXML->pClrTags[n].lpszOpen;            if (lpszOpen)            {                // Compare the open tag with the current token                cbOpen = (int)_tcslen(lpszOpen);                // if (myTagCompare(lpszOpen, result.pStr) == 0)                if (_tcsnicmp(lpszOpen, result.pStr, cbOpen)==0)                {                    result.pClr = &pXML->pClrTags[n];                    pXML->nIndex += (int)(_tcslen(lpszOpen)-1);                    *pType  = eTokenClear;                    return result;                }                n++;            }            else break;        }        // If we didn't find a clear tag then check for standard tokens        chTemp = 0;        lpXML = pXML->lpXML;        switch(ch)        {        // Check for quotes        case _T('\''):        case _T('\"'):            // Type of token            *pType = eTokenQuotedText;            chTemp = ch;            n=pXML->nIndex;            // Set the size            nSize = 1;            nFoundMatch = FALSE;            // Search through the string to find a matching quote            while((ch = 
getNextChar(pXML)))            {                nSize++;                if (ch==chTemp) { nFoundMatch = TRUE; break; }                if (ch==_T('<')) break;            }            // If we failed to find a matching quote            if (nFoundMatch == FALSE)            {                pXML->nIndex=n-1;                ch=getNextChar(pXML);                nIsText=TRUE;                break;            }            //  4.02.2002            if (FindNonWhiteSpace(pXML))            {                pXML->nIndex--;            }            break;        // Equals (used with attribute values)        case _T('='):            nSize = 1;            *pType = eTokenEquals;            break;        // Close tag        case _T('>'):            nSize = 1;            *pType = eTokenCloseTag;            break;        // Check for tag start and tag end        case _T('<'):            // Peek at the next character to see if we have an end tag '</',            // or an xml declaration '<?'            chTemp = pXML->lpXML[pXML->nIndex];            // If we have a tag end...            if (chTemp == _T('/'))            {                // Set the type and ensure we point at the next character                getNextChar(pXML);                *pType = eTokenTagEnd;                nSize = 2;            }            // If we have an XML declaration tag            else if (chTemp == _T('?'))            {                // Set the type and ensure we point at the next character                getNextChar(pXML);                *pType = eTokenDeclaration;                nSize = 2;            }            // Otherwise we must have a start tag            else            {
12 3 4 下一页
💿 文件大小 509 K
👤 上传用户 ywq9089
📂 所属分类软件设计/软件工程
🏷️ 相关标签

#upnpForWindows #microsoft #by
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -