📄 xmlparser.cpp

📁 xml文件解析器解析所有的xml文件支持unicode
💻 CPP
📖 第 1 页 / 共 5 页
字号:
    FILE *f=xfopen(filename,_X("rb"));    if (f)    {        char bb[205];        int l=(int)fread(bb,1,200,f);        setGlobalOptions(guessCharEncoding(bb,l),guessWideCharChars,dropWhiteSpace);        fclose(f);    }    // parse the file    XMLResults pResults;    XMLNode xnode=XMLNode::parseFile(filename,tag,&pResults);    // display error message (if any)    if (pResults.error != eXMLErrorNone)    {        // create message        char message[2000],*s1=(char*)"",*s3=(char*)""; XMLCSTR s2=_X("");        if (pResults.error==eXMLErrorFirstTagNotFound) { s1=(char*)"First Tag should be '"; s2=tag; s3=(char*)"'.\n"; }        sprintf(message,#ifdef _XMLWIDECHAR            "XML Parsing error inside file '%S'.\n%S\nAt line %i, column %i.\n%s%S%s"#else            "XML Parsing error inside file '%s'.\n%s\nAt line %i, column %i.\n%s%s%s"#endif            ,filename,XMLNode::getError(pResults.error),pResults.nLine,pResults.nColumn,s1,s2,s3);        // display message#if defined(_XMLWINDOWS) && !defined(UNDER_CE) && !defined(_XMLPARSER_NO_MESSAGEBOX_)        MessageBoxA(NULL,message,"XML Parsing error",MB_OK|MB_ICONERROR|MB_TOPMOST);#else        printf("%s",message);#endif        exit(255);    }    return xnode;}///////////////////////////////////////////////////////////////////////////      Here start the core implementation of the XMLParser library    ///////////////////////////////////////////////////////////////////////////// You should normally not change anything below this point.#ifndef _XMLWIDECHAR// If "characterEncoding=ascii" then we assume that all characters have the same length of 1 byte.// If "characterEncoding=UTF8" then the characters have different lengths (from 1 byte to 4 bytes).// If "characterEncoding=ShiftJIS" then the characters have different lengths (from 1 byte to 2 bytes).// This table is used as lookup-table to know the length of a character (in byte) based on the// content of the first byte of the character.// (note: if you modify this, you must always have XML_utf8ByteTable[0]=0 ).static const char XML_utf8ByteTable[256] ={    //  0 1 2 3 4 5 6 7 8 9 a b c d e f    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70End of ASCII range    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80 0x80 to 0xc1 invalid    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0    1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 0xc2 to 0xdf 2 byte    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,// 0xe0 0xe0 to 0xef 3 byte    4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid};static const char XML_asciiByteTable[256] ={    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};static const char XML_sjisByteTable[256] ={    //  0 1 2 3 4 5 6 7 8 9 a b c d e f    0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 End of ASCII range    1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0x9F 2 bytes    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xc0     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xd0    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 0xe0 to 0xef 2 bytes    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 // 0xf0};static const char *XML_ByteTable=(const char *)XML_utf8ByteTable; // the default is "characterEncoding=XMLNode::encoding_UTF8"#endifXMLNode XMLNode::emptyXMLNode;XMLClear XMLNode::emptyXMLClear={ NULL, NULL, NULL};XMLAttribute XMLNode::emptyXMLAttribute={ NULL, NULL};// Enumeration used to decipher what type a token istypedef enum XMLTokenTypeTag{    eTokenText = 0,    eTokenQuotedText,    eTokenTagStart,         /* "<"            */    eTokenTagEnd,           /* "</"           */    eTokenCloseTag,         /* ">"            */    eTokenEquals,           /* "="            */    eTokenDeclaration,      /* "<?"           */    eTokenShortHandClose,   /* "/>"           */    eTokenClear,    eTokenError} XMLTokenType;// Main structure used for parsing XMLtypedef struct XML{    XMLCSTR                lpXML;    XMLCSTR                lpszText;    int                    nIndex,nIndexMissigEndTag;    enum XMLError          error;    XMLCSTR                lpEndTag;    int                    cbEndTag;    XMLCSTR                lpNewElement;    int                    cbNewElement;    int                    nFirst;} XML;typedef struct{    ALLXMLClearTag *pClr;    XMLCSTR     pStr;} NextToken;// Enumeration used when parsing attributestypedef enum Attrib{    eAttribName = 0,    eAttribEquals,    eAttribValue} Attrib;// Enumeration used when parsing elements to dictate whether we are currently// inside a tagtypedef enum Status{    eInsideTag = 0,    eOutsideTag} Status;XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, char nFormat) const{    if (!d) return eXMLErrorNone;    FILE *f=xfopen(filename,_X("wb"));    if (!f) return eXMLErrorCannotOpenWriteFile;#ifdef _XMLWIDECHAR    unsigned char h[2]={ 0xFF, 0xFE };    if (!fwrite(h,2,1,f)) return eXMLErrorCannotWriteFile;    if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration())))    {        if (!fwrite(_X("<?xml version=\"1.0\" encoding=\"utf-16\"?>\n"),sizeof(wchar_t)*40,1,f))            return eXMLErrorCannotWriteFile;    }#else    if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration())))    {        if (characterEncoding==encoding_UTF8)        {            // header so that windows recognize the file as UTF-8:            unsigned char h[3]={0xEF,0xBB,0xBF}; if (!fwrite(h,3,1,f)) return eXMLErrorCannotWriteFile;            encoding="utf-8";        } else if (characterEncoding==encoding_ShiftJIS) encoding="SHIFT-JIS";                if (!encoding) encoding="ISO-8859-1";        if (fprintf(f,"<?xml version=\"1.0\" encoding=\"%s\"?>\n",encoding)<0) return eXMLErrorCannotWriteFile;    } else    {        if (characterEncoding==encoding_UTF8)         {            unsigned char h[3]={0xEF,0xBB,0xBF}; if (!fwrite(h,3,1,f)) return eXMLErrorCannotWriteFile;        }    }#endif    int i;    XMLSTR t=createXMLString(nFormat,&i);    if (!fwrite(t,sizeof(XMLCHAR)*i,1,f)) return eXMLErrorCannotWriteFile;    if (fclose(f)!=0) return eXMLErrorCannotWriteFile;    free(t);    return eXMLErrorNone;}// Duplicate a given string.XMLSTR stringDup(XMLCSTR lpszData, int cbData){    if (lpszData==NULL) return NULL;    XMLSTR lpszNew;    if (cbData==0) cbData=(int)xstrlen(lpszData);    lpszNew = (XMLSTR)malloc((cbData+1) * sizeof(XMLCHAR));    if (lpszNew)    {        memcpy(lpszNew, lpszData, (cbData) * sizeof(XMLCHAR));        lpszNew[cbData] = (XMLCHAR)NULL;    }    return lpszNew;}XMLSTR toXMLStringUnSafe(XMLSTR dest,XMLCSTR source){    XMLSTR dd=dest;    XMLCHAR ch;    XMLCharacterEntity *entity;    while ((ch=*source))    {        entity=XMLEntities;        do        {            if (ch==entity->c) {xstrcpy(dest,entity->s); dest+=entity->l; source++; goto out_of_loop1; }            entity++;        } while(entity->s);#ifdef _XMLWIDECHAR        *(dest++)=*(source++);#else        switch(XML_ByteTable[(unsigned char)ch])        {        case 4: *(dest++)=*(source++);        case 3: *(dest++)=*(source++);        case 2: *(dest++)=*(source++);        case 1: *(dest++)=*(source++);        }#endifout_of_loop1:        ;    }    *dest=0;    return dd;}// private (used while rendering):int lengthXMLString(XMLCSTR source){    int r=0;    XMLCharacterEntity *entity;    XMLCHAR ch;    while ((ch=*source))    {        entity=XMLEntities;        do        {            if (ch==entity->c) { r+=entity->l; source++; goto out_of_loop1; }            entity++;        } while(entity->s);#ifdef _XMLWIDECHAR        r++; source++;#else        ch=XML_ByteTable[(unsigned char)ch]; r+=ch; source+=ch;#endifout_of_loop1:        ;    }    return r;}ToXMLStringTool::~ToXMLStringTool(){ freeBuffer(); }void ToXMLStringTool::freeBuffer(){ if (buf) free(buf); buf=NULL; buflen=0; }XMLSTR ToXMLStringTool::toXML(XMLCSTR source){    int l=lengthXMLString(source)+1;    if (l>buflen) { buflen=l; buf=(XMLSTR)realloc(buf,l*sizeof(XMLCHAR)); }    return toXMLStringUnSafe(buf,source);}// private:XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML){    // This function is the opposite of the function "toXMLString". It decodes the escape    // sequences &amp;, &quot;, &apos;, &lt;, &gt; and replace them by the characters    // &,",',<,>. This function is used internally by the XML Parser. All the calls to    // the XML library will always gives you back "decoded" strings.    //    // in: string (s) and length (lo) of string    // out:  new allocated string converted from xml    if (!s) return NULL;    int ll=0,j;    XMLSTR d;    XMLCSTR ss=s;    XMLCharacterEntity *entity;    while ((lo>0)&&(*s))    {        if (*s==_X('&'))        {            if ((lo>2)&&(s[1]==_X('#')))            {                s+=2; lo-=2;                if ((*s==_X('X'))||(*s==_X('x'))) { s++; lo--; }                while ((*s)&&(*s!=_X(';'))&&((lo--)>0)) s++;                if (*s!=_X(';'))                {                    pXML->error=eXMLErrorUnknownCharacterEntity;                    return NULL;                }                s++; lo--;            } else            {                entity=XMLEntities;                do                {                    if ((lo>=entity->l)&&(xstrnicmp(s,entity->s,entity->l)==0)) { s+=entity->l; lo-=entity->l; break; }                    entity++;                } while(entity->s);                if (!entity->s)                {                    pXML->error=eXMLErrorUnknownCharacterEntity;                    return NULL;                }            }        } else        {#ifdef _XMLWIDECHAR            s++; lo--;#else            j=XML_ByteTable[(unsigned char)*s]; s+=j; lo-=j; ll+=j-1;#endif        }        ll++;    }    d=(XMLSTR)malloc((ll+1)*sizeof(XMLCHAR));    s=d;    while (ll-->0)    {        if (*ss==_X('&'))        {            if (ss[1]==_X('#'))            {                ss+=2; j=0;                if ((*ss==_X('X'))||(*ss==_X('x')))                {
💿 文件大小 63 K
👤 上传用户 lvuxinwu
📂 所属分类编译器/解释器
🏷️ 相关标签

#xml #unicode #文件解析
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -