📄 xmlparser.cpp

📁 XML 文件解析器:可解析所有 XML 文件,支持 Unicode
💻 CPP
📖 第 1 页 / 共 5 页
字号:
    d->pChild=(XMLNode*)addToOrder(memoryIncrease,&pos,d->nChild,d->pChild,sizeof(XMLNode),eNodeChild);    d->pChild[pos].d=NULL;    d->pChild[pos]=XMLNode(d,lpszName,isDeclaration);    d->nChild++;    return d->pChild[pos];}// Add an attribute to an element.XMLAttribute *XMLNode::addAttribute_priv(int memoryIncrease,XMLSTR lpszName, XMLSTR lpszValuev){    if (!lpszName) return &emptyXMLAttribute;    if (!d) { myFree(lpszName); myFree(lpszValuev); return &emptyXMLAttribute; }    int nc=d->nAttribute;    d->pAttribute=(XMLAttribute*)myRealloc(d->pAttribute,(nc+1),memoryIncrease,sizeof(XMLAttribute));    XMLAttribute *pAttr=d->pAttribute+nc;    pAttr->lpszName = lpszName;    pAttr->lpszValue = lpszValuev;    d->nAttribute++;    return pAttr;}// Add text to the element.XMLCSTR XMLNode::addText_priv(int memoryIncrease, XMLSTR lpszValue, int pos){    if (!lpszValue) return NULL;    if (!d) { myFree(lpszValue); return NULL; }    d->pText=(XMLCSTR*)addToOrder(memoryIncrease,&pos,d->nText,d->pText,sizeof(XMLSTR),eNodeText);    d->pText[pos]=lpszValue;    d->nText++;    return lpszValue;}// Add clear (unformatted) text to the element.XMLClear *XMLNode::addClear_priv(int memoryIncrease, XMLSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, int pos){    if (!lpszValue) return &emptyXMLClear;    if (!d) { myFree(lpszValue); return &emptyXMLClear; }    d->pClear=(XMLClear *)addToOrder(memoryIncrease,&pos,d->nClear,d->pClear,sizeof(XMLClear),eNodeClear);    XMLClear *pNewClear=d->pClear+pos;    pNewClear->lpszValue = lpszValue;    if (!lpszOpen) lpszOpen=XMLClearTags->lpszOpen;    if (!lpszClose) lpszClose=XMLClearTags->lpszClose;    pNewClear->lpszOpenTag = lpszOpen;    pNewClear->lpszCloseTag = lpszClose;    d->nClear++;    return pNewClear;}// private:// Parse a clear (unformatted) type node.char XMLNode::parseClearTag(void *px, void *_pClear){    XML *pXML=(XML *)px;    ALLXMLClearTag pClear=*((ALLXMLClearTag*)_pClear);    int cbTemp=0;    XMLCSTR lpszTemp=NULL;    XMLCSTR 
lpXML=&pXML->lpXML[pXML->nIndex];    static XMLCSTR docTypeEnd=_X("]>");    // Find the closing tag    // Seems the <!DOCTYPE need a better treatment so lets handle it    if (pClear.lpszOpen==XMLClearTags[1].lpszOpen)    {        XMLCSTR pCh=lpXML;        while (*pCh)        {            if (*pCh==_X('<')) { pClear.lpszClose=docTypeEnd; lpszTemp=xstrstr(lpXML,docTypeEnd); break; }            else if (*pCh==_X('>')) { lpszTemp=pCh; break; }#ifdef _XMLWIDECHAR            pCh++;#else            pCh+=XML_ByteTable[(unsigned char)(*pCh)];#endif        }    } else lpszTemp=xstrstr(lpXML, pClear.lpszClose);    if (lpszTemp)    {        // Cache the size and increment the index        cbTemp = (int)(lpszTemp - lpXML);        pXML->nIndex += cbTemp+(int)xstrlen(pClear.lpszClose);        // Add the clear node to the current element        addClear_priv(MEMORYINCREASE,stringDup(lpXML,cbTemp), pClear.lpszOpen, pClear.lpszClose,-1);        return 0;    }    // If we failed to find the end tag    pXML->error = eXMLErrorUnmatchedEndClearTag;    return 1;}void XMLNode::exactMemory(XMLNodeData *d){    if (d->pOrder)     d->pOrder=(int*)realloc(d->pOrder,(d->nChild+d->nText+d->nClear)*sizeof(int));    if (d->pChild)     d->pChild=(XMLNode*)realloc(d->pChild,d->nChild*sizeof(XMLNode));    if (d->pAttribute) d->pAttribute=(XMLAttribute*)realloc(d->pAttribute,d->nAttribute*sizeof(XMLAttribute));    if (d->pText)      d->pText=(XMLCSTR*)realloc(d->pText,d->nText*sizeof(XMLSTR));    if (d->pClear)     d->pClear=(XMLClear *)realloc(d->pClear,d->nClear*sizeof(XMLClear));}char XMLNode::maybeAddTxT(void *pa, XMLCSTR tokenPStr){    XML *pXML=(XML *)pa;    XMLCSTR lpszText=pXML->lpszText;    if (!lpszText) return 0;    if (dropWhiteSpace) while (XML_isSPACECHAR(*lpszText)&&(lpszText!=tokenPStr)) lpszText++;    int cbText = (int)(tokenPStr - lpszText);    if (!cbText) { pXML->lpszText=NULL; return 0; }    if (dropWhiteSpace) { cbText--; while ((cbText)&&XML_isSPACECHAR(lpszText[cbText])) 
cbText--; cbText++; }    if (!cbText) { pXML->lpszText=NULL; return 0; }    XMLSTR lpt=fromXMLString(lpszText,cbText,pXML);    if (!lpt) return 1;    addText_priv(MEMORYINCREASE,lpt,-1);    pXML->lpszText=NULL;    return 0;}// private:// Recursively parse an XML element.int XMLNode::ParseXMLElement(void *pa){    XML *pXML=(XML *)pa;    int cbToken;    enum XMLTokenTypeTag xtype;    NextToken token;    XMLCSTR lpszTemp=NULL;    int cbTemp=0;    char nDeclaration;    XMLNode pNew;    enum Status status; // inside or outside a tag    enum Attrib attrib = eAttribName;    assert(pXML);    // If this is the first call to the function    if (pXML->nFirst)    {        // Assume we are outside of a tag definition        pXML->nFirst = FALSE;        status = eOutsideTag;    } else    {        // If this is not the first call then we should only be called when inside a tag.        status = eInsideTag;    }    // Iterate through the tokens in the document    for(;;)    {        // Obtain the next token        token = GetNextToken(pXML, &cbToken, &xtype);        if (xtype != eTokenError)        {            // Check the current status            switch(status)            {            // If we are outside of a tag definition            case eOutsideTag:                // Check what type of token we obtained                switch(xtype)                {                // If we have found text or quoted text                case eTokenText:                case eTokenCloseTag:          /* '>'         */                case eTokenShortHandClose:    /* '/>'        */                case eTokenQuotedText:                case eTokenEquals:                    break;                // If we found a start tag '<' and declarations '<?'                
case eTokenTagStart:                case eTokenDeclaration:                    // Cache whether this new element is a declaration or not                    nDeclaration = (xtype == eTokenDeclaration);                    // If we have node text then add this to the element                    if (maybeAddTxT(pXML,token.pStr)) return FALSE;                    // Find the name of the tag                    token = GetNextToken(pXML, &cbToken, &xtype);                    // Return an error if we couldn't obtain the next token or                    // it wasnt text                    if (xtype != eTokenText)                    {                        pXML->error = eXMLErrorMissingTagName;                        return FALSE;                    }                    // If we found a new element which is the same as this                    // element then we need to pass this back to the caller..#ifdef APPROXIMATE_PARSING                    if (d->lpszName &&                        myTagCompare(d->lpszName, token.pStr) == 0)                    {                        // Indicate to the caller that it needs to create a                        // new element.                        pXML->lpNewElement = token.pStr;                        pXML->cbNewElement = cbToken;                        return TRUE;                    } else#endif                    {                        // If the name of the new element differs from the name of                        // the current element we need to add the new element to                        // the current one and recurse                        pNew = addChild_priv(MEMORYINCREASE,stringDup(token.pStr,cbToken), nDeclaration,-1);                        while (!pNew.isEmpty())                        {                            // Callself to process the new node.  If we return                            // FALSE this means we dont have any more                            // processing to do...                            
if (!pNew.ParseXMLElement(pXML)) return FALSE;                            else                            {                                // If the call to recurse this function                                // evented in a end tag specified in XML then                                // we need to unwind the calls to this                                // function until we find the appropriate node                                // (the element name and end tag name must                                // match)                                if (pXML->cbEndTag)                                {                                    // If we are back at the root node then we                                    // have an unmatched end tag                                    if (!d->lpszName)                                    {                                        pXML->error=eXMLErrorUnmatchedEndTag;                                        return FALSE;                                    }                                    // If the end tag matches the name of this                                    // element then we only need to unwind                                    // once more...                                    if (myTagCompare(d->lpszName, pXML->lpEndTag)==0)                                    {                                        pXML->cbEndTag = 0;                                    }                                    return TRUE;                                } else                                    if (pXML->cbNewElement)                                    {                                        // If the call indicated a new element is to                                        // be created on THIS element.                                        
// If the name of this element matches the                                        // name of the element we need to create                                        // then we need to return to the caller                                        // and let it process the element.                                        if (myTagCompare(d->lpszName, pXML->lpNewElement)==0)                                        {                                            return TRUE;                                        }                                        // Add the new element and recurse                                        pNew = addChild_priv(MEMORYINCREASE,stringDup(pXML->lpNewElement,pXML->cbNewElement),0,-1);                                        pXML->cbNewElement = 0;                                    }                                    else                                    {                                        // If we didn't have a new element to create                                        pNew = emptyXMLNode;                                    }                            }                        }                    }                    break;                // If we found an end tag                case eTokenTagEnd:                    // If we have node text then add this to the element                    if (maybeAddTxT(pXML,token.pStr)) return FALSE;                    // Find the name of the end tag                    token = GetNextToken(pXML, &cbTemp, &xtype);                    // The end tag should be text                    if (xtype != eTokenText)                    {                        pXML->error = eXMLErrorMissingEndTagName;                        return FALSE;                    }                    lpszTemp = token.pStr;                    // After the end tag we should find a closing tag                    token = GetNextToken(pXML, &cbToken, &xtype);                    if (xtype != eTokenCloseTag)                    {                   
     pXML->error = eXMLErrorMissingEndTagName;                        return FALSE;                    }                    pXML->lpszText=pXML->lpXML+pXML->nIndex;                    // We need to return to the previous caller.  If the name                    // of the tag cannot be found we need to keep returning to                    // caller until we find a match                    if (myTagCompare(d->lpszName, lpszTemp) != 0)#ifdef STRICT_PARSING                    {                        pXML->error=eXMLErrorUnmatchedEndTag;                        pXML->nIndexMissigEndTag=pXML->nIndex;                        return FALSE;                    }#else                    {                        pXML->error=eXMLErrorMissingEndTag;                        pXML->nIndexMissigEndTag=pXML->nIndex;                        pXML->lpEndTag = lpszTemp;                        pXML->cbEndTag = cbTemp;                    }#endif                    // Return to the caller                    exactMemory(d);                    return TRUE;                // If we found a clear (unformatted) token                case eTokenClear:                    // If we have node text then add this to the element                    if (maybeAddTxT(pXML,token.pStr)) return FALSE;                    if (parseClearTag(pXML, token.pClr)) return FALSE;                    pXML->lpszText=pXML->lpXML+pXML->nIndex;                    break;                default:                    break;                }                break;            // If we are inside a tag definition we need to search for attributes            case eInsideTag:                // Check what part of the attribute (name, equals, value) we                // are looking for.                switch(attrib)
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -