dgxmlscanner.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,902 行 · 第 1/5 页

CPP
1,902
字号
                }                // That went ok, so scan for any miscellaneous stuff                scanMiscellaneous();                if (fDocHandler)                    fDocHandler->endDocument();            }        }    }    //  NOTE:    //    //  In all of the error processing below, the emitError() call MUST come    //  before the flush of the reader mgr, or it will fail because it tries    //  to find out the position in the XML source of the error.    catch(const XMLErrs::Codes)    {        // This is a 'first failure' exception, so reset and return failure        fReaderMgr.reset();        return false;    }    catch(const XMLValid::Codes)    {        // This is a 'first fatal error' type exit, so reset and reuturn failure        fReaderMgr.reset();        return false;    }    catch(const XMLException& excToCatch)    {        //  Emit the error and catch any user exception thrown from here. Make        //  sure in all cases we flush the reader manager.        fInException = true;        try        {            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)                emitError                (                    XMLErrs::XMLException_Warning                    , excToCatch.getType()                    , excToCatch.getMessage()                );            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)                emitError                (                    XMLErrs::XMLException_Fatal                    , excToCatch.getType()                    , excToCatch.getMessage()                );            else                emitError                (                    XMLErrs::XMLException_Error                    , excToCatch.getType()                    , excToCatch.getMessage()                );        }        catch(const OutOfMemoryException&)        {            throw;        }        catch(...)        {            // Reset and rethrow user error            fReaderMgr.reset();            throw;        }        // Reset and return failure        fReaderMgr.reset();        return false;    }    catch(const OutOfMemoryException&)    {        throw;    }    catch(...)    {        // Reset and rethrow original error        fReaderMgr.reset();        throw;    }    // If we hit the end, then flush the reader manager    if (!retVal)        fReaderMgr.reset();    return retVal;}// ---------------------------------------------------------------------------//  DGXMLScanner: Private scanning methods// ---------------------------------------------------------------------------//  This method will kick off the scanning of the primary content of the//  document, i.e. the elements.bool DGXMLScanner::scanContent(){    //  Go into a loop until we hit the end of the root element, or we fall    //  out because there is no root element.    //    //  We have to do kind of a deeply nested double loop here in order to    //  avoid doing the setup/teardown of the exception handler on each    //  round. Doing it this way we only do it when an exception actually    //  occurs.    bool gotData = true;    bool inMarkup = false;    while (gotData)    {        try        {            while (gotData)            {                //  Sense what the next top level token is. According to what                //  this tells us, we will call something to handle that kind                //  of thing.                unsigned int orgReader;                const XMLTokens curToken = senseNextToken(orgReader);                //  Handle character data and end of file specially. Char data                //  is not markup so we don't want to handle it in the loop                //  below.                if (curToken == Token_CharData)                {                    //  Scan the character data and call appropriate events. Let                    //  him use our local character data buffer for efficiency.                    scanCharData(fCDataBuf);                    continue;                }                else if (curToken == Token_EOF)                {                    //  The element stack better be empty at this point or we                    //  ended prematurely before all elements were closed.                    if (!fElemStack.isEmpty())                    {                        const ElemStack::StackElem* topElem = fElemStack.popTop();                        emitError                        (                            XMLErrs::EndedWithTagsOnStack                            , topElem->fThisElement->getFullName()                        );                    }                    // Its the end of file, so clear the got data flag                    gotData = false;                    continue;                }                // We are in some sort of markup now                inMarkup = true;                //  According to the token we got, call the appropriate                //  scanning method.                switch(curToken)                {                    case Token_CData :                        // Make sure we are within content                        if (fElemStack.isEmpty())                            emitError(XMLErrs::CDATAOutsideOfContent);                        scanCDSection();                        break;                    case Token_Comment :                        scanComment();                        break;                    case Token_EndTag :                        scanEndTag(gotData);                        break;                    case Token_PI :                        scanPI();                        break;                    case Token_StartTag :                        scanStartTag(gotData);                        break;                    default :                        fReaderMgr.skipToChar(chOpenAngle);                        break;                }                if (orgReader != fReaderMgr.getCurrentReaderNum())                    emitError(XMLErrs::PartialMarkupInEntity);                // And we are back out of markup again                inMarkup = false;            }        }        catch(const EndOfEntityException& toCatch)        {            //  If we were in some markup when this happened, then its a            //  partial markup error.            if (inMarkup)                emitError(XMLErrs::PartialMarkupInEntity);            // Send an end of entity reference event            if (fDocHandler)                fDocHandler->endEntityReference(toCatch.getEntity());            inMarkup = false;        }    }    // It went ok, so return success    return true;}void DGXMLScanner::scanEndTag(bool& gotData){    //  Assume we will still have data until proven otherwise. It will only    //  ever be false if this is the end of the root element.    gotData = true;    //  Check if the element stack is empty. If so, then this is an unbalanced    //  element (i.e. more ends than starts, perhaps because of bad text    //  causing one to be skipped.)    if (fElemStack.isEmpty())    {        emitError(XMLErrs::MoreEndThanStartTags);        fReaderMgr.skipPastChar(chCloseAngle);        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);    }    //  Pop the stack of the element we are supposed to be ending. Remember    //  that we don't own this. The stack just keeps them and reuses them.    unsigned int uriId = (fDoNamespaces)        ? fElemStack.getCurrentURI() : fEmptyNamespaceId;    //  Pop the stack of the element we are supposed to be ending. Remember    //  that we don't own this. The stack just keeps them and reuses them.    const ElemStack::StackElem* topElem = fElemStack.popTop();    XMLElementDecl *tempElement = topElem->fThisElement;    // See if it was the root element, to avoid multiple calls below    const bool isRoot = fElemStack.isEmpty();    // Make sure that its the end of the element that we expect    if (!fReaderMgr.skippedString(tempElement->getFullName()))    {        emitError        (            XMLErrs::ExpectedEndOfTagX            , tempElement->getFullName()        );        fReaderMgr.skipPastChar(chCloseAngle);        return;    }    // Make sure we are back on the same reader as where we started    if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())        emitError(XMLErrs::PartialTagMarkupError);    // Skip optional whitespace    fReaderMgr.skipPastSpaces();    // Make sure we find the closing bracket    if (!fReaderMgr.skippedChar(chCloseAngle))    {        emitError        (            XMLErrs::UnterminatedEndTag            , topElem->fThisElement->getFullName()        );    }    //  If validation is enabled, then lets pass him the list of children and    //  this element and let him validate it.    if (fValidate)    {       //       // XML1.0-3rd       // Validity Constraint:        // The declaration matches EMPTY and the element has no content (not even        // entity references, comments, PIs or white space).       //       if ( (topElem->fCommentOrPISeen)               &&            (((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Empty))       {           fValidator->emitError               (               XMLValid::EmptyElemHasContent               , topElem->fThisElement->getFullName()               );       }       //       // XML1.0-3rd       // Validity Constraint:        //        // The declaration matches children and the sequence of child elements        // belongs to the language generated by the regular expression in the        // content model, with optional white space, comments and PIs        // (i.e. markup matching production [27] Misc) between the start-tag and        // the first child element, between child elements, or between the last        // child element and the end-tag.        //       // Note that        //    a CDATA section containing only white space or        //    a reference to an entity whose replacement text is character references        //       expanding to white space do not match the nonterminal S, and hence        //       cannot appear in these positions; however,       //    a reference to an internal entity with a literal value consisting        //       of character references expanding to white space does match S,        //       since its replacement text is the white space resulting from expansion        //       of the character references.       //       if ( (topElem->fReferenceEscaped)               &&            (((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Children))       {           fValidator->emitError               (               XMLValid::ElemChildrenHasInvalidWS               , topElem->fThisElement->getFullName()               );       }        int res = fValidator->checkContent        (            topElem->fThisElement            , topElem->fChildren            , topElem->fChildCount        );        if (res >= 0)        {            //  One of the elements is not valid for the content. NOTE that            //  if no children were provided but the content model requires            //  them, it comes back with a zero value. But we cannot use that            //  to index the child array in this case, and have to put out a            //  special message.            if (!topElem->fChildCount)            {                fValidator->emitError                (                    XMLValid::EmptyNotValidForContent                    , topElem->fThisElement->getFormattedContentModel()                );            }            else if ((unsigned int)res >= topElem->fChildCount)            {                fValidator->emitError                (                    XMLValid::NotEnoughElemsForCM                    , topElem->fThisElement->getFormattedContentModel()                );            }            else            {                fValidator->emitError                (                    XMLValid::ElementNotValidForContent                    , topElem->fChildren[res]->getRawName()                    , topElem->fThisElement->getFormattedContentModel()                );            }        }    }    // If we have a doc handler, tell it about the end tag    if (fDocHandler)    {        fDocHandler->endElement        (            *topElem->fThisElement            , uriId            , isRoot            , (fDoNamespaces)                ? topElem->fThisElement->getElementName()->getPrefix()                : XMLUni::fgZeroLenString        );    }    // If this was the root, then done with content    gotData = !isRoot;}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?