igxmlscanner.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,855 行 · 第 1/5 页

CPP
1,855
字号
                );                toFill.addElement(curPair);            }             else            {                curPair = toFill.elementAt(attCount);                curPair->set(fAttNameBuf.getRawBuffer(), fAttValueBuf.getRawBuffer());            }            // And bump the count of attributes we've gotten            attCount++;            // And go to the top again for another attribute            continue;        }        //  It was some special case character so do all of the checks and        //  deal with it.        if (!nextCh)            ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);        if (nextCh == chForwardSlash)        {            fReaderMgr.getNextChar();            isEmpty = true;            if (!fReaderMgr.skippedChar(chCloseAngle))                emitError(XMLErrs::UnterminatedStartTag, elemName);            break;        }        else if (nextCh == chCloseAngle)        {            fReaderMgr.getNextChar();            break;        }        else if (nextCh == chOpenAngle)        {            //  Check for this one specially, since its going to be common            //  and it is kind of auto-recovering since we've already hit the            //  next open bracket, which is what we would have seeked to (and            //  skipped this whole tag.)            emitError(XMLErrs::UnterminatedStartTag, elemName);            break;        }        else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))        {            //  Check for this one specially, which is probably a missing            //  attribute name, e.g. ="value". Just issue expected name            //  error and eat the quoted string, then jump back to the            //  top again.            emitError(XMLErrs::ExpectedAttrName);            fReaderMgr.getNextChar();            fReaderMgr.skipQuotedString(nextCh);            fReaderMgr.skipPastSpaces();            continue;        }    }    return attCount;}//  This method will kick off the scanning of the primary content of the//  document, i.e. the elements.bool IGXMLScanner::scanContent(){    //  Go into a loop until we hit the end of the root element, or we fall    //  out because there is no root element.    //    //  We have to do kind of a deeply nested double loop here in order to    //  avoid doing the setup/teardown of the exception handler on each    //  round. Doing it this way we only do it when an exception actually    //  occurs.    bool gotData = true;    bool inMarkup = false;    while (gotData)    {        try        {            while (gotData)            {                //  Sense what the next top level token is. According to what                //  this tells us, we will call something to handle that kind                //  of thing.                unsigned int orgReader;                const XMLTokens curToken = senseNextToken(orgReader);                //  Handle character data and end of file specially. Char data                //  is not markup so we don't want to handle it in the loop                //  below.                if (curToken == Token_CharData)                {                    //  Scan the character data and call appropriate events. Let                    //  him use our local character data buffer for efficiency.                    scanCharData(fCDataBuf);                    continue;                }                else if (curToken == Token_EOF)                {                    //  The element stack better be empty at this point or we                    //  ended prematurely before all elements were closed.                    if (!fElemStack.isEmpty())                    {                        const ElemStack::StackElem* topElem = fElemStack.popTop();                        emitError                        (                            XMLErrs::EndedWithTagsOnStack                            , topElem->fThisElement->getFullName()                        );                    }                    // Its the end of file, so clear the got data flag                    gotData = false;                    continue;                }                // We are in some sort of markup now                inMarkup = true;                //  According to the token we got, call the appropriate                //  scanning method.                switch(curToken)                {                    case Token_CData :                        // Make sure we are within content                        if (fElemStack.isEmpty())                            emitError(XMLErrs::CDATAOutsideOfContent);                        scanCDSection();                        break;                    case Token_Comment :                        scanComment();                        break;                    case Token_EndTag :                        scanEndTag(gotData);                        break;                    case Token_PI :                        scanPI();                        break;                    case Token_StartTag :                        if (fDoNamespaces)                            scanStartTagNS(gotData);                        else                            scanStartTag(gotData);                        break;                    default :                        fReaderMgr.skipToChar(chOpenAngle);                        break;                }                if (orgReader != fReaderMgr.getCurrentReaderNum())                    emitError(XMLErrs::PartialMarkupInEntity);                // And we are back out of markup again                inMarkup = false;            }        }        catch(const EndOfEntityException& toCatch)        {            //  If we were in some markup when this happened, then its a            //  partial markup error.            if (inMarkup)                emitError(XMLErrs::PartialMarkupInEntity);            // Send an end of entity reference event            if (fDocHandler)                fDocHandler->endEntityReference(toCatch.getEntity());            inMarkup = false;        }    }    // It went ok, so return success    return true;}void IGXMLScanner::scanEndTag(bool& gotData){    //  Assume we will still have data until proven otherwise. It will only    //  ever be false if this is the end of the root element.    gotData = true;    //  Check if the element stack is empty. If so, then this is an unbalanced    //  element (i.e. more ends than starts, perhaps because of bad text    //  causing one to be skipped.)    if (fElemStack.isEmpty())    {        emitError(XMLErrs::MoreEndThanStartTags);        fReaderMgr.skipPastChar(chCloseAngle);        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);    }    //  Pop the stack of the element we are supposed to be ending. Remember    //  that we don't own this. The stack just keeps them and reuses them.    unsigned int uriId = (fDoNamespaces)        ? fElemStack.getCurrentURI() : fEmptyNamespaceId;    // these get initialized below    const ElemStack::StackElem* topElem = 0;    XMLElementDecl *tempElement = 0;    XMLCh *elemName = 0;    // Make sure that its the end of the element that we expect    // special case for schema validation, whose element decls,    // obviously don't contain prefix information    if(fGrammarType == Grammar::SchemaGrammarType)    {        elemName = fElemStack.getCurrentSchemaElemName();        topElem = fElemStack.popTop();         tempElement = topElem->fThisElement;     }    else    {        topElem = fElemStack.popTop();         tempElement = topElem->fThisElement;        elemName = (XMLCh *)tempElement->getFullName();    }    if (!fReaderMgr.skippedString(elemName))    {        emitError        (            XMLErrs::ExpectedEndOfTagX            , elemName        );        fReaderMgr.skipPastChar(chCloseAngle);        return;    }    // See if it was the root element, to avoid multiple calls below    const bool isRoot = fElemStack.isEmpty();    // Make sure we are back on the same reader as where we started    if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())        emitError(XMLErrs::PartialTagMarkupError);    // Skip optional whitespace    fReaderMgr.skipPastSpaces();    // Make sure we find the closing bracket    if (!fReaderMgr.skippedChar(chCloseAngle))    {        emitError        (            XMLErrs::UnterminatedEndTag            , topElem->fThisElement->getFullName()        );    }    if (fGrammarType == Grammar::SchemaGrammarType)    {        // reset error occurred        fPSVIElemContext.fErrorOccurred = fErrorStack->pop();        if (fValidate && topElem->fThisElement->isDeclared())        {            fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();            if(!fPSVIElemContext.fCurrentTypeInfo)                fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();            else                fPSVIElemContext.fCurrentDV = 0;            if(fPSVIHandler)            {                fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue();                if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString))                    fPSVIElemContext.fNormalizedValue = 0;            }        }        else        {            fPSVIElemContext.fCurrentDV = 0;            fPSVIElemContext.fCurrentTypeInfo = 0;            fPSVIElemContext.fNormalizedValue = 0;        }    }    //  If validation is enabled, then lets pass him the list of children and    //  this element and let him validate it.    DatatypeValidator* psviMemberType = 0;    if (fValidate)    {       //       // XML1.0-3rd       // Validity Constraint:        // The declaration matches EMPTY and the element has no content (not even        // entity references, comments, PIs or white space).       //       if ( (fGrammarType == Grammar::DTDGrammarType) &&            (topElem->fCommentOrPISeen)               &&            (((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Empty))       {           fValidator->emitError               (               XMLValid::EmptyElemHasContent               , topElem->fThisElement->getFullName()               );       }       //       // XML1.0-3rd       // Validity Constraint:        //        // The declaration matches children and the sequence of child elements        // belongs to the language generated by the regular expression in the        // content model, with optional white space, comments and PIs        // (i.e. markup matching production [27] Misc) between the start-tag and        // the first child element, between child elements, or between the last        // child element and the end-tag.        //       // Note that        //    a CDATA section containing only white space or        //    a reference to an entity whose replacement text is character references        //       expanding to white space do not match the nonterminal S, and hence        //       cannot appear in these positions; however,       //    a reference to an internal entity with a literal value consisting        //       of character references expanding to white space does match S,        //       since its replacement text is the white space resulting from expansion        //       of the character references.       //       if ( (fGrammarType == Grammar::DTDGrammarType)  &&            (topElem->fReferenceEscaped)               &&            (((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Children))       {           fValidator->emitError               (               XMLValid::ElemChildrenHasInvalidWS               , topElem->fThisElement->getFullName()               );       }               int res = fValidator->checkContent        (            topElem->fThisElement            , topElem->fChildren            , topElem->fChildCount        );        if (res >= 0)        {            //  One of the elements is not valid for the content. NOTE that            //  if no children were provided but the content model requires            //  them, it comes back with a zero value. But we cannot use that            //  to index the child array in this case, and have to put out a            //  special message.            if (!topElem->fChildCount)            {                fValidator->emitError                (                    XMLValid::EmptyNotValidForContent                    , topElem->fThisElement->getFormattedContentModel()                );            }            else if ((unsigned int)res >= topElem->fChildCount)            {                fValidator->emitError                (                    XMLValid::NotEnoughElemsForCM                    , topElem->fThisElement->getFormattedContentModel()                );            }            else            {                fValidator->emitError                (                    XMLValid::ElementNotValidForContent                    , topElem->fChildren[res]->getRawName()                    , topElem->fThisElement->getFormattedContentModel()

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?