sgxmlscanner.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,881 行 · 第 1/5 页

CPP
1,881
字号
                //  this tells us, we will call something to handle that kind                //  of thing.                unsigned int orgReader;                const XMLTokens curToken = senseNextToken(orgReader);                //  Handle character data and end of file specially. Char data                //  is not markup so we don't want to handle it in the loop                //  below.                if (curToken == Token_CharData)                {                    //  Scan the character data and call appropriate events. Let                    //  him use our local character data buffer for efficiency.                    scanCharData(fCDataBuf);                    continue;                }                else if (curToken == Token_EOF)                {                    //  The element stack better be empty at this point or we                    //  ended prematurely before all elements were closed.                    if (!fElemStack.isEmpty())                    {                        const ElemStack::StackElem* topElem = fElemStack.popTop();                        emitError                        (                            XMLErrs::EndedWithTagsOnStack                            , topElem->fThisElement->getFullName()                        );                    }                    // Its the end of file, so clear the got data flag                    gotData = false;                    continue;                }                // We are in some sort of markup now                inMarkup = true;                //  According to the token we got, call the appropriate                //  scanning method.                switch(curToken)                {                    case Token_CData :                        // Make sure we are within content                        if (fElemStack.isEmpty())                            emitError(XMLErrs::CDATAOutsideOfContent);                        scanCDSection();                        break;                    case Token_Comment :                        scanComment();                        break;                    case Token_EndTag :                        scanEndTag(gotData);                        break;                    case Token_PI :                        scanPI();                        break;                    case Token_StartTag :                        scanStartTag(gotData);                        break;                    default :                        fReaderMgr.skipToChar(chOpenAngle);                        break;                }                if (orgReader != fReaderMgr.getCurrentReaderNum())                    emitError(XMLErrs::PartialMarkupInEntity);                // And we are back out of markup again                inMarkup = false;            }        }        catch(const EndOfEntityException& toCatch)        {            //  If we were in some markup when this happened, then its a            //  partial markup error.            if (inMarkup)                emitError(XMLErrs::PartialMarkupInEntity);            // Send an end of entity reference event            if (fDocHandler)                fDocHandler->endEntityReference(toCatch.getEntity());            inMarkup = false;        }    }    // It went ok, so return success    return true;}void SGXMLScanner::scanEndTag(bool& gotData){    //  Assume we will still have data until proven otherwise. It will only    //  ever be false if this is the end of the root element.    gotData = true;    //  Check if the element stack is empty. If so, then this is an unbalanced    //  element (i.e. more ends than starts, perhaps because of bad text    //  causing one to be skipped.)    if (fElemStack.isEmpty())    {        emitError(XMLErrs::MoreEndThanStartTags);        fReaderMgr.skipPastChar(chCloseAngle);        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);    }    //  Pop the stack of the element we are supposed to be ending. Remember    //  that we don't own this. The stack just keeps them and reuses them.    unsigned int uriId = (fDoNamespaces)        ? fElemStack.getCurrentURI() : fEmptyNamespaceId;    // Make sure that its the end of the element that we expect    XMLCh *elemName = fElemStack.getCurrentSchemaElemName();    const ElemStack::StackElem* topElem = fElemStack.popTop();     XMLElementDecl *tempElement = topElem->fThisElement;     if (!fReaderMgr.skippedString(elemName))    {        emitError        (            XMLErrs::ExpectedEndOfTagX            , elemName        );        fReaderMgr.skipPastChar(chCloseAngle);        return;    }    // See if it was the root element, to avoid multiple calls below    const bool isRoot = fElemStack.isEmpty();    fPSVIElemContext.fErrorOccurred = fErrorStack->pop();    // Make sure we are back on the same reader as where we started    if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())        emitError(XMLErrs::PartialTagMarkupError);    // Skip optional whitespace    fReaderMgr.skipPastSpaces();    // Make sure we find the closing bracket    if (!fReaderMgr.skippedChar(chCloseAngle))    {        emitError        (            XMLErrs::UnterminatedEndTag            , topElem->fThisElement->getFullName()        );    }    if (fValidate && topElem->fThisElement->isDeclared())    {        fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();        if(!fPSVIElemContext.fCurrentTypeInfo)            fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();        else            fPSVIElemContext.fCurrentDV = 0;        if (fPSVIHandler)        {            fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue();            if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString))                fPSVIElemContext.fNormalizedValue = 0;        }    }    else    {        fPSVIElemContext.fCurrentDV = 0;        fPSVIElemContext.fCurrentTypeInfo = 0;        fPSVIElemContext.fNormalizedValue = 0;    }    //  If validation is enabled, then lets pass him the list of children and    //  this element and let him validate it.    DatatypeValidator* psviMemberType = 0;    if (fValidate)    {        int res = fValidator->checkContent        (            topElem->fThisElement            , topElem->fChildren            , topElem->fChildCount        );        if (res >= 0)        {            //  One of the elements is not valid for the content. NOTE that            //  if no children were provided but the content model requires            //  them, it comes back with a zero value. But we cannot use that            //  to index the child array in this case, and have to put out a            //  special message.            if (!topElem->fChildCount)            {                fValidator->emitError                (                    XMLValid::EmptyNotValidForContent                    , topElem->fThisElement->getFormattedContentModel()                );            }            else if ((unsigned int)res >= topElem->fChildCount)            {                fValidator->emitError                (                    XMLValid::NotEnoughElemsForCM                    , topElem->fThisElement->getFormattedContentModel()                );            }            else            {                fValidator->emitError                (                    XMLValid::ElementNotValidForContent                    , topElem->fChildren[res]->getRawName()                    , topElem->fThisElement->getFormattedContentModel()                );            }                    }        // update PSVI info        if (((SchemaValidator*) fValidator)->getErrorOccurred())            fPSVIElemContext.fErrorOccurred = true;        else if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union)            psviMemberType = fValidationContext->getValidatingMemberType();        if (fPSVIHandler)        {            fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified();            if(fPSVIElemContext.fIsSpecified)                fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)topElem->fThisElement)->getDefaultValue();        }        // call matchers and de-activate context        if (toCheckIdentityConstraint())        {            fICHandler->deactivateContext                        (                         (SchemaElementDecl *) topElem->fThisElement                       , fContent.getRawBuffer()                        );        }    }    if (fPSVIHandler)    {        endElementPSVI        (            (SchemaElementDecl*)topElem->fThisElement, psviMemberType        );    }    // now we can reset the datatype buffer, since the     // application has had a chance to copy the characters somewhere else    ((SchemaValidator *)fValidator)->clearDatatypeBuffer();    // If we have a doc handler, tell it about the end tag    if (fDocHandler)    {        fDocHandler->endElement        (            *topElem->fThisElement            , uriId            , isRoot            , fPrefixBuf.getRawBuffer()        );    }    if (!isRoot)    {        // update error information        fErrorStack->push(fErrorStack->pop() || fPSVIElemContext.fErrorOccurred);    }    // If this was the root, then done with content    gotData = !isRoot;    if (gotData) {        // Restore the grammar        fGrammar = fElemStack.getCurrentGrammar();        fGrammarType = fGrammar->getGrammarType();        fValidator->setGrammar(fGrammar);        // Restore the validation flag        fValidate = fElemStack.getValidationFlag();    }}//  This method handles the high level logic of scanning the DOCType//  declaration. This calls the DTDScanner and kicks off both the scanning of//  the internal subset and the scanning of the external subset, if any.////  When we get here the '<!DOCTYPE' part has already been scanned, which is//  what told us that we had a doc type decl to parse.void SGXMLScanner::scanDocTypeDecl(){    // Just skips over it    // REVISIT: Should we issue a warning    static const XMLCh doctypeIE[] =    {            chOpenSquare, chCloseAngle, chNull    };    XMLCh nextCh = fReaderMgr.skipUntilIn(doctypeIE);    if (nextCh == chOpenSquare)        fReaderMgr.skipPastChar(chCloseSquare);    fReaderMgr.skipPastChar(chCloseAngle);}//  This method is called to scan a start tag when we are processing//  namespaces. This method is called after we've scanned the < of a//  start tag. So we have to get the element name, then scan the attributes, //  after which we are either going to see >, />, or attributes followed //  by one of those sequences.bool SGXMLScanner::scanStartTag(bool& gotData){    //  Assume we will still have data until proven otherwise. It will only    //  ever be false if this is the root and its empty.    gotData = true;    // Reset element content    fContent.reset();    //  The current position is after the open bracket, so we need to read in    //  in the element name.    if (!fReaderMgr.getName(fQNameBuf))    {        emitError(XMLErrs::ExpectedElementName);        fReaderMgr.skipToChar(chOpenAngle);        return false;    }    // See if its the root element    const bool isRoot = fElemStack.isEmpty();    // Skip any whitespace after the name    fReaderMgr.skipPastSpaces();    //  First we have to do the rawest attribute scan. We don't do any    //  normalization of them at all, since we don't know yet what type they    //  might be (since we need the element decl in order to do that.)    const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();    bool isEmpty;    unsigned int attCount = rawAttrScan    (        qnameRawBuf        , *fRawAttrList        , isEmpty    );    // save the contentleafname and currentscope before addlevel, for later use    ContentLeafNameTypeVector* cv = 0;    XMLContentModel* cm = 0;    int currentScope = Grammar::TOP_LEVEL_SCOPE;    bool laxThisOne = false;    if (!isRoot)    {        // schema validator will have correct type if validating        SchemaElementDecl* tempElement = (SchemaElementDecl*)            fElemStack.topElement()->fThisElement;        SchemaElementDecl::ModelTypes modelType = tempElement->getModelType();        ComplexTypeInfo *currType = 0;        if (fValidate)        {            currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();            if (currType)                modelType = (SchemaElementDecl::ModelTypes)currType->getContentType();            else // something must have gone wrong                modelType = SchemaElementDecl::Any;        }        else

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?