xsaxmlscanner.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 693 行 · 第 1/2 页

CPP
693
字号
/* * Copyright 2004 The Apache Software Foundation. *  * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at *  *      http://www.apache.org/licenses/LICENSE-2.0 *  * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* * $Id: XSAXMLScanner.cpp,v 1.2 2004/09/29 23:25:42 cargilld Exp $ */// ---------------------------------------------------------------------------//  Includes// ---------------------------------------------------------------------------#include <xercesc/internal/XSAXMLScanner.hpp>#include <xercesc/sax/InputSource.hpp>#include <xercesc/framework/XMLEntityHandler.hpp>#include <xercesc/framework/XMLDocumentHandler.hpp>#include <xercesc/validators/schema/SchemaValidator.hpp>XERCES_CPP_NAMESPACE_BEGIN// ---------------------------------------------------------------------------//  XSAXMLScanner: Constructors and Destructor// ---------------------------------------------------------------------------XSAXMLScanner::XSAXMLScanner( GrammarResolver* const grammarResolver                            , XMLStringPool* const   uriStringPool                            , SchemaGrammar* const   xsaGrammar                            , MemoryManager* const manager) :    SGXMLScanner(0, grammarResolver, manager){    fSchemaGrammar = xsaGrammar;    setURIStringPool(uriStringPool);}XSAXMLScanner::~XSAXMLScanner(){}// ---------------------------------------------------------------------------//  XSAXMLScanner: SGXMLScanner virtual methods// ---------------------------------------------------------------------------//  This method will kick off the scanning of the primary content of thevoid XSAXMLScanner::scanEndTag(bool& gotData){    //  Assume we will still have data until proven otherwise. It will only    //  ever be false if this is the end of the root element.    gotData = true;    //  Check if the element stack is empty. If so, then this is an unbalanced    //  element (i.e. more ends than starts, perhaps because of bad text    //  causing one to be skipped.)    if (fElemStack.isEmpty())    {        emitError(XMLErrs::MoreEndThanStartTags);        fReaderMgr.skipPastChar(chCloseAngle);        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);    }    //  Pop the stack of the element we are supposed to be ending. Remember    //  that we don't own this. The stack just keeps them and reuses them.    unsigned int uriId = fElemStack.getCurrentURI();    // Make sure that its the end of the element that we expect    XMLCh *elemName = fElemStack.getCurrentSchemaElemName();    const ElemStack::StackElem* topElem = fElemStack.popTop();     XMLElementDecl *tempElement = topElem->fThisElement;     if (!fReaderMgr.skippedString(elemName))    {        emitError        (            XMLErrs::ExpectedEndOfTagX, elemName        );        fReaderMgr.skipPastChar(chCloseAngle);        return;    }    // See if it was the root element, to avoid multiple calls below    const bool isRoot = fElemStack.isEmpty();    // Make sure we are back on the same reader as where we started    if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())        emitError(XMLErrs::PartialTagMarkupError);    // Skip optional whitespace    fReaderMgr.skipPastSpaces();    // Make sure we find the closing bracket    if (!fReaderMgr.skippedChar(chCloseAngle))    {        emitError        (            XMLErrs::UnterminatedEndTag, topElem->fThisElement->getFullName()        );    }    //  If validation is enabled, then lets pass him the list of children and    //  this element and let him validate it.    if (fValidate)    {        int res = fValidator->checkContent        (            topElem->fThisElement, topElem->fChildren, topElem->fChildCount        );        if (res >= 0)        {            //  One of the elements is not valid for the content. NOTE that            //  if no children were provided but the content model requires            //  them, it comes back with a zero value. But we cannot use that            //  to index the child array in this case, and have to put out a            //  special message.            if (!topElem->fChildCount)            {                fValidator->emitError                (                    XMLValid::EmptyNotValidForContent                    , topElem->fThisElement->getFormattedContentModel()                );            }            else if ((unsigned int)res >= topElem->fChildCount)            {                fValidator->emitError                (                    XMLValid::NotEnoughElemsForCM                    , topElem->fThisElement->getFormattedContentModel()                );            }            else            {                fValidator->emitError                (                    XMLValid::ElementNotValidForContent                    , topElem->fChildren[res]->getRawName()                    , topElem->fThisElement->getFormattedContentModel()                );            }                    }    }    // now we can reset the datatype buffer, since the     // application has had a chance to copy the characters somewhere else    ((SchemaValidator *)fValidator)->clearDatatypeBuffer();    // If we have a doc handler, tell it about the end tag    if (fDocHandler)    {        fDocHandler->endElement        (            *topElem->fThisElement, uriId, isRoot, fPrefixBuf.getRawBuffer()        );    }    // If this was the root, then done with content    gotData = !isRoot;    if (gotData) {        // Restore the grammar        fGrammar = fElemStack.getCurrentGrammar();        fGrammarType = fGrammar->getGrammarType();        fValidator->setGrammar(fGrammar);        // Restore the validation flag        fValidate = fElemStack.getValidationFlag();    }}bool XSAXMLScanner::scanStartTag(bool& gotData){    //  Assume we will still have data until proven otherwise. It will only    //  ever be false if this is the root and its empty.    gotData = true;    // Reset element content    fContent.reset();    //  The current position is after the open bracket, so we need to read in    //  in the element name.    if (!fReaderMgr.getName(fQNameBuf))    {        emitError(XMLErrs::ExpectedElementName);        fReaderMgr.skipToChar(chOpenAngle);        return false;    }    // See if its the root element    const bool isRoot = fElemStack.isEmpty();    // Skip any whitespace after the name    fReaderMgr.skipPastSpaces();    //  First we have to do the rawest attribute scan. We don't do any    //  normalization of them at all, since we don't know yet what type they    //  might be (since we need the element decl in order to do that.)    const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();    bool isEmpty;    unsigned int attCount = rawAttrScan(qnameRawBuf, *fRawAttrList, isEmpty);    // save the contentleafname and currentscope before addlevel, for later use    ContentLeafNameTypeVector* cv = 0;    XMLContentModel* cm = 0;    int currentScope = Grammar::TOP_LEVEL_SCOPE;    bool laxThisOne = false;    if (!isRoot)    {        // schema validator will have correct type if validating        SchemaElementDecl* tempElement = (SchemaElementDecl*)            fElemStack.topElement()->fThisElement;        SchemaElementDecl::ModelTypes modelType = tempElement->getModelType();        ComplexTypeInfo *currType = 0;        if (fValidate)        {            currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();            if (currType)                modelType = (SchemaElementDecl::ModelTypes)currType->getContentType();            else // something must have gone wrong                modelType = SchemaElementDecl::Any;        }        else {            currType = tempElement->getComplexTypeInfo();        }        if ((modelType == SchemaElementDecl::Mixed_Simple)          ||  (modelType == SchemaElementDecl::Mixed_Complex)          ||  (modelType == SchemaElementDecl::Children))        {            cm = currType->getContentModel();            cv = cm->getContentLeafNameTypeVector();            currentScope = fElemStack.getCurrentScope();        }        else if (modelType == SchemaElementDecl::Any) {            laxThisOne = true;        }    }    //  Now, since we might have to update the namespace map for this element,    //  but we don't have the element decl yet, we just tell the element stack    //  to expand up to get ready.    unsigned int elemDepth = fElemStack.addLevel();    fElemStack.setValidationFlag(fValidate);    //  Make an initial pass through the list and find any xmlns attributes or    //  schema attributes.    if (attCount)        scanRawAttrListforNameSpaces(attCount);    //  Resolve the qualified name to a URI and name so that we can look up    //  the element decl for this element. We have now update the prefix to    //  namespace map so we should get the correct element now.    int prefixColonPos = -1;    unsigned int uriId = resolveQName    (        qnameRawBuf, fPrefixBuf, ElemStack::Mode_Element, prefixColonPos    );    //if schema, check if we should lax or skip the validation of this element    bool parentValidation = fValidate;    if (cv) {        QName element(fPrefixBuf.getRawBuffer(), &qnameRawBuf[prefixColonPos + 1], uriId, fMemoryManager);        // elementDepth will be > 0, as cv is only constructed if element is not        // root.        laxThisOne = laxElementValidation(&element, cv, cm, elemDepth - 1);    }    //  Look up the element now in the grammar. This will get us back a    //  generic element decl object. We tell him to fault one in if he does    //  not find it.    bool wasAdded = false;    const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1];    XMLElementDecl* elemDecl = fGrammar->getElemDecl    (        uriId, nameRawBuf, qnameRawBuf, currentScope    );    if (!elemDecl)    {        // URI is different, so we try to switch grammar        if (uriId != fURIStringPool->getId(fGrammar->getTargetNamespace())) {            switchGrammar(getURIText(uriId), laxThisOne);        }        // look for a global element declaration        elemDecl = fGrammar->getElemDecl(            uriId, nameRawBuf, qnameRawBuf, Grammar::TOP_LEVEL_SCOPE        );        if (!elemDecl)        {            // if still not found, look in list of undeclared elements            elemDecl = fElemNonDeclPool->getByKey(                nameRawBuf, uriId, Grammar::TOP_LEVEL_SCOPE);            if (!elemDecl)            {                elemDecl = new (fMemoryManager) SchemaElementDecl                (                    fPrefixBuf.getRawBuffer(), nameRawBuf, uriId                    , SchemaElementDecl::Any, Grammar::TOP_LEVEL_SCOPE                    , fMemoryManager                );                elemDecl->setId                (                    fElemNonDeclPool->put                    (                        (void*)elemDecl->getBaseName(), uriId                        , Grammar::TOP_LEVEL_SCOPE, (SchemaElementDecl*)elemDecl                    )                );                wasAdded = true;            }		}    }    //  We do something different here according to whether we found the    //  element or not.    if (wasAdded || !elemDecl->isDeclared())    {        if (laxThisOne) {            fValidate = false;            fElemStack.setValidationFlag(fValidate);        }        // If validating then emit an error        if (fValidate)        {            // This is to tell the reuse Validator that this element was            // faulted-in, was not an element in the grammar pool originally            elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);            fValidator->emitError            (                XMLValid::ElementNotDefined, elemDecl->getFullName()

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?