igxmlscanner.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,855 行 · 第 1/5 页

CPP
1,855
字号
/* * Copyright 2002,2004 The Apache Software Foundation. *  * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at *  *      http://www.apache.org/licenses/LICENSE-2.0 *  * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* * $Id: IGXMLScanner.cpp,v 1.75 2004/09/21 16:10:47 peiyongz Exp $ */// ---------------------------------------------------------------------------//  Includes// ---------------------------------------------------------------------------#include <xercesc/internal/IGXMLScanner.hpp>#include <xercesc/util/RuntimeException.hpp>#include <xercesc/util/HashPtr.hpp>#include <xercesc/util/UnexpectedEOFException.hpp>#include <xercesc/sax/InputSource.hpp>#include <xercesc/framework/XMLDocumentHandler.hpp>#include <xercesc/framework/XMLEntityHandler.hpp>#include <xercesc/framework/XMLPScanToken.hpp>#include <xercesc/internal/EndOfEntityException.hpp>#include <xercesc/framework/MemoryManager.hpp>#include <xercesc/framework/XMLGrammarPool.hpp>#include <xercesc/framework/XMLDTDDescription.hpp>#include <xercesc/framework/psvi/PSVIHandler.hpp>#include <xercesc/framework/psvi/PSVIAttributeList.hpp>#include <xercesc/validators/common/GrammarResolver.hpp>#include <xercesc/validators/DTD/DocTypeHandler.hpp>#include <xercesc/validators/DTD/DTDScanner.hpp>#include <xercesc/validators/DTD/DTDValidator.hpp>#include <xercesc/validators/schema/SchemaValidator.hpp>#include <xercesc/validators/schema/identity/IdentityConstraintHandler.hpp>#include <xercesc/validators/schema/identity/IC_Selector.hpp>#include <xercesc/util/OutOfMemoryException.hpp>XERCES_CPP_NAMESPACE_BEGIN// ---------------------------------------------------------------------------//  IGXMLScanner: Constructors and Destructor// ---------------------------------------------------------------------------IGXMLScanner::IGXMLScanner( XMLValidator* const  valToAdopt                          , GrammarResolver* const grammarResolver                          , MemoryManager* const manager) :    XMLScanner(valToAdopt, grammarResolver, manager)    , fSeeXsi(false)    , fGrammarType(Grammar::UnKnown)    , fElemStateSize(16)    , fElemState(0)    , fContent(1023, manager)    , fRawAttrList(0)    , fDTDValidator(0)    , fSchemaValidator(0)    , fDTDGrammar(0)    , fICHandler(0)    , fLocationPairs(0)    , fDTDElemNonDeclPool(0)    , fSchemaElemNonDeclPool(0)    , fElemCount(0)    , fAttDefRegistry(0)    , fUndeclaredAttrRegistry(0)    , fUndeclaredAttrRegistryNS(0)    , fPSVIAttrList(0)    , fModel(0)    , fPSVIElement(0)    , fErrorStack(0)        {    try    {         commonInit();         // use fDTDValidator as the default validator         if (!valToAdopt)             fValidator = fDTDValidator;    }    catch(const OutOfMemoryException&)    {        throw;    }    catch(...)    {        cleanUp();        throw;    }}IGXMLScanner::IGXMLScanner( XMLDocumentHandler* const docHandler                          , DocTypeHandler* const     docTypeHandler                          , XMLEntityHandler* const   entityHandler                          , XMLErrorReporter* const   errHandler                          , XMLValidator* const       valToAdopt                          , GrammarResolver* const    grammarResolver                          , MemoryManager* const      manager) :    XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, grammarResolver, manager)    , fSeeXsi(false)    , fGrammarType(Grammar::UnKnown)    , fElemStateSize(16)    , fElemState(0)    , fContent(1023, manager)    , fRawAttrList(0)    , fDTDValidator(0)    , fSchemaValidator(0)    , fDTDGrammar(0)    , fICHandler(0)    , fLocationPairs(0)    , fDTDElemNonDeclPool(0)    , fSchemaElemNonDeclPool(0)    , fElemCount(0)    , fAttDefRegistry(0)    , fUndeclaredAttrRegistry(0)    , fUndeclaredAttrRegistryNS(0)    , fPSVIAttrList(0)    , fModel(0)    , fPSVIElement(0)    , fErrorStack(0)       {    try    {	        commonInit();        //use fDTDValidator as the default validator        if (!valToAdopt)            fValidator = fDTDValidator;    }    catch(const OutOfMemoryException&)    {        throw;    }    catch(...)    {        cleanUp();        throw;    }}IGXMLScanner::~IGXMLScanner(){    cleanUp();}// ---------------------------------------------------------------------------//  XMLScanner: Getter methods// ---------------------------------------------------------------------------NameIdPool<DTDEntityDecl>* IGXMLScanner::getEntityDeclPool(){    if(!fDTDGrammar)        return 0;    return fDTDGrammar->getEntityDeclPool();}const NameIdPool<DTDEntityDecl>* IGXMLScanner::getEntityDeclPool() const{    if(!fDTDGrammar)        return 0;    return fDTDGrammar->getEntityDeclPool();}// ---------------------------------------------------------------------------//  IGXMLScanner: Main entry point to scan a document// ---------------------------------------------------------------------------void IGXMLScanner::scanDocument(const InputSource& src){    //  Bump up the sequence id for this parser instance. This will invalidate    //  any previous progressive scan tokens.    fSequenceId++;    try    {        //  Reset the scanner and its plugged in stuff for a new run. This        //  resets all the data structures, creates the initial reader and        //  pushes it on the stack, and sets up the base document path.        scanReset(src);        // If we have a document handler, then call the start document        if (fDocHandler)            fDocHandler->startDocument();        //  Scan the prolog part, which is everything before the root element        //  including the DTD subsets.        scanProlog();        //  If we got to the end of input, then its not a valid XML file.        //  Else, go on to scan the content.        if (fReaderMgr.atEOF())        {            emitError(XMLErrs::EmptyMainEntity);        }        else        {            // Scan content, and tell it its not an external entity            if (scanContent())            {                // Do post-parse validation if required                if (fValidate)                {                    //  We handle ID reference semantics at this level since                    //  its required by XML 1.0.                    checkIDRefs();                    // Then allow the validator to do any extra stuff it wants//                    fValidator->postParseValidation();                }                // That went ok, so scan for any miscellaneous stuff                if (!fReaderMgr.atEOF())                    scanMiscellaneous();            }        }        // If we have a document handler, then call the end document        if (fDocHandler)            fDocHandler->endDocument();        //cargill debug:        //fGrammarResolver->getXSModel();        // Reset the reader manager to close all files, sockets, etc...        fReaderMgr.reset();    }    //  NOTE:    //    //  In all of the error processing below, the emitError() call MUST come    //  before the flush of the reader mgr, or it will fail because it tries    //  to find out the position in the XML source of the error.    catch(const XMLErrs::Codes)    {        // This is a 'first fatal error' type exit, so reset and fall through        fReaderMgr.reset();    }    catch(const XMLValid::Codes)    {        // This is a 'first fatal error' type exit, so reset and fall through        fReaderMgr.reset();    }    catch(const XMLException& excToCatch)    {        //  Emit the error and catch any user exception thrown from here. Make        //  sure in all cases we flush the reader manager.        fInException = true;        try        {            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)                emitError                (                    XMLErrs::XMLException_Warning                    , excToCatch.getType()                    , excToCatch.getMessage()                );            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)                emitError                (                    XMLErrs::XMLException_Fatal                    , excToCatch.getType()                    , excToCatch.getMessage()                );            else                emitError                (                    XMLErrs::XMLException_Error                    , excToCatch.getType()                    , excToCatch.getMessage()                );        }        catch(const OutOfMemoryException&)        {            throw;        }        catch(...)        {            // Flush the reader manager and rethrow user's error            fReaderMgr.reset();            throw;        }        // If it returned, then reset the reader manager and fall through        fReaderMgr.reset();    }    catch(const OutOfMemoryException&)    {        throw;    }    catch(...)    {        // Reset and rethrow        fReaderMgr.reset();        throw;    }}bool IGXMLScanner::scanNext(XMLPScanToken& token){    // Make sure this token is still legal    if (!isLegalToken(token))        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_BadPScanToken, fMemoryManager);    // Find the next token and remember the reader id    unsigned int orgReader;    XMLTokens curToken;    bool retVal = true;    try    {        while (true)        {            //  We have to handle any end of entity exceptions that happen here.            //  We could be at the end of X nested entities, each of which will            //  generate an end of entity exception as we try to move forward.            try            {                curToken = senseNextToken(orgReader);                break;            }            catch(const EndOfEntityException& toCatch)            {                // Send an end of entity reference event                if (fDocHandler)                    fDocHandler->endEntityReference(toCatch.getEntity());            }        }        if (curToken == Token_CharData)        {            scanCharData(fCDataBuf);        }        else if (curToken == Token_EOF)        {            if (!fElemStack.isEmpty())            {                const ElemStack::StackElem* topElem = fElemStack.popTop();                emitError                (                    XMLErrs::EndedWithTagsOnStack                    , topElem->fThisElement->getFullName()                );            }            retVal = false;        }        else        {            // Its some sort of markup            bool gotData = true;            switch(curToken)            {                case Token_CData :                    // Make sure we are within content                    if (fElemStack.isEmpty())                        emitError(XMLErrs::CDATAOutsideOfContent);                    scanCDSection();                    break;                case Token_Comment :                    scanComment();                    break;                case Token_EndTag :

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?