wfxmlscanner.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,883 行 · 第 1/5 页

CPP
1,883
字号
/* * Copyright 2002,2003-2004 The Apache Software Foundation. *  * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at *  *      http://www.apache.org/licenses/LICENSE-2.0 *  * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//*  * $Id: WFXMLScanner.cpp,v 1.25 2004/09/28 21:27:38 peiyongz Exp $ */// ---------------------------------------------------------------------------//  Includes// ---------------------------------------------------------------------------#include <xercesc/internal/WFXMLScanner.hpp>#include <xercesc/util/Janitor.hpp>#include <xercesc/util/RuntimeException.hpp>#include <xercesc/util/UnexpectedEOFException.hpp>#include <xercesc/sax/InputSource.hpp>#include <xercesc/framework/XMLDocumentHandler.hpp>#include <xercesc/framework/XMLEntityHandler.hpp>#include <xercesc/framework/XMLPScanToken.hpp>#include <xercesc/framework/XMLValidityCodes.hpp>#include <xercesc/internal/EndOfEntityException.hpp>#include <xercesc/util/OutOfMemoryException.hpp>XERCES_CPP_NAMESPACE_BEGIN// ---------------------------------------------------------------------------//  WFXMLScanner: Constructors and Destructor// ---------------------------------------------------------------------------WFXMLScanner::WFXMLScanner( XMLValidator* const  valToAdopt                          , GrammarResolver* const grammarResolver                          , MemoryManager* const manager) :    XMLScanner(valToAdopt, grammarResolver, manager)    , fElementIndex(0)    , fElements(0)    , fEntityTable(0)    , fAttrNameHashList(0)    , fAttrNSList(0)    , fElementLookup(0){    try    {        commonInit();    }    catch(const OutOfMemoryException&)    {        throw;    }    catch(...)    {        cleanUp();        throw;    }}WFXMLScanner::WFXMLScanner( XMLDocumentHandler* const docHandler                          , DocTypeHandler* const     docTypeHandler                          , XMLEntityHandler* const   entityHandler                          , XMLErrorReporter* const   errHandler                          , XMLValidator* const       valToAdopt                          , GrammarResolver* const    grammarResolver                          , MemoryManager* const      manager) :    XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, grammarResolver, manager)    , fElementIndex(0)    , fElements(0)    , fEntityTable(0)    , fAttrNameHashList(0)    , fAttrNSList(0)    , fElementLookup(0){    try    {	        commonInit();    }    catch(const OutOfMemoryException&)    {        throw;    }    catch(...)    {        cleanUp();        throw;    }}WFXMLScanner::~WFXMLScanner(){    cleanUp();}// ---------------------------------------------------------------------------//  XMLScanner: Getter methods// ---------------------------------------------------------------------------NameIdPool<DTDEntityDecl>* WFXMLScanner::getEntityDeclPool(){    return 0;}const NameIdPool<DTDEntityDecl>* WFXMLScanner::getEntityDeclPool() const{    return 0;}// ---------------------------------------------------------------------------//  WFXMLScanner: Main entry point to scan a document// ---------------------------------------------------------------------------void WFXMLScanner::scanDocument(const InputSource& src){    //  Bump up the sequence id for this parser instance. This will invalidate    //  any previous progressive scan tokens.    fSequenceId++;    try    {        //  Reset the scanner and its plugged in stuff for a new run. This        //  resets all the data structures, creates the initial reader and        //  pushes it on the stack, and sets up the base document path.        scanReset(src);        // If we have a document handler, then call the start document        if (fDocHandler)            fDocHandler->startDocument();        //  Scan the prolog part, which is everything before the root element        //  including the DTD subsets.        scanProlog();        //  If we got to the end of input, then its not a valid XML file.        //  Else, go on to scan the content.        if (fReaderMgr.atEOF())        {            emitError(XMLErrs::EmptyMainEntity);        }        else        {            // Scan content, and tell it its not an external entity            if (scanContent())            {                // That went ok, so scan for any miscellaneous stuff                if (!fReaderMgr.atEOF())                    scanMiscellaneous();            }        }        // If we have a document handler, then call the end document        if (fDocHandler)            fDocHandler->endDocument();        // Reset the reader manager to close all files, sockets, etc...        fReaderMgr.reset();    }    //  NOTE:    //    //  In all of the error processing below, the emitError() call MUST come    //  before the flush of the reader mgr, or it will fail because it tries    //  to find out the position in the XML source of the error.    catch(const XMLErrs::Codes)    {        // This is a 'first fatal error' type exit, so reset and fall through        fReaderMgr.reset();    }    catch(const XMLValid::Codes)    {        // This is a 'first fatal error' type exit, so reset and fall through        fReaderMgr.reset();    }    catch(const XMLException& excToCatch)    {        //  Emit the error and catch any user exception thrown from here. Make        //  sure in all cases we flush the reader manager.        fInException = true;        try        {            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)                emitError                (                    XMLErrs::XMLException_Warning                    , excToCatch.getType()                    , excToCatch.getMessage()                );            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)                emitError                (                    XMLErrs::XMLException_Fatal                    , excToCatch.getType()                    , excToCatch.getMessage()                );            else                emitError                (                    XMLErrs::XMLException_Error                    , excToCatch.getType()                    , excToCatch.getMessage()                );        }        catch(const OutOfMemoryException&)        {            throw;        }        catch(...)        {            // Flush the reader manager and rethrow user's error            fReaderMgr.reset();            throw;        }        // If it returned, then reset the reader manager and fall through        fReaderMgr.reset();    }    catch(const OutOfMemoryException&)    {        throw;    }    catch(...)    {        // Reset and rethrow        fReaderMgr.reset();        throw;    }}bool WFXMLScanner::scanNext(XMLPScanToken& token){    // Make sure this token is still legal    if (!isLegalToken(token))        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_BadPScanToken, fMemoryManager);    // Find the next token and remember the reader id    unsigned int orgReader;    XMLTokens curToken;    bool retVal = true;    try    {        while (true)        {            //  We have to handle any end of entity exceptions that happen here.            //  We could be at the end of X nested entities, each of which will            //  generate an end of entity exception as we try to move forward.            try            {                curToken = senseNextToken(orgReader);                break;            }            catch(const EndOfEntityException& toCatch)            {                // Send an end of entity reference event                if (fDocHandler)                    fDocHandler->endEntityReference(toCatch.getEntity());            }        }        if (curToken == Token_CharData)        {            scanCharData(fCDataBuf);        }        else if (curToken == Token_EOF)        {            if (!fElemStack.isEmpty())            {                const ElemStack::StackElem* topElem = fElemStack.popTop();                emitError                (                    XMLErrs::EndedWithTagsOnStack                    , topElem->fThisElement->getFullName()                );            }            retVal = false;        }        else        {            // Its some sort of markup            bool gotData = true;            switch(curToken)            {                case Token_CData :                    // Make sure we are within content                    if (fElemStack.isEmpty())                        emitError(XMLErrs::CDATAOutsideOfContent);                    scanCDSection();                    break;                case Token_Comment :                    scanComment();                    break;                case Token_EndTag :                    scanEndTag(gotData);                    break;                case Token_PI :                    scanPI();                    break;                case Token_StartTag :                    if (fDoNamespaces)                        scanStartTagNS(gotData);                    else                        scanStartTag(gotData);                    break;                default :                    fReaderMgr.skipToChar(chOpenAngle);                    break;            }            if (orgReader != fReaderMgr.getCurrentReaderNum())                emitError(XMLErrs::PartialMarkupInEntity);            // If we hit the end, then do the miscellaneous part            if (!gotData)            {                // That went ok, so scan for any miscellaneous stuff                scanMiscellaneous();                if (fDocHandler)                    fDocHandler->endDocument();            }        }    }    //  NOTE:    //    //  In all of the error processing below, the emitError() call MUST come    //  before the flush of the reader mgr, or it will fail because it tries    //  to find out the position in the XML source of the error.    catch(const XMLErrs::Codes)    {        // This is a 'first failure' exception, so reset and return failure        fReaderMgr.reset();        return false;    }    catch(const XMLValid::Codes)    {        // This is a 'first fatal error' type exit, so reset and reuturn failure        fReaderMgr.reset();        return false;    }    catch(const XMLException& excToCatch)    {        //  Emit the error and catch any user exception thrown from here. Make        //  sure in all cases we flush the reader manager.        fInException = true;        try        {            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)                emitError                (                    XMLErrs::XMLException_Warning                    , excToCatch.getType()                    , excToCatch.getMessage()                );            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)                emitError                (                    XMLErrs::XMLException_Fatal                    , excToCatch.getType()                    , excToCatch.getMessage()                );            else                emitError                (

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?