dgxmlscanner.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,902 行 · 第 1/5 页
CPP
1,902 行
/* * Copyright 2002, 2003,2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* * $Id: DGXMLScanner.cpp,v 1.54 2004/09/28 21:27:38 peiyongz Exp $ */// ---------------------------------------------------------------------------// Includes// ---------------------------------------------------------------------------#include <xercesc/internal/DGXMLScanner.hpp>#include <xercesc/util/Janitor.hpp>#include <xercesc/util/RuntimeException.hpp>#include <xercesc/util/UnexpectedEOFException.hpp>#include <xercesc/util/XMLUri.hpp>#include <xercesc/framework/URLInputSource.hpp>#include <xercesc/framework/LocalFileInputSource.hpp>#include <xercesc/framework/XMLDocumentHandler.hpp>#include <xercesc/framework/XMLEntityHandler.hpp>#include <xercesc/framework/XMLPScanToken.hpp>#include <xercesc/framework/XMLGrammarPool.hpp>#include <xercesc/framework/XMLDTDDescription.hpp>#include <xercesc/internal/EndOfEntityException.hpp>#include <xercesc/validators/common/GrammarResolver.hpp>#include <xercesc/validators/DTD/DocTypeHandler.hpp>#include <xercesc/validators/DTD/DTDScanner.hpp>#include <xercesc/validators/DTD/DTDValidator.hpp>#include <xercesc/util/OutOfMemoryException.hpp>#include <xercesc/util/XMLResourceIdentifier.hpp>#include <xercesc/util/HashPtr.hpp>XERCES_CPP_NAMESPACE_BEGIN// ---------------------------------------------------------------------------// DGXMLScanner: Constructors and Destructor// ---------------------------------------------------------------------------DGXMLScanner::DGXMLScanner(XMLValidator* const valToAdopt , GrammarResolver* const grammarResolver , MemoryManager* const manager) : XMLScanner(valToAdopt, grammarResolver, manager) , fAttrNSList(0) , fDTDValidator(0) , fDTDGrammar(0) , fDTDElemNonDeclPool(0) , fElemCount(0) , fAttDefRegistry(0) , fUndeclaredAttrRegistry(0){ try { commonInit(); if (valToAdopt) { if (!valToAdopt->handlesDTD()) ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager); } else { fValidator = fDTDValidator; } } catch(const OutOfMemoryException&) { throw; } catch(...) { cleanUp(); throw; }}DGXMLScanner::DGXMLScanner( XMLDocumentHandler* const docHandler , DocTypeHandler* const docTypeHandler , XMLEntityHandler* const entityHandler , XMLErrorReporter* const errHandler , XMLValidator* const valToAdopt , GrammarResolver* const grammarResolver , MemoryManager* const manager) : XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, grammarResolver, manager) , fAttrNSList(0) , fDTDValidator(0) , fDTDGrammar(0) , fDTDElemNonDeclPool(0) , fElemCount(0) , fAttDefRegistry(0) , fUndeclaredAttrRegistry(0){ try { commonInit(); if (valToAdopt) { if (!valToAdopt->handlesDTD()) ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager); } else { fValidator = fDTDValidator; } } catch(const OutOfMemoryException&) { throw; } catch(...) { cleanUp(); throw; }}DGXMLScanner::~DGXMLScanner(){ cleanUp();}// ---------------------------------------------------------------------------// XMLScanner: Getter methods// ---------------------------------------------------------------------------NameIdPool<DTDEntityDecl>* DGXMLScanner::getEntityDeclPool(){ if(!fGrammar) return 0; return ((DTDGrammar*)fGrammar)->getEntityDeclPool();}const NameIdPool<DTDEntityDecl>* DGXMLScanner::getEntityDeclPool() const{ if(!fGrammar) return 0; return ((DTDGrammar*)fGrammar)->getEntityDeclPool();}// ---------------------------------------------------------------------------// DGXMLScanner: Main entry point to scan a document// ---------------------------------------------------------------------------void DGXMLScanner::scanDocument(const InputSource& src){ // Bump up the sequence id for this parser instance. This will invalidate // any previous progressive scan tokens. fSequenceId++; try { // Reset the scanner and its plugged in stuff for a new run. This // resets all the data structures, creates the initial reader and // pushes it on the stack, and sets up the base document path. scanReset(src); // If we have a document handler, then call the start document if (fDocHandler) fDocHandler->startDocument(); // Scan the prolog part, which is everything before the root element // including the DTD subsets. scanProlog(); // If we got to the end of input, then its not a valid XML file. // Else, go on to scan the content. if (fReaderMgr.atEOF()) { emitError(XMLErrs::EmptyMainEntity); } else { // Scan content, and tell it its not an external entity if (scanContent()) { // Do post-parse validation if required if (fValidate) { // We handle ID reference semantics at this level since // its required by XML 1.0. checkIDRefs(); // Then allow the validator to do any extra stuff it wants// fValidator->postParseValidation(); } // That went ok, so scan for any miscellaneous stuff if (!fReaderMgr.atEOF()) scanMiscellaneous(); } } // If we have a document handler, then call the end document if (fDocHandler) fDocHandler->endDocument(); // Reset the reader manager to close all files, sockets, etc... fReaderMgr.reset(); } // NOTE: // // In all of the error processing below, the emitError() call MUST come // before the flush of the reader mgr, or it will fail because it tries // to find out the position in the XML source of the error. catch(const XMLErrs::Codes) { // This is a 'first fatal error' type exit, so reset and fall through fReaderMgr.reset(); } catch(const XMLValid::Codes) { // This is a 'first fatal error' type exit, so reset and fall through fReaderMgr.reset(); } catch(const XMLException& excToCatch) { // Emit the error and catch any user exception thrown from here. Make // sure in all cases we flush the reader manager. fInException = true; try { if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning) emitError ( XMLErrs::XMLException_Warning , excToCatch.getType() , excToCatch.getMessage() ); else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal) emitError ( XMLErrs::XMLException_Fatal , excToCatch.getType() , excToCatch.getMessage() ); else emitError ( XMLErrs::XMLException_Error , excToCatch.getType() , excToCatch.getMessage() ); } catch(const OutOfMemoryException&) { throw; } catch(...) { // Flush the reader manager and rethrow user's error fReaderMgr.reset(); throw; } // If it returned, then reset the reader manager and fall through fReaderMgr.reset(); } catch(const OutOfMemoryException&) { throw; } catch(...) { // Reset and rethrow fReaderMgr.reset(); throw; }}bool DGXMLScanner::scanNext(XMLPScanToken& token){ // Make sure this token is still legal if (!isLegalToken(token)) ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_BadPScanToken, fMemoryManager); // Find the next token and remember the reader id unsigned int orgReader; XMLTokens curToken; bool retVal = true; try { while (true) { // We have to handle any end of entity exceptions that happen here. // We could be at the end of X nested entities, each of which will // generate an end of entity exception as we try to move forward. try { curToken = senseNextToken(orgReader); break; } catch(const EndOfEntityException& toCatch) { // Send an end of entity reference event if (fDocHandler) fDocHandler->endEntityReference(toCatch.getEntity()); } } if (curToken == Token_CharData) { scanCharData(fCDataBuf); } else if (curToken == Token_EOF) { if (!fElemStack.isEmpty()) { const ElemStack::StackElem* topElem = fElemStack.popTop(); emitError ( XMLErrs::EndedWithTagsOnStack , topElem->fThisElement->getFullName() ); } retVal = false; } else { // Its some sort of markup bool gotData = true; switch(curToken) { case Token_CData : // Make sure we are within content if (fElemStack.isEmpty()) emitError(XMLErrs::CDATAOutsideOfContent); scanCDSection(); break; case Token_Comment : scanComment(); break; case Token_EndTag : scanEndTag(gotData); break; case Token_PI : scanPI(); break; case Token_StartTag : scanStartTag(gotData); break; default : fReaderMgr.skipToChar(chOpenAngle); break; } if (orgReader != fReaderMgr.getCurrentReaderNum()) emitError(XMLErrs::PartialMarkupInEntity); // If we hit the end, then do the miscellaneous part if (!gotData) { // Do post-parse validation if required if (fValidate) { // We handle ID reference semantics at this level since // its required by XML 1.0. checkIDRefs(); // Then allow the validator to do any extra stuff it wants// fValidator->postParseValidation();
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?