xsaxmlscanner.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 693 行 · 第 1/2 页
CPP
693 行
/* * Copyright 2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* * $Id: XSAXMLScanner.cpp,v 1.2 2004/09/29 23:25:42 cargilld Exp $ */// ---------------------------------------------------------------------------// Includes// ---------------------------------------------------------------------------#include <xercesc/internal/XSAXMLScanner.hpp>#include <xercesc/sax/InputSource.hpp>#include <xercesc/framework/XMLEntityHandler.hpp>#include <xercesc/framework/XMLDocumentHandler.hpp>#include <xercesc/validators/schema/SchemaValidator.hpp>XERCES_CPP_NAMESPACE_BEGIN// ---------------------------------------------------------------------------// XSAXMLScanner: Constructors and Destructor// ---------------------------------------------------------------------------XSAXMLScanner::XSAXMLScanner( GrammarResolver* const grammarResolver , XMLStringPool* const uriStringPool , SchemaGrammar* const xsaGrammar , MemoryManager* const manager) : SGXMLScanner(0, grammarResolver, manager){ fSchemaGrammar = xsaGrammar; setURIStringPool(uriStringPool);}XSAXMLScanner::~XSAXMLScanner(){}// ---------------------------------------------------------------------------// XSAXMLScanner: SGXMLScanner virtual methods// ---------------------------------------------------------------------------// This method will kick off the scanning of the primary content of thevoid XSAXMLScanner::scanEndTag(bool& gotData){ // Assume we will still have data until proven otherwise. It will only // ever be false if this is the end of the root element. gotData = true; // Check if the element stack is empty. If so, then this is an unbalanced // element (i.e. more ends than starts, perhaps because of bad text // causing one to be skipped.) if (fElemStack.isEmpty()) { emitError(XMLErrs::MoreEndThanStartTags); fReaderMgr.skipPastChar(chCloseAngle); ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager); } // Pop the stack of the element we are supposed to be ending. Remember // that we don't own this. The stack just keeps them and reuses them. unsigned int uriId = fElemStack.getCurrentURI(); // Make sure that its the end of the element that we expect XMLCh *elemName = fElemStack.getCurrentSchemaElemName(); const ElemStack::StackElem* topElem = fElemStack.popTop(); XMLElementDecl *tempElement = topElem->fThisElement; if (!fReaderMgr.skippedString(elemName)) { emitError ( XMLErrs::ExpectedEndOfTagX, elemName ); fReaderMgr.skipPastChar(chCloseAngle); return; } // See if it was the root element, to avoid multiple calls below const bool isRoot = fElemStack.isEmpty(); // Make sure we are back on the same reader as where we started if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum()) emitError(XMLErrs::PartialTagMarkupError); // Skip optional whitespace fReaderMgr.skipPastSpaces(); // Make sure we find the closing bracket if (!fReaderMgr.skippedChar(chCloseAngle)) { emitError ( XMLErrs::UnterminatedEndTag, topElem->fThisElement->getFullName() ); } // If validation is enabled, then lets pass him the list of children and // this element and let him validate it. if (fValidate) { int res = fValidator->checkContent ( topElem->fThisElement, topElem->fChildren, topElem->fChildCount ); if (res >= 0) { // One of the elements is not valid for the content. NOTE that // if no children were provided but the content model requires // them, it comes back with a zero value. But we cannot use that // to index the child array in this case, and have to put out a // special message. if (!topElem->fChildCount) { fValidator->emitError ( XMLValid::EmptyNotValidForContent , topElem->fThisElement->getFormattedContentModel() ); } else if ((unsigned int)res >= topElem->fChildCount) { fValidator->emitError ( XMLValid::NotEnoughElemsForCM , topElem->fThisElement->getFormattedContentModel() ); } else { fValidator->emitError ( XMLValid::ElementNotValidForContent , topElem->fChildren[res]->getRawName() , topElem->fThisElement->getFormattedContentModel() ); } } } // now we can reset the datatype buffer, since the // application has had a chance to copy the characters somewhere else ((SchemaValidator *)fValidator)->clearDatatypeBuffer(); // If we have a doc handler, tell it about the end tag if (fDocHandler) { fDocHandler->endElement ( *topElem->fThisElement, uriId, isRoot, fPrefixBuf.getRawBuffer() ); } // If this was the root, then done with content gotData = !isRoot; if (gotData) { // Restore the grammar fGrammar = fElemStack.getCurrentGrammar(); fGrammarType = fGrammar->getGrammarType(); fValidator->setGrammar(fGrammar); // Restore the validation flag fValidate = fElemStack.getValidationFlag(); }}bool XSAXMLScanner::scanStartTag(bool& gotData){ // Assume we will still have data until proven otherwise. It will only // ever be false if this is the root and its empty. gotData = true; // Reset element content fContent.reset(); // The current position is after the open bracket, so we need to read in // in the element name. if (!fReaderMgr.getName(fQNameBuf)) { emitError(XMLErrs::ExpectedElementName); fReaderMgr.skipToChar(chOpenAngle); return false; } // See if its the root element const bool isRoot = fElemStack.isEmpty(); // Skip any whitespace after the name fReaderMgr.skipPastSpaces(); // First we have to do the rawest attribute scan. We don't do any // normalization of them at all, since we don't know yet what type they // might be (since we need the element decl in order to do that.) const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer(); bool isEmpty; unsigned int attCount = rawAttrScan(qnameRawBuf, *fRawAttrList, isEmpty); // save the contentleafname and currentscope before addlevel, for later use ContentLeafNameTypeVector* cv = 0; XMLContentModel* cm = 0; int currentScope = Grammar::TOP_LEVEL_SCOPE; bool laxThisOne = false; if (!isRoot) { // schema validator will have correct type if validating SchemaElementDecl* tempElement = (SchemaElementDecl*) fElemStack.topElement()->fThisElement; SchemaElementDecl::ModelTypes modelType = tempElement->getModelType(); ComplexTypeInfo *currType = 0; if (fValidate) { currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo(); if (currType) modelType = (SchemaElementDecl::ModelTypes)currType->getContentType(); else // something must have gone wrong modelType = SchemaElementDecl::Any; } else { currType = tempElement->getComplexTypeInfo(); } if ((modelType == SchemaElementDecl::Mixed_Simple) || (modelType == SchemaElementDecl::Mixed_Complex) || (modelType == SchemaElementDecl::Children)) { cm = currType->getContentModel(); cv = cm->getContentLeafNameTypeVector(); currentScope = fElemStack.getCurrentScope(); } else if (modelType == SchemaElementDecl::Any) { laxThisOne = true; } } // Now, since we might have to update the namespace map for this element, // but we don't have the element decl yet, we just tell the element stack // to expand up to get ready. unsigned int elemDepth = fElemStack.addLevel(); fElemStack.setValidationFlag(fValidate); // Make an initial pass through the list and find any xmlns attributes or // schema attributes. if (attCount) scanRawAttrListforNameSpaces(attCount); // Resolve the qualified name to a URI and name so that we can look up // the element decl for this element. We have now update the prefix to // namespace map so we should get the correct element now. int prefixColonPos = -1; unsigned int uriId = resolveQName ( qnameRawBuf, fPrefixBuf, ElemStack::Mode_Element, prefixColonPos ); //if schema, check if we should lax or skip the validation of this element bool parentValidation = fValidate; if (cv) { QName element(fPrefixBuf.getRawBuffer(), &qnameRawBuf[prefixColonPos + 1], uriId, fMemoryManager); // elementDepth will be > 0, as cv is only constructed if element is not // root. laxThisOne = laxElementValidation(&element, cv, cm, elemDepth - 1); } // Look up the element now in the grammar. This will get us back a // generic element decl object. We tell him to fault one in if he does // not find it. bool wasAdded = false; const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1]; XMLElementDecl* elemDecl = fGrammar->getElemDecl ( uriId, nameRawBuf, qnameRawBuf, currentScope ); if (!elemDecl) { // URI is different, so we try to switch grammar if (uriId != fURIStringPool->getId(fGrammar->getTargetNamespace())) { switchGrammar(getURIText(uriId), laxThisOne); } // look for a global element declaration elemDecl = fGrammar->getElemDecl( uriId, nameRawBuf, qnameRawBuf, Grammar::TOP_LEVEL_SCOPE ); if (!elemDecl) { // if still not found, look in list of undeclared elements elemDecl = fElemNonDeclPool->getByKey( nameRawBuf, uriId, Grammar::TOP_LEVEL_SCOPE); if (!elemDecl) { elemDecl = new (fMemoryManager) SchemaElementDecl ( fPrefixBuf.getRawBuffer(), nameRawBuf, uriId , SchemaElementDecl::Any, Grammar::TOP_LEVEL_SCOPE , fMemoryManager ); elemDecl->setId ( fElemNonDeclPool->put ( (void*)elemDecl->getBaseName(), uriId , Grammar::TOP_LEVEL_SCOPE, (SchemaElementDecl*)elemDecl ) ); wasAdded = true; } } } // We do something different here according to whether we found the // element or not. if (wasAdded || !elemDecl->isDeclared()) { if (laxThisOne) { fValidate = false; fElemStack.setValidationFlag(fValidate); } // If validating then emit an error if (fValidate) { // This is to tell the reuse Validator that this element was // faulted-in, was not an element in the grammar pool originally elemDecl->setCreateReason(XMLElementDecl::JustFaultIn); fValidator->emitError ( XMLValid::ElementNotDefined, elemDecl->getFullName()
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?