sgxmlscanner.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,881 行 · 第 1/5 页
CPP
1,881 行
// this tells us, we will call something to handle that kind // of thing. unsigned int orgReader; const XMLTokens curToken = senseNextToken(orgReader); // Handle character data and end of file specially. Char data // is not markup so we don't want to handle it in the loop // below. if (curToken == Token_CharData) { // Scan the character data and call appropriate events. Let // him use our local character data buffer for efficiency. scanCharData(fCDataBuf); continue; } else if (curToken == Token_EOF) { // The element stack better be empty at this point or we // ended prematurely before all elements were closed. if (!fElemStack.isEmpty()) { const ElemStack::StackElem* topElem = fElemStack.popTop(); emitError ( XMLErrs::EndedWithTagsOnStack , topElem->fThisElement->getFullName() ); } // Its the end of file, so clear the got data flag gotData = false; continue; } // We are in some sort of markup now inMarkup = true; // According to the token we got, call the appropriate // scanning method. switch(curToken) { case Token_CData : // Make sure we are within content if (fElemStack.isEmpty()) emitError(XMLErrs::CDATAOutsideOfContent); scanCDSection(); break; case Token_Comment : scanComment(); break; case Token_EndTag : scanEndTag(gotData); break; case Token_PI : scanPI(); break; case Token_StartTag : scanStartTag(gotData); break; default : fReaderMgr.skipToChar(chOpenAngle); break; } if (orgReader != fReaderMgr.getCurrentReaderNum()) emitError(XMLErrs::PartialMarkupInEntity); // And we are back out of markup again inMarkup = false; } } catch(const EndOfEntityException& toCatch) { // If we were in some markup when this happened, then its a // partial markup error. if (inMarkup) emitError(XMLErrs::PartialMarkupInEntity); // Send an end of entity reference event if (fDocHandler) fDocHandler->endEntityReference(toCatch.getEntity()); inMarkup = false; } } // It went ok, so return success return true;}void SGXMLScanner::scanEndTag(bool& gotData){ // Assume we will still have data until proven otherwise. It will only // ever be false if this is the end of the root element. gotData = true; // Check if the element stack is empty. If so, then this is an unbalanced // element (i.e. more ends than starts, perhaps because of bad text // causing one to be skipped.) if (fElemStack.isEmpty()) { emitError(XMLErrs::MoreEndThanStartTags); fReaderMgr.skipPastChar(chCloseAngle); ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager); } // Pop the stack of the element we are supposed to be ending. Remember // that we don't own this. The stack just keeps them and reuses them. unsigned int uriId = (fDoNamespaces) ? fElemStack.getCurrentURI() : fEmptyNamespaceId; // Make sure that its the end of the element that we expect XMLCh *elemName = fElemStack.getCurrentSchemaElemName(); const ElemStack::StackElem* topElem = fElemStack.popTop(); XMLElementDecl *tempElement = topElem->fThisElement; if (!fReaderMgr.skippedString(elemName)) { emitError ( XMLErrs::ExpectedEndOfTagX , elemName ); fReaderMgr.skipPastChar(chCloseAngle); return; } // See if it was the root element, to avoid multiple calls below const bool isRoot = fElemStack.isEmpty(); fPSVIElemContext.fErrorOccurred = fErrorStack->pop(); // Make sure we are back on the same reader as where we started if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum()) emitError(XMLErrs::PartialTagMarkupError); // Skip optional whitespace fReaderMgr.skipPastSpaces(); // Make sure we find the closing bracket if (!fReaderMgr.skippedChar(chCloseAngle)) { emitError ( XMLErrs::UnterminatedEndTag , topElem->fThisElement->getFullName() ); } if (fValidate && topElem->fThisElement->isDeclared()) { fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo(); if(!fPSVIElemContext.fCurrentTypeInfo) fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator(); else fPSVIElemContext.fCurrentDV = 0; if (fPSVIHandler) { fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue(); if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString)) fPSVIElemContext.fNormalizedValue = 0; } } else { fPSVIElemContext.fCurrentDV = 0; fPSVIElemContext.fCurrentTypeInfo = 0; fPSVIElemContext.fNormalizedValue = 0; } // If validation is enabled, then lets pass him the list of children and // this element and let him validate it. DatatypeValidator* psviMemberType = 0; if (fValidate) { int res = fValidator->checkContent ( topElem->fThisElement , topElem->fChildren , topElem->fChildCount ); if (res >= 0) { // One of the elements is not valid for the content. NOTE that // if no children were provided but the content model requires // them, it comes back with a zero value. But we cannot use that // to index the child array in this case, and have to put out a // special message. if (!topElem->fChildCount) { fValidator->emitError ( XMLValid::EmptyNotValidForContent , topElem->fThisElement->getFormattedContentModel() ); } else if ((unsigned int)res >= topElem->fChildCount) { fValidator->emitError ( XMLValid::NotEnoughElemsForCM , topElem->fThisElement->getFormattedContentModel() ); } else { fValidator->emitError ( XMLValid::ElementNotValidForContent , topElem->fChildren[res]->getRawName() , topElem->fThisElement->getFormattedContentModel() ); } } // update PSVI info if (((SchemaValidator*) fValidator)->getErrorOccurred()) fPSVIElemContext.fErrorOccurred = true; else if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union) psviMemberType = fValidationContext->getValidatingMemberType(); if (fPSVIHandler) { fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified(); if(fPSVIElemContext.fIsSpecified) fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)topElem->fThisElement)->getDefaultValue(); } // call matchers and de-activate context if (toCheckIdentityConstraint()) { fICHandler->deactivateContext ( (SchemaElementDecl *) topElem->fThisElement , fContent.getRawBuffer() ); } } if (fPSVIHandler) { endElementPSVI ( (SchemaElementDecl*)topElem->fThisElement, psviMemberType ); } // now we can reset the datatype buffer, since the // application has had a chance to copy the characters somewhere else ((SchemaValidator *)fValidator)->clearDatatypeBuffer(); // If we have a doc handler, tell it about the end tag if (fDocHandler) { fDocHandler->endElement ( *topElem->fThisElement , uriId , isRoot , fPrefixBuf.getRawBuffer() ); } if (!isRoot) { // update error information fErrorStack->push(fErrorStack->pop() || fPSVIElemContext.fErrorOccurred); } // If this was the root, then done with content gotData = !isRoot; if (gotData) { // Restore the grammar fGrammar = fElemStack.getCurrentGrammar(); fGrammarType = fGrammar->getGrammarType(); fValidator->setGrammar(fGrammar); // Restore the validation flag fValidate = fElemStack.getValidationFlag(); }}// This method handles the high level logic of scanning the DOCType// declaration. This calls the DTDScanner and kicks off both the scanning of// the internal subset and the scanning of the external subset, if any.//// When we get here the '<!DOCTYPE' part has already been scanned, which is// what told us that we had a doc type decl to parse.void SGXMLScanner::scanDocTypeDecl(){ // Just skips over it // REVISIT: Should we issue a warning static const XMLCh doctypeIE[] = { chOpenSquare, chCloseAngle, chNull }; XMLCh nextCh = fReaderMgr.skipUntilIn(doctypeIE); if (nextCh == chOpenSquare) fReaderMgr.skipPastChar(chCloseSquare); fReaderMgr.skipPastChar(chCloseAngle);}// This method is called to scan a start tag when we are processing// namespaces. This method is called after we've scanned the < of a// start tag. So we have to get the element name, then scan the attributes, // after which we are either going to see >, />, or attributes followed // by one of those sequences.bool SGXMLScanner::scanStartTag(bool& gotData){ // Assume we will still have data until proven otherwise. It will only // ever be false if this is the root and its empty. gotData = true; // Reset element content fContent.reset(); // The current position is after the open bracket, so we need to read in // in the element name. if (!fReaderMgr.getName(fQNameBuf)) { emitError(XMLErrs::ExpectedElementName); fReaderMgr.skipToChar(chOpenAngle); return false; } // See if its the root element const bool isRoot = fElemStack.isEmpty(); // Skip any whitespace after the name fReaderMgr.skipPastSpaces(); // First we have to do the rawest attribute scan. We don't do any // normalization of them at all, since we don't know yet what type they // might be (since we need the element decl in order to do that.) const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer(); bool isEmpty; unsigned int attCount = rawAttrScan ( qnameRawBuf , *fRawAttrList , isEmpty ); // save the contentleafname and currentscope before addlevel, for later use ContentLeafNameTypeVector* cv = 0; XMLContentModel* cm = 0; int currentScope = Grammar::TOP_LEVEL_SCOPE; bool laxThisOne = false; if (!isRoot) { // schema validator will have correct type if validating SchemaElementDecl* tempElement = (SchemaElementDecl*) fElemStack.topElement()->fThisElement; SchemaElementDecl::ModelTypes modelType = tempElement->getModelType(); ComplexTypeInfo *currType = 0; if (fValidate) { currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo(); if (currType) modelType = (SchemaElementDecl::ModelTypes)currType->getContentType(); else // something must have gone wrong modelType = SchemaElementDecl::Any; } else
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?