dgxmlscanner.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,902 行 · 第 1/5 页
CPP
1,902 行
} // That went ok, so scan for any miscellaneous stuff scanMiscellaneous(); if (fDocHandler) fDocHandler->endDocument(); } } } // NOTE: // // In all of the error processing below, the emitError() call MUST come // before the flush of the reader mgr, or it will fail because it tries // to find out the position in the XML source of the error. catch(const XMLErrs::Codes) { // This is a 'first failure' exception, so reset and return failure fReaderMgr.reset(); return false; } catch(const XMLValid::Codes) { // This is a 'first fatal error' type exit, so reset and reuturn failure fReaderMgr.reset(); return false; } catch(const XMLException& excToCatch) { // Emit the error and catch any user exception thrown from here. Make // sure in all cases we flush the reader manager. fInException = true; try { if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning) emitError ( XMLErrs::XMLException_Warning , excToCatch.getType() , excToCatch.getMessage() ); else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal) emitError ( XMLErrs::XMLException_Fatal , excToCatch.getType() , excToCatch.getMessage() ); else emitError ( XMLErrs::XMLException_Error , excToCatch.getType() , excToCatch.getMessage() ); } catch(const OutOfMemoryException&) { throw; } catch(...) { // Reset and rethrow user error fReaderMgr.reset(); throw; } // Reset and return failure fReaderMgr.reset(); return false; } catch(const OutOfMemoryException&) { throw; } catch(...) { // Reset and rethrow original error fReaderMgr.reset(); throw; } // If we hit the end, then flush the reader manager if (!retVal) fReaderMgr.reset(); return retVal;}// ---------------------------------------------------------------------------// DGXMLScanner: Private scanning methods// ---------------------------------------------------------------------------// This method will kick off the scanning of the primary content of the// document, i.e. the elements.bool DGXMLScanner::scanContent(){ // Go into a loop until we hit the end of the root element, or we fall // out because there is no root element. // // We have to do kind of a deeply nested double loop here in order to // avoid doing the setup/teardown of the exception handler on each // round. Doing it this way we only do it when an exception actually // occurs. bool gotData = true; bool inMarkup = false; while (gotData) { try { while (gotData) { // Sense what the next top level token is. According to what // this tells us, we will call something to handle that kind // of thing. unsigned int orgReader; const XMLTokens curToken = senseNextToken(orgReader); // Handle character data and end of file specially. Char data // is not markup so we don't want to handle it in the loop // below. if (curToken == Token_CharData) { // Scan the character data and call appropriate events. Let // him use our local character data buffer for efficiency. scanCharData(fCDataBuf); continue; } else if (curToken == Token_EOF) { // The element stack better be empty at this point or we // ended prematurely before all elements were closed. if (!fElemStack.isEmpty()) { const ElemStack::StackElem* topElem = fElemStack.popTop(); emitError ( XMLErrs::EndedWithTagsOnStack , topElem->fThisElement->getFullName() ); } // Its the end of file, so clear the got data flag gotData = false; continue; } // We are in some sort of markup now inMarkup = true; // According to the token we got, call the appropriate // scanning method. switch(curToken) { case Token_CData : // Make sure we are within content if (fElemStack.isEmpty()) emitError(XMLErrs::CDATAOutsideOfContent); scanCDSection(); break; case Token_Comment : scanComment(); break; case Token_EndTag : scanEndTag(gotData); break; case Token_PI : scanPI(); break; case Token_StartTag : scanStartTag(gotData); break; default : fReaderMgr.skipToChar(chOpenAngle); break; } if (orgReader != fReaderMgr.getCurrentReaderNum()) emitError(XMLErrs::PartialMarkupInEntity); // And we are back out of markup again inMarkup = false; } } catch(const EndOfEntityException& toCatch) { // If we were in some markup when this happened, then its a // partial markup error. if (inMarkup) emitError(XMLErrs::PartialMarkupInEntity); // Send an end of entity reference event if (fDocHandler) fDocHandler->endEntityReference(toCatch.getEntity()); inMarkup = false; } } // It went ok, so return success return true;}void DGXMLScanner::scanEndTag(bool& gotData){ // Assume we will still have data until proven otherwise. It will only // ever be false if this is the end of the root element. gotData = true; // Check if the element stack is empty. If so, then this is an unbalanced // element (i.e. more ends than starts, perhaps because of bad text // causing one to be skipped.) if (fElemStack.isEmpty()) { emitError(XMLErrs::MoreEndThanStartTags); fReaderMgr.skipPastChar(chCloseAngle); ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager); } // Pop the stack of the element we are supposed to be ending. Remember // that we don't own this. The stack just keeps them and reuses them. unsigned int uriId = (fDoNamespaces) ? fElemStack.getCurrentURI() : fEmptyNamespaceId; // Pop the stack of the element we are supposed to be ending. Remember // that we don't own this. The stack just keeps them and reuses them. const ElemStack::StackElem* topElem = fElemStack.popTop(); XMLElementDecl *tempElement = topElem->fThisElement; // See if it was the root element, to avoid multiple calls below const bool isRoot = fElemStack.isEmpty(); // Make sure that its the end of the element that we expect if (!fReaderMgr.skippedString(tempElement->getFullName())) { emitError ( XMLErrs::ExpectedEndOfTagX , tempElement->getFullName() ); fReaderMgr.skipPastChar(chCloseAngle); return; } // Make sure we are back on the same reader as where we started if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum()) emitError(XMLErrs::PartialTagMarkupError); // Skip optional whitespace fReaderMgr.skipPastSpaces(); // Make sure we find the closing bracket if (!fReaderMgr.skippedChar(chCloseAngle)) { emitError ( XMLErrs::UnterminatedEndTag , topElem->fThisElement->getFullName() ); } // If validation is enabled, then lets pass him the list of children and // this element and let him validate it. if (fValidate) { // // XML1.0-3rd // Validity Constraint: // The declaration matches EMPTY and the element has no content (not even // entity references, comments, PIs or white space). // if ( (topElem->fCommentOrPISeen) && (((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Empty)) { fValidator->emitError ( XMLValid::EmptyElemHasContent , topElem->fThisElement->getFullName() ); } // // XML1.0-3rd // Validity Constraint: // // The declaration matches children and the sequence of child elements // belongs to the language generated by the regular expression in the // content model, with optional white space, comments and PIs // (i.e. markup matching production [27] Misc) between the start-tag and // the first child element, between child elements, or between the last // child element and the end-tag. // // Note that // a CDATA section containing only white space or // a reference to an entity whose replacement text is character references // expanding to white space do not match the nonterminal S, and hence // cannot appear in these positions; however, // a reference to an internal entity with a literal value consisting // of character references expanding to white space does match S, // since its replacement text is the white space resulting from expansion // of the character references. // if ( (topElem->fReferenceEscaped) && (((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Children)) { fValidator->emitError ( XMLValid::ElemChildrenHasInvalidWS , topElem->fThisElement->getFullName() ); } int res = fValidator->checkContent ( topElem->fThisElement , topElem->fChildren , topElem->fChildCount ); if (res >= 0) { // One of the elements is not valid for the content. NOTE that // if no children were provided but the content model requires // them, it comes back with a zero value. But we cannot use that // to index the child array in this case, and have to put out a // special message. if (!topElem->fChildCount) { fValidator->emitError ( XMLValid::EmptyNotValidForContent , topElem->fThisElement->getFormattedContentModel() ); } else if ((unsigned int)res >= topElem->fChildCount) { fValidator->emitError ( XMLValid::NotEnoughElemsForCM , topElem->fThisElement->getFormattedContentModel() ); } else { fValidator->emitError ( XMLValid::ElementNotValidForContent , topElem->fChildren[res]->getRawName() , topElem->fThisElement->getFormattedContentModel() ); } } } // If we have a doc handler, tell it about the end tag if (fDocHandler) { fDocHandler->endElement ( *topElem->fThisElement , uriId , isRoot , (fDoNamespaces) ? topElem->fThisElement->getElementName()->getPrefix() : XMLUni::fgZeroLenString ); } // If this was the root, then done with content gotData = !isRoot;}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?