igxmlscanner.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,855 行 · 第 1/5 页
CPP
1,855 行
); toFill.addElement(curPair); } else { curPair = toFill.elementAt(attCount); curPair->set(fAttNameBuf.getRawBuffer(), fAttValueBuf.getRawBuffer()); } // And bump the count of attributes we've gotten attCount++; // And go to the top again for another attribute continue; } // It was some special case character so do all of the checks and // deal with it. if (!nextCh) ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager); if (nextCh == chForwardSlash) { fReaderMgr.getNextChar(); isEmpty = true; if (!fReaderMgr.skippedChar(chCloseAngle)) emitError(XMLErrs::UnterminatedStartTag, elemName); break; } else if (nextCh == chCloseAngle) { fReaderMgr.getNextChar(); break; } else if (nextCh == chOpenAngle) { // Check for this one specially, since its going to be common // and it is kind of auto-recovering since we've already hit the // next open bracket, which is what we would have seeked to (and // skipped this whole tag.) emitError(XMLErrs::UnterminatedStartTag, elemName); break; } else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote)) { // Check for this one specially, which is probably a missing // attribute name, e.g. ="value". Just issue expected name // error and eat the quoted string, then jump back to the // top again. emitError(XMLErrs::ExpectedAttrName); fReaderMgr.getNextChar(); fReaderMgr.skipQuotedString(nextCh); fReaderMgr.skipPastSpaces(); continue; } } return attCount;}// This method will kick off the scanning of the primary content of the// document, i.e. the elements.bool IGXMLScanner::scanContent(){ // Go into a loop until we hit the end of the root element, or we fall // out because there is no root element. // // We have to do kind of a deeply nested double loop here in order to // avoid doing the setup/teardown of the exception handler on each // round. Doing it this way we only do it when an exception actually // occurs. bool gotData = true; bool inMarkup = false; while (gotData) { try { while (gotData) { // Sense what the next top level token is. According to what // this tells us, we will call something to handle that kind // of thing. unsigned int orgReader; const XMLTokens curToken = senseNextToken(orgReader); // Handle character data and end of file specially. Char data // is not markup so we don't want to handle it in the loop // below. if (curToken == Token_CharData) { // Scan the character data and call appropriate events. Let // him use our local character data buffer for efficiency. scanCharData(fCDataBuf); continue; } else if (curToken == Token_EOF) { // The element stack better be empty at this point or we // ended prematurely before all elements were closed. if (!fElemStack.isEmpty()) { const ElemStack::StackElem* topElem = fElemStack.popTop(); emitError ( XMLErrs::EndedWithTagsOnStack , topElem->fThisElement->getFullName() ); } // Its the end of file, so clear the got data flag gotData = false; continue; } // We are in some sort of markup now inMarkup = true; // According to the token we got, call the appropriate // scanning method. switch(curToken) { case Token_CData : // Make sure we are within content if (fElemStack.isEmpty()) emitError(XMLErrs::CDATAOutsideOfContent); scanCDSection(); break; case Token_Comment : scanComment(); break; case Token_EndTag : scanEndTag(gotData); break; case Token_PI : scanPI(); break; case Token_StartTag : if (fDoNamespaces) scanStartTagNS(gotData); else scanStartTag(gotData); break; default : fReaderMgr.skipToChar(chOpenAngle); break; } if (orgReader != fReaderMgr.getCurrentReaderNum()) emitError(XMLErrs::PartialMarkupInEntity); // And we are back out of markup again inMarkup = false; } } catch(const EndOfEntityException& toCatch) { // If we were in some markup when this happened, then its a // partial markup error. if (inMarkup) emitError(XMLErrs::PartialMarkupInEntity); // Send an end of entity reference event if (fDocHandler) fDocHandler->endEntityReference(toCatch.getEntity()); inMarkup = false; } } // It went ok, so return success return true;}void IGXMLScanner::scanEndTag(bool& gotData){ // Assume we will still have data until proven otherwise. It will only // ever be false if this is the end of the root element. gotData = true; // Check if the element stack is empty. If so, then this is an unbalanced // element (i.e. more ends than starts, perhaps because of bad text // causing one to be skipped.) if (fElemStack.isEmpty()) { emitError(XMLErrs::MoreEndThanStartTags); fReaderMgr.skipPastChar(chCloseAngle); ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager); } // Pop the stack of the element we are supposed to be ending. Remember // that we don't own this. The stack just keeps them and reuses them. unsigned int uriId = (fDoNamespaces) ? fElemStack.getCurrentURI() : fEmptyNamespaceId; // these get initialized below const ElemStack::StackElem* topElem = 0; XMLElementDecl *tempElement = 0; XMLCh *elemName = 0; // Make sure that its the end of the element that we expect // special case for schema validation, whose element decls, // obviously don't contain prefix information if(fGrammarType == Grammar::SchemaGrammarType) { elemName = fElemStack.getCurrentSchemaElemName(); topElem = fElemStack.popTop(); tempElement = topElem->fThisElement; } else { topElem = fElemStack.popTop(); tempElement = topElem->fThisElement; elemName = (XMLCh *)tempElement->getFullName(); } if (!fReaderMgr.skippedString(elemName)) { emitError ( XMLErrs::ExpectedEndOfTagX , elemName ); fReaderMgr.skipPastChar(chCloseAngle); return; } // See if it was the root element, to avoid multiple calls below const bool isRoot = fElemStack.isEmpty(); // Make sure we are back on the same reader as where we started if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum()) emitError(XMLErrs::PartialTagMarkupError); // Skip optional whitespace fReaderMgr.skipPastSpaces(); // Make sure we find the closing bracket if (!fReaderMgr.skippedChar(chCloseAngle)) { emitError ( XMLErrs::UnterminatedEndTag , topElem->fThisElement->getFullName() ); } if (fGrammarType == Grammar::SchemaGrammarType) { // reset error occurred fPSVIElemContext.fErrorOccurred = fErrorStack->pop(); if (fValidate && topElem->fThisElement->isDeclared()) { fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo(); if(!fPSVIElemContext.fCurrentTypeInfo) fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator(); else fPSVIElemContext.fCurrentDV = 0; if(fPSVIHandler) { fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue(); if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString)) fPSVIElemContext.fNormalizedValue = 0; } } else { fPSVIElemContext.fCurrentDV = 0; fPSVIElemContext.fCurrentTypeInfo = 0; fPSVIElemContext.fNormalizedValue = 0; } } // If validation is enabled, then lets pass him the list of children and // this element and let him validate it. DatatypeValidator* psviMemberType = 0; if (fValidate) { // // XML1.0-3rd // Validity Constraint: // The declaration matches EMPTY and the element has no content (not even // entity references, comments, PIs or white space). // if ( (fGrammarType == Grammar::DTDGrammarType) && (topElem->fCommentOrPISeen) && (((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Empty)) { fValidator->emitError ( XMLValid::EmptyElemHasContent , topElem->fThisElement->getFullName() ); } // // XML1.0-3rd // Validity Constraint: // // The declaration matches children and the sequence of child elements // belongs to the language generated by the regular expression in the // content model, with optional white space, comments and PIs // (i.e. markup matching production [27] Misc) between the start-tag and // the first child element, between child elements, or between the last // child element and the end-tag. // // Note that // a CDATA section containing only white space or // a reference to an entity whose replacement text is character references // expanding to white space do not match the nonterminal S, and hence // cannot appear in these positions; however, // a reference to an internal entity with a literal value consisting // of character references expanding to white space does match S, // since its replacement text is the white space resulting from expansion // of the character references. // if ( (fGrammarType == Grammar::DTDGrammarType) && (topElem->fReferenceEscaped) && (((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Children)) { fValidator->emitError ( XMLValid::ElemChildrenHasInvalidWS , topElem->fThisElement->getFullName() ); } int res = fValidator->checkContent ( topElem->fThisElement , topElem->fChildren , topElem->fChildCount ); if (res >= 0) { // One of the elements is not valid for the content. NOTE that // if no children were provided but the content model requires // them, it comes back with a zero value. But we cannot use that // to index the child array in this case, and have to put out a // special message. if (!topElem->fChildCount) { fValidator->emitError ( XMLValid::EmptyNotValidForContent , topElem->fThisElement->getFormattedContentModel() ); } else if ((unsigned int)res >= topElem->fChildCount) { fValidator->emitError ( XMLValid::NotEnoughElemsForCM , topElem->fThisElement->getFormattedContentModel() ); } else { fValidator->emitError ( XMLValid::ElementNotValidForContent , topElem->fChildren[res]->getRawName() , topElem->fThisElement->getFormattedContentModel()
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?