igxmlscanner.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,855 行 · 第 1/5 页
CPP
1,855 行
); } } if (fGrammarType == Grammar::SchemaGrammarType) { if (((SchemaValidator*) fValidator)->getErrorOccurred()) fPSVIElemContext.fErrorOccurred = true; else if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union) psviMemberType = fValidationContext->getValidatingMemberType(); if (fPSVIHandler) { fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified(); if(fPSVIElemContext.fIsSpecified) fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)topElem->fThisElement)->getDefaultValue(); } // call matchers and de-activate context if (toCheckIdentityConstraint()) { fICHandler->deactivateContext ( (SchemaElementDecl *) topElem->fThisElement , fContent.getRawBuffer() ); } } } if (fGrammarType == Grammar::SchemaGrammarType) { if (fPSVIHandler) { endElementPSVI( (SchemaElementDecl*)topElem->fThisElement, psviMemberType); } // now we can reset the datatype buffer, since the // application has had a chance to copy the characters somewhere else ((SchemaValidator *)fValidator)->clearDatatypeBuffer(); } // If we have a doc handler, tell it about the end tag if (fDocHandler) { fDocHandler->endElement ( *topElem->fThisElement , uriId , isRoot , fPrefixBuf.getRawBuffer() ); } if (fGrammarType == Grammar::SchemaGrammarType) { if (!isRoot) { // update error information fErrorStack->push(fErrorStack->pop() || fPSVIElemContext.fErrorOccurred); } } // If this was the root, then done with content gotData = !isRoot; if (gotData) { if (fDoNamespaces) { // Restore the grammar fGrammar = fElemStack.getCurrentGrammar(); fGrammarType = fGrammar->getGrammarType(); if (fGrammarType == Grammar::SchemaGrammarType && !fValidator->handlesSchema()) { if (fValidatorFromUser) ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager); else { fValidator = fSchemaValidator; } } else if (fGrammarType == Grammar::DTDGrammarType && !fValidator->handlesDTD()) { if (fValidatorFromUser) 
ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager); else { fValidator = fDTDValidator; } } fValidator->setGrammar(fGrammar); } // Restore the validation flag fValidate = fElemStack.getValidationFlag(); }}// This method handles the high level logic of scanning the DOCType// declaration. This calls the DTDScanner and kicks off both the scanning of// the internal subset and the scanning of the external subset, if any.//// When we get here the '<!DOCTYPE' part has already been scanned, which is// what told us that we had a doc type decl to parse.void IGXMLScanner::scanDocTypeDecl(){ // We have a doc type. So, switch the Grammar. switchGrammar(XMLUni::fgDTDEntityString); if (fDocTypeHandler) fDocTypeHandler->resetDocType(); // There must be some space after DOCTYPE if (!fReaderMgr.skipPastSpaces()) { emitError(XMLErrs::ExpectedWhitespace); // Just skip the Doctype declaration and return fReaderMgr.skipPastChar(chCloseAngle); return; } // Get a buffer for the root element XMLBufBid bbRootName(&fBufMgr); // Get a name from the input, which should be the name of the root // element of the upcoming content. fReaderMgr.getName(bbRootName.getBuffer()); if (bbRootName.isEmpty()) { emitError(XMLErrs::NoRootElemInDOCTYPE); fReaderMgr.skipPastChar(chCloseAngle); return; } // Store the root element name for later check setRootElemName(bbRootName.getRawBuffer()); // This element obviously is not going to exist in the element decl // pool yet, but we need to call docTypeDecl. So force it into // the element decl pool, marked as being there because it was in // the DOCTYPE. Later, when its declared, the status will be updated. // // Only do this if we are not reusing the validator! If we are reusing, // then look it up instead. It has to exist! 
DTDElementDecl* rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl ( bbRootName.getRawBuffer() , fEmptyNamespaceId , DTDElementDecl::Any , fGrammarPoolMemoryManager ); Janitor<DTDElementDecl> rootDeclJanitor(rootDecl); rootDecl->setCreateReason(DTDElementDecl::AsRootElem); rootDecl->setExternalElemDeclaration(true); if(!fUseCachedGrammar) { // this will break getRootElemId on DTDGrammar when // cached grammars are in use, but // why would one use this anyway??? ((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl)); rootDeclJanitor.release(); } else { // attach this to the undeclared element pool so that it gets deleted XMLElementDecl* elemDecl = fDTDElemNonDeclPool->getByKey(bbRootName.getRawBuffer()); if (elemDecl) { rootDecl->setId(elemDecl->getId()); } else { rootDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)rootDecl)); rootDeclJanitor.release(); } } // Skip any spaces after the name fReaderMgr.skipPastSpaces(); // And now if we are looking at a >, then we are done. It is not // required to have an internal or external subset, though why you // would not escapes me. if (fReaderMgr.skippedChar(chCloseAngle)) { // If we have a doc type handler and advanced callbacks are enabled, // call the doctype event. if (fDocTypeHandler) fDocTypeHandler->doctypeDecl(*rootDecl, 0, 0, false); return; } // either internal/external subset if (fValScheme == Val_Auto && !fValidate) fValidate = true; bool hasIntSubset = false; bool hasExtSubset = false; XMLCh* sysId = 0; XMLCh* pubId = 0; DTDScanner dtdScanner ( (DTDGrammar*) fGrammar , fDocTypeHandler , fGrammarPoolMemoryManager , fMemoryManager ); dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr); // If the next character is '[' then we have no external subset cause // there is no system id, just the opening character of the internal // subset. Else, has to be an id. // // Just look at the next char, don't eat it. 
if (fReaderMgr.peekNextChar() == chOpenSquare) { hasIntSubset = true; } else { // Indicate we have an external subset hasExtSubset = true; fHasNoDTD = false; // Get buffers for the ids XMLBufBid bbPubId(&fBufMgr); XMLBufBid bbSysId(&fBufMgr); // Get the external subset id if (!dtdScanner.scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), DTDScanner::IDType_External)) { fReaderMgr.skipPastChar(chCloseAngle); return; } // Get copies of the ids we got pubId = XMLString::replicate(bbPubId.getRawBuffer(), fMemoryManager); sysId = XMLString::replicate(bbSysId.getRawBuffer(), fMemoryManager); // Skip spaces and check again for the opening of an internal subset fReaderMgr.skipPastSpaces(); // Just look at the next char, don't eat it. if (fReaderMgr.peekNextChar() == chOpenSquare) { hasIntSubset = true; } } // Insure that the ids get cleaned up, if they got allocated ArrayJanitor<XMLCh> janSysId(sysId, fMemoryManager); ArrayJanitor<XMLCh> janPubId(pubId, fMemoryManager); // If we have a doc type handler and advanced callbacks are enabled, // call the doctype event. if (fDocTypeHandler) fDocTypeHandler->doctypeDecl(*rootDecl, pubId, sysId, hasIntSubset, hasExtSubset); // Ok, if we had an internal subset, we are just past the [ character // and need to parse that first. if (hasIntSubset) { // Eat the opening square bracket fReaderMgr.getNextChar(); checkInternalDTD(hasExtSubset, sysId); // And try to scan the internal subset. If we fail, try to recover // by skipping forward tot he close angle and returning. if (!dtdScanner.scanInternalSubset()) { fReaderMgr.skipPastChar(chCloseAngle); return; } // Do a sanity check that some expanded PE did not propogate out of // the doctype. This could happen if it was terminated early by bad // syntax. 
if (fReaderMgr.getReaderDepth() > 1) { emitError(XMLErrs::PEPropogated); // Ask the reader manager to pop back down to the main level fReaderMgr.cleanStackBackTo(1); } fReaderMgr.skipPastSpaces(); } // And that should leave us at the closing > of the DOCTYPE line if (!fReaderMgr.skippedChar(chCloseAngle)) { // Do a special check for the common scenario of an extra ] char at // the end. This is easy to recover from. if (fReaderMgr.skippedChar(chCloseSquare) && fReaderMgr.skippedChar(chCloseAngle)) { emitError(XMLErrs::ExtraCloseSquare); } else { emitError(XMLErrs::UnterminatedDOCTYPE); fReaderMgr.skipPastChar(chCloseAngle); } } // If we had an external subset, then we need to deal with that one // next. If we are reusing the validator, then don't scan it. if (hasExtSubset) { if (fUseCachedGrammar) { InputSource* sysIdSrc = resolveSystemId(sysId); Janitor<InputSource> janSysIdSrc(sysIdSrc); Grammar* grammar = fGrammarResolver->getGrammar(sysIdSrc->getSystemId()); if (grammar && grammar->getGrammarType() == Grammar::DTDGrammarType) { fDTDGrammar = (DTDGrammar*) grammar; fGrammar = fDTDGrammar; fValidator->setGrammar(fGrammar); // should not be modifying cached grammars! 
/******** rootDecl = (DTDElementDecl*) fGrammar->getElemDecl(fEmptyNamespaceId, 0, bbRootName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE); if (rootDecl) ((DTDGrammar*)fGrammar)->setRootElemId(rootDecl->getId()); else { rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl ( bbRootName.getRawBuffer() , fEmptyNamespaceId , DTDElementDecl::Any , fGrammarPoolMemoryManager ); rootDecl->setCreateReason(DTDElementDecl::AsRootElem); rootDecl->setExternalElemDeclaration(true); ((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl)); } ********/ return; } } if (fLoadExternalDTD || fValidate) { // And now create a reader to read this entity InputSource* srcUsed; XMLReader* reader = fReaderMgr.createReader ( sysId , pubId , false , XMLReader::RefFrom_NonLiteral , XMLReader::Type_General , XMLReader::Source_External , srcUsed , fCalculateSrcOfs ); // Put a janitor on the input source Janitor<InputSource> janSrc(srcUsed); // If it failed then throw an exception if (!reader) ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenDTD, srcUsed->getSystemId(), fMemoryManager); if (fToCacheGrammar) { unsigned int stringId = fGrammarResolver->getStringPool()->addOrFind(srcUsed->getSystemId()); const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(stringId);
⌨️ 快捷键说明
复制代码:Ctrl + C
搜索代码:Ctrl + F
全屏模式:F11
增大字号:Ctrl + =
减小字号:Ctrl + -
显示快捷键:?