dgxmlscanner.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,902 行 · 第 1/5 页
CPP
1,902 行
// This method handles the high level logic of scanning the DOCType// declaration. This calls the DTDScanner and kicks off both the scanning of// the internal subset and the scanning of the external subset, if any.//// When we get here the '<!DOCTYPE' part has already been scanned, which is// what told us that we had a doc type decl to parse.void DGXMLScanner::scanDocTypeDecl(){ if (fDocTypeHandler) fDocTypeHandler->resetDocType(); // There must be some space after DOCTYPE if (!fReaderMgr.skipPastSpaces()) { emitError(XMLErrs::ExpectedWhitespace); // Just skip the Doctype declaration and return fReaderMgr.skipPastChar(chCloseAngle); return; } // Get a buffer for the root element XMLBufBid bbRootName(&fBufMgr); // Get a name from the input, which should be the name of the root // element of the upcoming content. fReaderMgr.getName(bbRootName.getBuffer()); if (bbRootName.isEmpty()) { emitError(XMLErrs::NoRootElemInDOCTYPE); fReaderMgr.skipPastChar(chCloseAngle); return; } // Store the root element name for later check setRootElemName(bbRootName.getRawBuffer()); // This element obviously is not going to exist in the element decl // pool yet, but we need to call docTypeDecl. So force it into // the element decl pool, marked as being there because it was in // the DOCTYPE. Later, when its declared, the status will be updated. // // Only do this if we are not reusing the validator! If we are reusing, // then look it up instead. It has to exist! DTDElementDecl* rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl ( bbRootName.getRawBuffer() , fEmptyNamespaceId , DTDElementDecl::Any , fGrammarPoolMemoryManager ); Janitor<DTDElementDecl> rootDeclJanitor(rootDecl); rootDecl->setCreateReason(DTDElementDecl::AsRootElem); rootDecl->setExternalElemDeclaration(true); if(!fUseCachedGrammar) { // this will break getRootElemId on DTDGrammar when // cached grammars are in use, but // why would one use this anyway??? ((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl)); rootDeclJanitor.release(); } else { // put this in the undeclared pool so it gets deleted... XMLElementDecl* elemDecl = fDTDElemNonDeclPool->getByKey(bbRootName.getRawBuffer()); if (elemDecl) { rootDecl->setId(elemDecl->getId()); } else { rootDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)rootDecl)); rootDeclJanitor.release(); } } // Skip any spaces after the name fReaderMgr.skipPastSpaces(); // And now if we are looking at a >, then we are done. It is not // required to have an internal or external subset, though why you // would not escapes me. if (fReaderMgr.skippedChar(chCloseAngle)) { // If we have a doc type handler and advanced callbacks are enabled, // call the doctype event. if (fDocTypeHandler) fDocTypeHandler->doctypeDecl(*rootDecl, 0, 0, false); return; } // either internal/external subset if (fValScheme == Val_Auto && !fValidate) fValidate = true; bool hasIntSubset = false; bool hasExtSubset = false; XMLCh* sysId = 0; XMLCh* pubId = 0; DTDScanner dtdScanner ( (DTDGrammar*) fGrammar , fDocTypeHandler , fGrammarPoolMemoryManager , fMemoryManager ); dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr); // If the next character is '[' then we have no external subset cause // there is no system id, just the opening character of the internal // subset. Else, has to be an id. // // Just look at the next char, don't eat it. if (fReaderMgr.peekNextChar() == chOpenSquare) { hasIntSubset = true; } else { // Indicate we have an external subset hasExtSubset = true; fHasNoDTD = false; // Get buffers for the ids XMLBufBid bbPubId(&fBufMgr); XMLBufBid bbSysId(&fBufMgr); // Get the external subset id if (!dtdScanner.scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), DTDScanner::IDType_External)) { fReaderMgr.skipPastChar(chCloseAngle); return; } // Get copies of the ids we got pubId = XMLString::replicate(bbPubId.getRawBuffer(), fMemoryManager); sysId = XMLString::replicate(bbSysId.getRawBuffer(), fMemoryManager); // Skip spaces and check again for the opening of an internal subset fReaderMgr.skipPastSpaces(); // Just look at the next char, don't eat it. if (fReaderMgr.peekNextChar() == chOpenSquare) { hasIntSubset = true; } } // Insure that the ids get cleaned up, if they got allocated ArrayJanitor<XMLCh> janSysId(sysId, fMemoryManager); ArrayJanitor<XMLCh> janPubId(pubId, fMemoryManager); // If we have a doc type handler and advanced callbacks are enabled, // call the doctype event. if (fDocTypeHandler) fDocTypeHandler->doctypeDecl(*rootDecl, pubId, sysId, hasIntSubset, hasExtSubset); // Ok, if we had an internal subset, we are just past the [ character // and need to parse that first. if (hasIntSubset) { // Eat the opening square bracket fReaderMgr.getNextChar(); checkInternalDTD(hasExtSubset, sysId); // And try to scan the internal subset. If we fail, try to recover // by skipping forward tot he close angle and returning. if (!dtdScanner.scanInternalSubset()) { fReaderMgr.skipPastChar(chCloseAngle); return; } // Do a sanity check that some expanded PE did not propogate out of // the doctype. This could happen if it was terminated early by bad // syntax. if (fReaderMgr.getReaderDepth() > 1) { emitError(XMLErrs::PEPropogated); // Ask the reader manager to pop back down to the main level fReaderMgr.cleanStackBackTo(1); } fReaderMgr.skipPastSpaces(); } // And that should leave us at the closing > of the DOCTYPE line if (!fReaderMgr.skippedChar(chCloseAngle)) { // Do a special check for the common scenario of an extra ] char at // the end. This is easy to recover from. if (fReaderMgr.skippedChar(chCloseSquare) && fReaderMgr.skippedChar(chCloseAngle)) { emitError(XMLErrs::ExtraCloseSquare); } else { emitError(XMLErrs::UnterminatedDOCTYPE); fReaderMgr.skipPastChar(chCloseAngle); } } // If we had an external subset, then we need to deal with that one // next. If we are reusing the validator, then don't scan it. if (hasExtSubset) { if (fUseCachedGrammar) { InputSource* sysIdSrc = resolveSystemId(sysId); Janitor<InputSource> janSysIdSrc(sysIdSrc); Grammar* grammar = fGrammarResolver->getGrammar(sysIdSrc->getSystemId()); if (grammar && grammar->getGrammarType() == Grammar::DTDGrammarType) { fDTDGrammar = (DTDGrammar*) grammar; fGrammar = fDTDGrammar; fValidator->setGrammar(fGrammar); // we *cannot* identify the root element on // cached grammars; else we risk breaking multithreaded // applications. - NG /******* rootDecl = (DTDElementDecl*) fGrammar->getElemDecl(fEmptyNamespaceId, 0, bbRootName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE); if (rootDecl) ((DTDGrammar*)fGrammar)->setRootElemId(rootDecl->getId()); else { rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl ( bbRootName.getRawBuffer() , fEmptyNamespaceId , DTDElementDecl::Any , fGrammarPoolMemoryManager ); rootDecl->setCreateReason(DTDElementDecl::AsRootElem); rootDecl->setExternalElemDeclaration(true); ((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl)); } *********/ return; } } if (fLoadExternalDTD || fValidate) { // And now create a reader to read this entity InputSource* srcUsed; XMLReader* reader = fReaderMgr.createReader ( sysId , pubId , false , XMLReader::RefFrom_NonLiteral , XMLReader::Type_General , XMLReader::Source_External , srcUsed , fCalculateSrcOfs ); // Put a janitor on the input source Janitor<InputSource> janSrc(srcUsed); // If it failed then throw an exception if (!reader) ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenDTD, srcUsed->getSystemId(), fMemoryManager); if (fToCacheGrammar) { unsigned int stringId = fGrammarResolver->getStringPool()->addOrFind(srcUsed->getSystemId()); const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(stringId); fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString); ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr); fGrammarResolver->putGrammar(fGrammar); } // In order to make the processing work consistently, we have to // make this look like an external entity. So create an entity // decl and fill it in and push it with the reader, as happens // with an external entity. Put a janitor on it to insure it gets // cleaned up. The reader manager does not adopt them. const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull }; DTDEntityDecl* declDTD = new (fGrammarPoolMemoryManager) DTDEntityDecl(gDTDStr, false, fGrammarPoolMemoryManager); declDTD->setSystemId(sysId); Janitor<DTDEntityDecl> janDecl(declDTD); // Mark this one as a throw at end reader->setThrowAtEnd(true); // And push it onto the stack, with its pseudo name fReaderMgr.pushReader(reader, declDTD); // Tell it its not in an include section dtdScanner.scanExtSubsetDecl(false, true); } }}bool DGXMLScanner::scanStartTag(bool& gotData){ // Assume we will still have data until proven otherwise. It will only // ever be false if this is the root and its empty. gotData = true; // Get the QName. In this case, we are not doing namespaces, so we just // use it as is and don't have to break it into parts. if (!fReaderMgr.getName(fQNameBuf)) { emitError(XMLErrs::ExpectedElementName); fReaderMgr.skipToChar(chOpenAngle); return false; } // Assume it won't be an empty tag bool isEmpty = false; // See if its the root element const bool isRoot = fElemStack.isEmpty(); // Lets try to look up the element in the validator's element decl pool // We can pass bogus values for the URI id and the base name. We know that // this can only be called if we are doing a DTD style validator and that // he will only look at the QName. // // We *do not* tell him to fault in a decl if he does not find one - NG. bool wasAdded = false; const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer(); XMLElementDecl* elemDecl = fGrammar->getElemDecl ( fEmptyNamespaceId , 0 , qnameRawBuf , Grammar::TOP_LEVEL_SCOPE ); // look in the undeclared pool: if(!elemDecl) { elemDecl = fDTDElemNonDeclPool->getByKey(qnameRawBuf); } if(!elemDecl) { wasAdded = true; elemDecl = new (fMemoryManager) DTDElementDecl ( qnameRawBuf , fEmptyNamespaceId , DTDElementDecl::Any , fMemoryManager ); elemDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)elemDecl)); } if (fValidate) { if (wasAdded) { // This is to tell the reuse Validator that this element was // faulted-in, was not an element in the validator pool originally elemDecl->setCreateReason(XMLElementDecl::JustFaultIn); fValidator->emitError ( XMLValid::ElementNotDefined , qnameRawBuf ); } // If its not marked declared, then emit an error else if (!elemDecl->isDeclared()) { fValidator->emitError ( XMLValid::ElementNotDefined , qnameRawBuf ); }
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?