wfxmlscanner.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,883 行 · 第 1/5 页
CPP
1,883 行
XMLErrs::XMLException_Error , excToCatch.getType() , excToCatch.getMessage() ); } catch(const OutOfMemoryException&) { throw; } catch(...) { // Reset and rethrow user error fReaderMgr.reset(); throw; } // Reset and return failure fReaderMgr.reset(); return false; } catch(const OutOfMemoryException&) { throw; } catch(...) { // Reset and rethrow original error fReaderMgr.reset(); throw; } // If we hit the end, then flush the reader manager if (!retVal) fReaderMgr.reset(); return retVal;}// ---------------------------------------------------------------------------// WFXMLScanner: Private helper methods.// ---------------------------------------------------------------------------// This method handles the common initialization, to avoid having to do// it redundantly in multiple constructors.void WFXMLScanner::commonInit(){ fEntityTable = new (fMemoryManager) ValueHashTableOf<XMLCh>(11, fMemoryManager); fAttrNameHashList = new (fMemoryManager)ValueVectorOf<unsigned int>(16, fMemoryManager); fAttrNSList = new (fMemoryManager) ValueVectorOf<XMLAttr*>(8, fMemoryManager); fElements = new (fMemoryManager) RefVectorOf<XMLElementDecl>(32, true, fMemoryManager); fElementLookup = new (fMemoryManager) RefHashTableOf<XMLElementDecl>(109, false, fMemoryManager); // Add the default entity entries for the character refs that must always // be present. fEntityTable->put((void*) XMLUni::fgAmp, chAmpersand); fEntityTable->put((void*) XMLUni::fgLT, chOpenAngle); fEntityTable->put((void*) XMLUni::fgGT, chCloseAngle); fEntityTable->put((void*) XMLUni::fgQuot, chDoubleQuote); fEntityTable->put((void*) XMLUni::fgApos, chSingleQuote);}void WFXMLScanner::cleanUp(){ delete fEntityTable; delete fAttrNameHashList; delete fAttrNSList; delete fElementLookup; delete fElements;}unsigned intWFXMLScanner::resolvePrefix(const XMLCh* const prefix , const ElemStack::MapModes mode){ // Watch for the special namespace prefixes. We always map these to // special URIs. 'xml' gets mapped to the official URI that its defined // to map to by the NS spec. xmlns gets mapped to a special place holder // URI that we define (so that it maps to something checkable.) if (XMLString::equals(prefix, XMLUni::fgXMLNSString)) return fXMLNSNamespaceId; else if (XMLString::equals(prefix, XMLUni::fgXMLString)) return fXMLNamespaceId; // Ask the element stack to search up itself for a mapping for the // passed prefix. bool unknown; unsigned int uriId = fElemStack.mapPrefixToURI(prefix, mode, unknown); // If it was unknown, then the URI was faked in but we have to issue an error if (unknown) emitError(XMLErrs::UnknownPrefix, prefix); return uriId;}// This method will reset the scanner data structures, and related plugged// in stuff, for a new scan session. We get the input source for the primary// XML entity, create the reader for it, and push it on the stack so that// upon successful return from here we are ready to go.void WFXMLScanner::scanReset(const InputSource& src){ // For all installed handlers, send reset events. This gives them // a chance to flush any cached data. if (fDocHandler) fDocHandler->resetDocument(); if (fEntityHandler) fEntityHandler->resetEntities(); if (fErrorReporter) fErrorReporter->resetErrors(); // Reset the element stack, and give it the latest ids for the special // URIs it has to know about. fElemStack.reset ( fEmptyNamespaceId , fUnknownNamespaceId , fXMLNamespaceId , fXMLNSNamespaceId ); // Reset some status flags fInException = false; fStandalone = false; fErrorCount = 0; fHasNoDTD = true; fElementIndex = 0; // Reset elements lookup table fElementLookup->removeAll(); // Handle the creation of the XML reader object for this input source. // This will provide us with transcoding and basic lexing services. XMLReader* newReader = fReaderMgr.createReader ( src , true , XMLReader::RefFrom_NonLiteral , XMLReader::Type_General , XMLReader::Source_External , fCalculateSrcOfs ); if (!newReader) { if (src.getIssueFatalErrorIfNotFound()) ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager); else ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager); } // Push this read onto the reader manager fReaderMgr.pushReader(newReader, 0); // and reset security-related things if necessary: if(fSecurityManager != 0) { fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit(); fEntityExpansionCount = 0; }}// This method is called between markup in content. It scans for character// data that is sent to the document handler. It watches for any markup// characters that would indicate that the character data has ended. It also// handles expansion of general and character entities.//// sendData() is a local static helper for this method which handles some// code that must be done in three different places here.void WFXMLScanner::sendCharData(XMLBuffer& toSend){ // If no data in the buffer, then nothing to do if (toSend.isEmpty()) return; // Always assume its just char data if not validating if (fDocHandler) fDocHandler->docCharacters(toSend.getRawBuffer(), toSend.getLen(), false); // Reset buffer toSend.reset();}// ---------------------------------------------------------------------------// WFXMLScanner: Private scanning methods// ---------------------------------------------------------------------------// This method will kick off the scanning of the primary content of the// document, i.e. the elements.bool WFXMLScanner::scanContent(){ // Go into a loop until we hit the end of the root element, or we fall // out because there is no root element. // // We have to do kind of a deeply nested double loop here in order to // avoid doing the setup/teardown of the exception handler on each // round. Doing it this way we only do it when an exception actually // occurs. bool gotData = true; bool inMarkup = false; while (gotData) { try { while (gotData) { // Sense what the next top level token is. According to what // this tells us, we will call something to handle that kind // of thing. unsigned int orgReader; const XMLTokens curToken = senseNextToken(orgReader); // Handle character data and end of file specially. Char data // is not markup so we don't want to handle it in the loop // below. if (curToken == Token_CharData) { // Scan the character data and call appropriate events. Let // him use our local character data buffer for efficiency. scanCharData(fCDataBuf); continue; } else if (curToken == Token_EOF) { // The element stack better be empty at this point or we // ended prematurely before all elements were closed. if (!fElemStack.isEmpty()) { const ElemStack::StackElem* topElem = fElemStack.popTop(); emitError ( XMLErrs::EndedWithTagsOnStack , topElem->fThisElement->getFullName() ); } // Its the end of file, so clear the got data flag gotData = false; continue; } // We are in some sort of markup now inMarkup = true; // According to the token we got, call the appropriate // scanning method. switch(curToken) { case Token_CData : // Make sure we are within content if (fElemStack.isEmpty()) emitError(XMLErrs::CDATAOutsideOfContent); scanCDSection(); break; case Token_Comment : scanComment(); break; case Token_EndTag : scanEndTag(gotData); break; case Token_PI : scanPI(); break; case Token_StartTag : if (fDoNamespaces) scanStartTagNS(gotData); else scanStartTag(gotData); break; default : fReaderMgr.skipToChar(chOpenAngle); break; } if (orgReader != fReaderMgr.getCurrentReaderNum()) emitError(XMLErrs::PartialMarkupInEntity); // And we are back out of markup again inMarkup = false; } } catch(const EndOfEntityException& toCatch) { // If we were in some markup when this happened, then its a // partial markup error. if (inMarkup) emitError(XMLErrs::PartialMarkupInEntity); // Send an end of entity reference event if (fDocHandler) fDocHandler->endEntityReference(toCatch.getEntity()); inMarkup = false; } } // It went ok, so return success return true;}void WFXMLScanner::scanEndTag(bool& gotData){ // Assume we will still have data until proven otherwise. It will only // ever be false if this is the end of the root element. gotData = true; // Check if the element stack is empty. If so, then this is an unbalanced // element (i.e. more ends than starts, perhaps because of bad text // causing one to be skipped.) if (fElemStack.isEmpty()) { emitError(XMLErrs::MoreEndThanStartTags); fReaderMgr.skipPastChar(chCloseAngle); ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager); } // Pop the stack of the element we are supposed to be ending. Remember // that we don't own this. The stack just keeps them and reuses them. unsigned int uriId = (fDoNamespaces) ? fElemStack.getCurrentURI() : fEmptyNamespaceId; const ElemStack::StackElem* topElem = fElemStack.popTop(); // See if it was the root element, to avoid multiple calls below const bool isRoot = fElemStack.isEmpty(); // Make sure that its the end of the element that we expect if (!fReaderMgr.skippedString(topElem->fThisElement->getFullName())) { emitError ( XMLErrs::ExpectedEndOfTagX , topElem->fThisElement->getFullName() ); fReaderMgr.skipPastChar(chCloseAngle); return; } // Make sure we are back on the same reader as where we started if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum()) emitError(XMLErrs::PartialTagMarkupError); // Skip optional whitespace fReaderMgr.skipPastSpaces(); // Make sure we find the closing bracket if (!fReaderMgr.skippedChar(chCloseAngle)) { emitError ( XMLErrs::UnterminatedEndTag , topElem->fThisElement->getFullName() ); } // If we have a doc handler, tell it about the end tag if (fDocHandler) { fDocHandler->endElement ( *topElem->fThisElement , uriId , isRoot , topElem->fThisElement->getElementName()->getPrefix() ); } // If this was the root, then done with content gotData = !isRoot;}void WFXMLScanner::scanDocTypeDecl()
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?