wfxmlscanner.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,883 行 · 第 1/5 页

CPP
1,883
字号
                    XMLErrs::XMLException_Error                    , excToCatch.getType()                    , excToCatch.getMessage()                );        }        catch(const OutOfMemoryException&)        {            throw;        }        catch(...)        {            // Reset and rethrow user error            fReaderMgr.reset();            throw;        }        // Reset and return failure        fReaderMgr.reset();        return false;    }    catch(const OutOfMemoryException&)    {        throw;    }    catch(...)    {        // Reset and rethrow original error        fReaderMgr.reset();        throw;    }    // If we hit the end, then flush the reader manager    if (!retVal)        fReaderMgr.reset();    return retVal;}// ---------------------------------------------------------------------------//  WFXMLScanner: Private helper methods.// ---------------------------------------------------------------------------//  This method handles the common initialization, to avoid having to do//  it redundantly in multiple constructors.void WFXMLScanner::commonInit(){    fEntityTable = new (fMemoryManager) ValueHashTableOf<XMLCh>(11, fMemoryManager);    fAttrNameHashList = new (fMemoryManager)ValueVectorOf<unsigned int>(16, fMemoryManager);    fAttrNSList = new (fMemoryManager) ValueVectorOf<XMLAttr*>(8, fMemoryManager);    fElements = new (fMemoryManager) RefVectorOf<XMLElementDecl>(32, true, fMemoryManager);    fElementLookup = new (fMemoryManager) RefHashTableOf<XMLElementDecl>(109, false, fMemoryManager);    //  Add the default entity entries for the character refs that must always    //  be present.    fEntityTable->put((void*) XMLUni::fgAmp, chAmpersand);    fEntityTable->put((void*) XMLUni::fgLT, chOpenAngle);    fEntityTable->put((void*) XMLUni::fgGT, chCloseAngle);    fEntityTable->put((void*) XMLUni::fgQuot, chDoubleQuote);    fEntityTable->put((void*) XMLUni::fgApos, chSingleQuote);}void WFXMLScanner::cleanUp(){    delete fEntityTable;    delete fAttrNameHashList;    delete fAttrNSList;    delete fElementLookup;    delete fElements;}unsigned intWFXMLScanner::resolvePrefix(const   XMLCh* const          prefix                            , const ElemStack::MapModes mode){    //  Watch for the special namespace prefixes. We always map these to    //  special URIs. 'xml' gets mapped to the official URI that its defined    //  to map to by the NS spec. xmlns gets mapped to a special place holder    //  URI that we define (so that it maps to something checkable.)    if (XMLString::equals(prefix, XMLUni::fgXMLNSString))        return fXMLNSNamespaceId;    else if (XMLString::equals(prefix, XMLUni::fgXMLString))        return fXMLNamespaceId;    //  Ask the element stack to search up itself for a mapping for the    //  passed prefix.    bool unknown;    unsigned int uriId = fElemStack.mapPrefixToURI(prefix, mode, unknown);    // If it was unknown, then the URI was faked in but we have to issue an error    if (unknown)        emitError(XMLErrs::UnknownPrefix, prefix);    return uriId;}//  This method will reset the scanner data structures, and related plugged//  in stuff, for a new scan session. We get the input source for the primary//  XML entity, create the reader for it, and push it on the stack so that//  upon successful return from here we are ready to go.void WFXMLScanner::scanReset(const InputSource& src){    //  For all installed handlers, send reset events. This gives them    //  a chance to flush any cached data.    if (fDocHandler)        fDocHandler->resetDocument();    if (fEntityHandler)        fEntityHandler->resetEntities();    if (fErrorReporter)        fErrorReporter->resetErrors();    //  Reset the element stack, and give it the latest ids for the special    //  URIs it has to know about.    fElemStack.reset    (        fEmptyNamespaceId        , fUnknownNamespaceId        , fXMLNamespaceId        , fXMLNSNamespaceId    );    // Reset some status flags    fInException = false;    fStandalone = false;    fErrorCount = 0;    fHasNoDTD = true;    fElementIndex = 0;    // Reset elements lookup table    fElementLookup->removeAll();    //  Handle the creation of the XML reader object for this input source.    //  This will provide us with transcoding and basic lexing services.    XMLReader* newReader = fReaderMgr.createReader    (        src        , true        , XMLReader::RefFrom_NonLiteral        , XMLReader::Type_General        , XMLReader::Source_External        , fCalculateSrcOfs    );    if (!newReader) {        if (src.getIssueFatalErrorIfNotFound())            ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager);        else            ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager);    }    // Push this read onto the reader manager    fReaderMgr.pushReader(newReader, 0);    // and reset security-related things if necessary:    if(fSecurityManager != 0)     {        fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit();        fEntityExpansionCount = 0;    }}//  This method is called between markup in content. It scans for character//  data that is sent to the document handler. It watches for any markup//  characters that would indicate that the character data has ended. It also//  handles expansion of general and character entities.////  sendData() is a local static helper for this method which handles some//  code that must be done in three different places here.void WFXMLScanner::sendCharData(XMLBuffer& toSend){    // If no data in the buffer, then nothing to do    if (toSend.isEmpty())        return;    // Always assume its just char data if not validating    if (fDocHandler)        fDocHandler->docCharacters(toSend.getRawBuffer(), toSend.getLen(), false);    // Reset buffer    toSend.reset();}// ---------------------------------------------------------------------------//  WFXMLScanner: Private scanning methods// ---------------------------------------------------------------------------//  This method will kick off the scanning of the primary content of the//  document, i.e. the elements.bool WFXMLScanner::scanContent(){    //  Go into a loop until we hit the end of the root element, or we fall    //  out because there is no root element.    //    //  We have to do kind of a deeply nested double loop here in order to    //  avoid doing the setup/teardown of the exception handler on each    //  round. Doing it this way we only do it when an exception actually    //  occurs.    bool gotData = true;    bool inMarkup = false;    while (gotData)    {        try        {            while (gotData)            {                //  Sense what the next top level token is. According to what                //  this tells us, we will call something to handle that kind                //  of thing.                unsigned int orgReader;                const XMLTokens curToken = senseNextToken(orgReader);                //  Handle character data and end of file specially. Char data                //  is not markup so we don't want to handle it in the loop                //  below.                if (curToken == Token_CharData)                {                    //  Scan the character data and call appropriate events. Let                    //  him use our local character data buffer for efficiency.                    scanCharData(fCDataBuf);                    continue;                }                else if (curToken == Token_EOF)                {                    //  The element stack better be empty at this point or we                    //  ended prematurely before all elements were closed.                    if (!fElemStack.isEmpty())                    {                        const ElemStack::StackElem* topElem = fElemStack.popTop();                        emitError                        (                            XMLErrs::EndedWithTagsOnStack                            , topElem->fThisElement->getFullName()                        );                    }                    // Its the end of file, so clear the got data flag                    gotData = false;                    continue;                }                // We are in some sort of markup now                inMarkup = true;                //  According to the token we got, call the appropriate                //  scanning method.                switch(curToken)                {                    case Token_CData :                        // Make sure we are within content                        if (fElemStack.isEmpty())                            emitError(XMLErrs::CDATAOutsideOfContent);                        scanCDSection();                        break;                    case Token_Comment :                        scanComment();                        break;                    case Token_EndTag :                        scanEndTag(gotData);                        break;                    case Token_PI :                        scanPI();                        break;                    case Token_StartTag :                        if (fDoNamespaces)                            scanStartTagNS(gotData);                        else                            scanStartTag(gotData);                        break;                    default :                        fReaderMgr.skipToChar(chOpenAngle);                        break;                }                if (orgReader != fReaderMgr.getCurrentReaderNum())                    emitError(XMLErrs::PartialMarkupInEntity);                // And we are back out of markup again                inMarkup = false;            }        }        catch(const EndOfEntityException& toCatch)        {            //  If we were in some markup when this happened, then its a            //  partial markup error.            if (inMarkup)                emitError(XMLErrs::PartialMarkupInEntity);            // Send an end of entity reference event            if (fDocHandler)                fDocHandler->endEntityReference(toCatch.getEntity());            inMarkup = false;        }    }    // It went ok, so return success    return true;}void WFXMLScanner::scanEndTag(bool& gotData){    //  Assume we will still have data until proven otherwise. It will only    //  ever be false if this is the end of the root element.    gotData = true;    //  Check if the element stack is empty. If so, then this is an unbalanced    //  element (i.e. more ends than starts, perhaps because of bad text    //  causing one to be skipped.)    if (fElemStack.isEmpty())    {        emitError(XMLErrs::MoreEndThanStartTags);        fReaderMgr.skipPastChar(chCloseAngle);        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);    }    //  Pop the stack of the element we are supposed to be ending. Remember    //  that we don't own this. The stack just keeps them and reuses them.    unsigned int uriId = (fDoNamespaces)        ? fElemStack.getCurrentURI() : fEmptyNamespaceId;    const ElemStack::StackElem* topElem = fElemStack.popTop();    // See if it was the root element, to avoid multiple calls below    const bool isRoot = fElemStack.isEmpty();    // Make sure that its the end of the element that we expect    if (!fReaderMgr.skippedString(topElem->fThisElement->getFullName()))    {        emitError        (            XMLErrs::ExpectedEndOfTagX            , topElem->fThisElement->getFullName()        );        fReaderMgr.skipPastChar(chCloseAngle);        return;    }    // Make sure we are back on the same reader as where we started    if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())        emitError(XMLErrs::PartialTagMarkupError);    // Skip optional whitespace    fReaderMgr.skipPastSpaces();    // Make sure we find the closing bracket    if (!fReaderMgr.skippedChar(chCloseAngle))    {        emitError        (            XMLErrs::UnterminatedEndTag            , topElem->fThisElement->getFullName()        );    }    // If we have a doc handler, tell it about the end tag    if (fDocHandler)    {        fDocHandler->endElement        (            *topElem->fThisElement            , uriId            , isRoot            , topElem->fThisElement->getElementName()->getPrefix()        );    }    // If this was the root, then done with content    gotData = !isRoot;}void WFXMLScanner::scanDocTypeDecl()

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?