xmlscanner.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,855 行 · 第 1/5 页

CPP
1,855
字号
            (                bbVersion.getRawBuffer()                , bbEncoding.getRawBuffer()            );    }}const XMLCh* XMLScanner::getURIText(const   unsigned int    uriId) const{    if (fURIStringPool->exists(uriId)) {        // Look up the URI in the string pool and return its id        const XMLCh* value = fURIStringPool->getValueForId(uriId);        if (!value)            return XMLUni::fgZeroLenString;        return value;    }    else        return XMLUni::fgZeroLenString;}bool XMLScanner::getURIText(  const   unsigned int    uriId                      ,       XMLBuffer&      uriBufToFill) const{    if (fURIStringPool->exists(uriId)) {        // Look up the URI in the string pool and return its id        const XMLCh* value = fURIStringPool->getValueForId(uriId);        if (!value)            return false;        uriBufToFill.set(value);        return true;    }    else        return false;}bool XMLScanner::checkXMLDecl(bool startWithAngle) {    // [23] XMLDecl     ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'    // [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')    //    // [3]  S           ::= (#x20 | #x9 | #xD | #xA)+    if (startWithAngle) {        if (fReaderMgr.peekString(XMLUni::fgXMLDeclString)) {            if (fReaderMgr.skippedString(XMLUni::fgXMLDeclStringSpace)               || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringHTab)               || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringLF)               || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringCR))            {                return true;            }            else if (fReaderMgr.skippedString(XMLUni::fgXMLDeclStringSpaceU)               || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringHTabU)               || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringLFU)               || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringCRU))            {                //  Just in case, check for upper case. If found, issue                //  an error, but keep going.                emitError(XMLErrs::XMLDeclMustBeLowerCase);                return true;            }        }    }    else {        if (fReaderMgr.peekString(XMLUni::fgXMLString)) {            if (fReaderMgr.skippedString(XMLUni::fgXMLStringSpace)               || fReaderMgr.skippedString(XMLUni::fgXMLStringHTab)               || fReaderMgr.skippedString(XMLUni::fgXMLStringLF)               || fReaderMgr.skippedString(XMLUni::fgXMLStringCR))            {                return true;            }            else if (fReaderMgr.skippedString(XMLUni::fgXMLStringSpaceU)               || fReaderMgr.skippedString(XMLUni::fgXMLStringHTabU)               || fReaderMgr.skippedString(XMLUni::fgXMLStringLFU)               || fReaderMgr.skippedString(XMLUni::fgXMLStringCRU))            {                //  Just in case, check for upper case. If found, issue                //  an error, but keep going.                emitError(XMLErrs::XMLDeclMustBeLowerCase);                return true;            }        }    }    return false;}// ---------------------------------------------------------------------------//  XMLScanner: Grammar preparsing// ---------------------------------------------------------------------------Grammar* XMLScanner::loadGrammar(const   XMLCh* const systemId                                 , const short        grammarType                                 , const bool         toCache){    InputSource* srcToUse = 0;    if (fEntityHandler){        ReaderMgr::LastExtEntityInfo lastInfo;        fReaderMgr.getLastExtEntityInfo(lastInfo);        XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::ExternalEntity,                            systemId, 0, XMLUni::fgZeroLenString, lastInfo.systemId);        srcToUse = fEntityHandler->resolveEntity(&resourceIdentifier);    }    //  First we try to parse it as a URL. If that fails, we assume its    //  a file and try it that way.    if (!srcToUse) {        try        {            //  Create a temporary URL. Since this is the primary document,            //  it has to be fully qualified. If not, then assume we are just            //  mistaking a file for a URL.            XMLURL tmpURL(fMemoryManager);            if (XMLURL::parse(systemId, tmpURL)) {                            if (tmpURL.isRelative())                {                    if (!fStandardUriConformant)                        srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager);                    else {                        // since this is the top of the try/catch, cannot call ThrowXMLwithMemMgr                        // emit the error directly                        MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_NoProtocolPresent, fMemoryManager);                        fInException = true;                        emitError                        (                            XMLErrs::XMLException_Fatal                            , e.getType()                            , e.getMessage()                        );                        return 0;                    }                }                else                {                    if (fStandardUriConformant && tmpURL.hasInvalidChar()) {                        MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL, fMemoryManager);                        fInException = true;                        emitError                        (                            XMLErrs::XMLException_Fatal                            , e.getType()                            , e.getMessage()                        );                        return 0;                    }                    srcToUse = new (fMemoryManager) URLInputSource(tmpURL, fMemoryManager);                }            }            else                     {                if (!fStandardUriConformant)                    srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager);                else {                    // since this is the top of the try/catch, cannot call ThrowXMLwithMemMgr                    // emit the error directly                    // lazy bypass ... since all MalformedURLException are fatal, no need to check the type                    MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL);                    fInException = true;                    emitError                    (                        XMLErrs::XMLException_Fatal                        , e.getType()                        , e.getMessage()                    );                    return 0;                }            }        }        catch(const XMLException& excToCatch)        {            //  For any other XMLException,            //  emit the error and catch any user exception thrown from here.            fInException = true;            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)                emitError                (                    XMLErrs::XMLException_Warning                    , excToCatch.getType()                    , excToCatch.getMessage()                );            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)                emitError                (                    XMLErrs::XMLException_Fatal                    , excToCatch.getType()                    , excToCatch.getMessage()                );            else                emitError                (                    XMLErrs::XMLException_Error                    , excToCatch.getType()                    , excToCatch.getMessage()                );                return 0;        }    }    Janitor<InputSource> janSrc(srcToUse);    return loadGrammar(*srcToUse, grammarType, toCache);}Grammar* XMLScanner::loadGrammar(const   char* const systemId                                 , const short       grammarType                                 , const bool        toCache){    // We just delegate this to the XMLCh version after transcoding    XMLCh* tmpBuf = XMLString::transcode(systemId, fMemoryManager);    ArrayJanitor<XMLCh> janBuf(tmpBuf, fMemoryManager);    return loadGrammar(tmpBuf, grammarType, toCache);}// ---------------------------------------------------------------------------//  XMLScanner: Setter methods// ---------------------------------------------------------------------------void XMLScanner::setURIStringPool(XMLStringPool* const stringPool){    fURIStringPool = stringPool;    fEmptyNamespaceId   = fURIStringPool->addOrFind(XMLUni::fgZeroLenString);    fUnknownNamespaceId = fURIStringPool->addOrFind(XMLUni::fgUnknownURIName);    fXMLNamespaceId     = fURIStringPool->addOrFind(XMLUni::fgXMLURIName);    fXMLNSNamespaceId   = fURIStringPool->addOrFind(XMLUni::fgXMLNSURIName);}// ---------------------------------------------------------------------------//  XMLScanner: Private helper methods// ---------------------------------------------------------------------------/*** * In reusing grammars (cacheing grammar from parse, or use cached grammar), internal * dtd is allowed conditionally. * * In the case of cacheing grammar from parse, it is NOT allowed. * * In the case of use cached grammar, *   if external dtd is present and it is parsed before, then it is not allowed, *   otherwise it is allowed. * ***/void XMLScanner::checkInternalDTD(bool hasExtSubset, const XMLCh* const sysId){    if (fToCacheGrammar)        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Val_CantHaveIntSS, fMemoryManager);    if (fUseCachedGrammar && hasExtSubset )    {        InputSource* sysIdSrc = resolveSystemId(sysId);        Janitor<InputSource> janSysIdSrc(sysIdSrc);        Grammar* grammar = fGrammarResolver->getGrammar(sysIdSrc->getSystemId());        if (grammar && grammar->getGrammarType() == Grammar::DTDGrammarType)         {            ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Val_CantHaveIntSS, fMemoryManager);        }    }}//  This method is called after the content scan to insure that all the//  ID/IDREF attributes match up (i.e. that all IDREFs refer to IDs.) This is//  an XML 1.0 rule, so we can do here in the core.void XMLScanner::checkIDRefs(){    //  Iterate the id ref list. If we find any entries here which are used    //  but not declared, then that's an error.    RefHashTableOfEnumerator<XMLRefInfo> refEnum(fValidationContext->getIdRefList(), false, fMemoryManager);    while (refEnum.hasMoreElements())    {        // Get a ref to the current element        const XMLRefInfo& curRef = refEnum.nextElement();        // If its used but not declared, then its an error        if (!curRef.getDeclared() && curRef.getUsed() && fValidate)            fValidator->emitError(XMLValid::IDNotDeclared, curRef.getRefName());    }}//  This just does a simple check that the passed progressive scan token is//  legal for this scanner.bool XMLScanner::isLegalToken(const XMLPScanToken& toCheck){    return ((fScannerId == toCheck.fScannerId)    &&      (fSequenceId == toCheck.fSequenceId));}//  This method will handle figuring out what the next top level token is//  in the input stream. It will return an enumerated value that indicates//  what it believes the next XML level token must be. It will eat as many//  chars are required to figure out what is next.XMLScanner::XMLTokens XMLScanner::senseNextToken(unsigned int& orgReader){    //  Get the next character and use it to guesstimate what the next token    //  is going to be. We turn on end of entity exceptions when we do this    //  in order to catch the scenario where the current entity ended at    //  the > of some markup.    XMLCh nextCh;    {        ThrowEOEJanitor janMgr(&fReaderMgr, true);        nextCh = fReaderMgr.peekNextChar();    }    //  Check for special chars. Start with the most    //  obvious end of file, which should be legal here at top level.    if (!nextCh)        return Token_EOF;    //  If it's not a '<' we must be in content.    //    //  This includes entity references '&' of some sort. These must    //  be character data because that's the only place a reference can    //  occur in content.    if (nextCh != chOpenAngle)        return Token_CharData;    //  Ok it had to have been a '<' character. So get it out of the reader    //  and store the reader number where we saw it, passing it back to the    //  caller.    fReaderMgr.getNextChar();    orgReader = fReaderMgr.getCurrentReaderNum();    //  Ok, so lets go through the things that it could be at this point which    //  are all some form of markup.    nextCh = fReaderMgr.peekNextChar();    if (nextCh == chForwardSlash)    {        fReaderMgr.getNextChar();        return Token_EndTag;    }    else if (nextCh == chBang)    {        static const XMLCh gCDATAStr[] =        {                chBang, chOpenSquare, chLatin_C, chLatin_D, chLatin_A            ,   chLatin_T, chLatin_A, chNull        };        static const XMLCh gCommentString[] =        {            chBang, chDash, chDash, chNull        };        if (fReaderMgr.skippedString(gCDATAStr))            return Token_CData;        if (fReaderMgr.skippedString(gCommentString))            return Token_Comment;        emitError(XMLErrs::ExpectedCommentOrCDATA);        return Token_Unknown;    }    else if (nextCh == chQuestion)    {        // It must be a PI        fReaderMgr.getNextChar();        return Token_PI;    }    //  Assume its an element name, so return with a start tag token. If it    //  turns out not to be, then it will fail when it can

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?