xmlscanner.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,855 行 · 第 1/5 页

CPP
1,855
字号
            bbTarget.append(nextCh);        }    }    else    {        // No target, but make sure its terminated ok        if (!fReaderMgr.skippedChar(chQuestion))        {            emitError(XMLErrs::UnterminatedPI);            fReaderMgr.skipPastChar(chCloseAngle);            return;        }        if (!fReaderMgr.skippedChar(chCloseAngle))        {            emitError(XMLErrs::UnterminatedPI);            fReaderMgr.skipPastChar(chCloseAngle);            return;        }    }    // Point the target pointer at the raw data    targetPtr = bbTarget.getRawBuffer();    // If we have a handler, then call it    if (fDocHandler)    {        fDocHandler->docPI        (            namePtr            , targetPtr       );    }    //mark PI is seen within the current element    if (! fElemStack.isEmpty())        fElemStack.setCommentOrPISeen();}//  Scans all the input from the start of the file to the root element.//  There does not have to be anything in the prolog necessarily, but usually//  there is at least an XMLDecl.////  On exit from here we are either at the end of the file or about to read//  the opening < of the root element.void XMLScanner::scanProlog(){    // Get a buffer for whitespace processing    XMLBufBid bbCData(&fBufMgr);    //  Loop through the prolog. If there is no content, this could go all    //  the way to the end of the file.    try    {        while (true)        {            const XMLCh nextCh = fReaderMgr.peekNextChar();            if (nextCh == chOpenAngle)            {                //  Ok, it could be the xml decl, a comment, the doc type line,                //  or the start of the root element.                if (checkXMLDecl(true))                {                    // There shall be at lease --ONE-- space in between                    // the tag '<?xml' and the VersionInfo.                    //                    //  If we are not at line 1, col 6, then the decl was not                    //  the first text, so its invalid.                    const XMLReader* curReader = fReaderMgr.getCurrentReader();                    if ((curReader->getLineNumber() != 1)                    ||  (curReader->getColumnNumber() != 7))                    {                        emitError(XMLErrs::XMLDeclMustBeFirst);                    }                    scanXMLDecl(Decl_XML);                }                else if (fReaderMgr.skippedString(XMLUni::fgPIString))                {                    scanPI();                }                 else if (fReaderMgr.skippedString(XMLUni::fgCommentString))                {                    scanComment();                }                 else if (fReaderMgr.skippedString(XMLUni::fgDocTypeString))                {                    scanDocTypeDecl();                    // if reusing grammar, this has been validated already in first scan                    // skip for performance                    if (fValidate && !fGrammar->getValidated()) {                        //  validate the DTD scan so far                        fValidator->preContentValidation(fUseCachedGrammar, true);                    }                }                else                {                    // Assume its the start of the root element                    return;                }            }            else if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))            {                //  If we have a document handler then gather up the                //  whitespace and call back. Otherwise just skip over spaces.                if (fDocHandler)                {                    fReaderMgr.getSpaces(bbCData.getBuffer());                    fDocHandler->ignorableWhitespace                    (                        bbCData.getRawBuffer()                        , bbCData.getLen()                        , false                    );                }                 else                {                    fReaderMgr.skipPastSpaces();                }            }             else            {                emitError(XMLErrs::InvalidDocumentStructure);                // Watch for end of file and break out                if (!nextCh)                    break;                else                    fReaderMgr.skipPastChar(chCloseAngle);            }        }    }    catch(const EndOfEntityException&)    {        //  We should never get an end of entity here. They should only        //  occur within the doc type scanning method, and not leak out to        //  here.        emitError        (            XMLErrs::UnexpectedEOE            , "in prolog"        );    }}//  Scans the <?xml .... ?> line. This stuff is all sequential so we don't//  do any state machine loop here. We just bull straight through it. It ends//  past the closing bracket. If there is a document handler, then its called//  on the XMLDecl callback.////  On entry, the <?xml has been scanned, and we pick it up from there.////  NOTE: In order to provide good recovery from bad XML here, we try to be//  very flexible. No matter what order the stuff is in, we'll keep going//  though we'll issue errors.////  The parameter tells us which type of decl we should expect, Text or XML.//    [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'//    [77] TextDecl::= '<?xml' VersionInfo? EncodingDecl S? '?>'void XMLScanner::scanXMLDecl(const DeclTypes type){    // Get us some buffers to use    XMLBufBid bbVersion(&fBufMgr);    XMLBufBid bbEncoding(&fBufMgr);    XMLBufBid bbStand(&fBufMgr);    XMLBufBid bbDummy(&fBufMgr);    XMLBufBid bbName(&fBufMgr);    //  We use this little enum and array to keep up with what we found    //  and what order we found them in. This lets us get them free form    //  without too much overhead, but still know that they were in the    //  wrong order.    enum Strings    {        VersionString        , EncodingString        , StandaloneString        , UnknownString        , StringCount    };    int flags[StringCount] = { -1, -1, -1, -1 };    //  Also set up a list of buffers in the right order so that we know    //  where to put stuff.    XMLBuffer* buffers[StringCount] ;    buffers[0] = &bbVersion.getBuffer();    buffers[1] = &bbEncoding.getBuffer();    buffers[2] = &bbStand.getBuffer();    buffers[3] = &bbDummy.getBuffer();    int curCount = 0;    Strings curString;    XMLBuffer& nameBuf = bbName.getBuffer();    while (true)    {        // Skip any spaces        const unsigned int spaceCount = fReaderMgr.skipPastSpaces(true);        // If we are looking at a question mark, then break out        if (fReaderMgr.lookingAtChar(chQuestion))            break;        // If this is not the first string, then we require the spaces        if (!spaceCount && curCount)            emitError(XMLErrs::ExpectedWhitespace);        //  Get characters up to the next whitespace or equal's sign.        if (!scanUpToWSOr(nameBuf, chEqual))            emitError(XMLErrs::ExpectedDeclString);        // See if it matches any of our expected strings        if (XMLString::equals(nameBuf.getRawBuffer(), XMLUni::fgVersionString))            curString = VersionString;        else if (XMLString::equals(nameBuf.getRawBuffer(), XMLUni::fgEncodingString))            curString = EncodingString;        else if (XMLString::equals(nameBuf.getRawBuffer(), XMLUni::fgStandaloneString))            curString = StandaloneString;        else            curString = UnknownString;        //  If its an unknown string, then give that error. Else check to        //  see if this one has been done already and give that error.        if (curString == UnknownString)            emitError(XMLErrs::ExpectedDeclString, nameBuf.getRawBuffer());        else if (flags[curString] != -1)            emitError(XMLErrs::DeclStringRep, nameBuf.getRawBuffer());        else if (flags[curString] == -1)            flags[curString] = ++curCount;        //  Scan for an equal's sign. If we don't find it, issue an error        //  but keep trying to go on.        if (!scanEq(true))            emitError(XMLErrs::ExpectedEqSign);        //  Get a quote string into the buffer for the string that we are        //  currently working on.        if (!getQuotedString(*buffers[curString]))        {            emitError(XMLErrs::ExpectedQuotedString);            fReaderMgr.skipPastChar(chCloseAngle);            return;        }        // And validate the value according which one it was        const XMLCh* rawValue = buffers[curString]->getRawBuffer();        if (curString == VersionString)        {            if (XMLString::equals(rawValue, XMLUni::fgVersion1_1)) {                if (type == Decl_XML) {                	fXMLVersion = XMLReader::XMLV1_1;                    fReaderMgr.setXMLVersion(XMLReader::XMLV1_1);                }                else {            	    if (fXMLVersion != XMLReader::XMLV1_1)            	        emitError(XMLErrs::UnsupportedXMLVersion, rawValue);                            	}            }            else if (XMLString::equals(rawValue, XMLUni::fgVersion1_0)) {                if (type == Decl_XML) {                	fXMLVersion = XMLReader::XMLV1_0;                    fReaderMgr.setXMLVersion(XMLReader::XMLV1_0);                                    }            }            else                emitError(XMLErrs::UnsupportedXMLVersion, rawValue);        }         else if (curString == EncodingString)        {            if (!XMLString::isValidEncName(rawValue))                emitError(XMLErrs::BadXMLEncoding, rawValue);        }         else if (curString == StandaloneString)        {            if (XMLString::equals(rawValue, XMLUni::fgYesString))                fStandalone = true;            else if (XMLString::equals(rawValue, XMLUni::fgNoString))                fStandalone = false;            else            {                emitError(XMLErrs::BadStandalone);                if (!XMLString::compareIString(rawValue, XMLUni::fgYesString))                    fStandalone = true;                else if (!XMLString::compareIString(rawValue, XMLUni::fgNoString))                    fStandalone = false;            }        }    }    //  Make sure that the strings present are in order. We don't care about    //  which ones are present at this point, just that any there are in the    //  right order.    int curTop = 0;    for (int index = VersionString; index < StandaloneString; index++)    {        if (flags[index] != -1)        {            if (flags[index] !=  curTop + 1)            {                emitError(XMLErrs::DeclStringsInWrongOrder);                break;            }            curTop = flags[index];        }    }    //  If its an XML decl, the version must be present.    //  If its a Text decl, then encoding must be present AND standalone must not be present.    if ((type == Decl_XML) && (flags[VersionString] == -1))        emitError(XMLErrs::XMLVersionRequired);    else if (type == Decl_Text) {        if (flags[StandaloneString] != -1)            emitError(XMLErrs::StandaloneNotLegal);        if (flags[EncodingString] == -1)            emitError(XMLErrs::EncodingRequired);    }    if (!fReaderMgr.skippedChar(chQuestion))    {        emitError(XMLErrs::UnterminatedXMLDecl);        fReaderMgr.skipPastChar(chCloseAngle);    }     else if (!fReaderMgr.skippedChar(chCloseAngle))    {        emitError(XMLErrs::UnterminatedXMLDecl);        fReaderMgr.skipPastChar(chCloseAngle);    }    //  Do this before we possibly update the reader with the    //  actual encoding string. Otherwise, we will pass the wrong thing    //  for the last parameter!    const XMLCh* actualEnc = fReaderMgr.getCurrentEncodingStr();    //  Ok, we've now seen the real encoding string, if there was one, so    //  lets call back on the current reader and tell it what the real    //  encoding string was. If it fails, that's because it represents some    //  sort of contradiction with the autosensed format, and it keeps the    //  original encoding.    //    //  NOTE: This can fail for a number of reasons, such as a bogus encoding    //  name or because its in flagrant contradiction of the auto-sensed    //  format.    if (flags[EncodingString] != -1)    {        if (!fReaderMgr.getCurrentReader()->setEncoding(bbEncoding.getRawBuffer()))            emitError(XMLErrs::ContradictoryEncoding, bbEncoding.getRawBuffer());        else            actualEnc = bbEncoding.getRawBuffer();    }    //  If we have a document handler then call the XML Decl callback.    if (type == Decl_XML)    {        if (fDocHandler)            fDocHandler->XMLDecl            (                bbVersion.getRawBuffer()                , bbEncoding.getRawBuffer()                , bbStand.getRawBuffer()                , actualEnc            );    }    else if (type == Decl_Text)    {        if (fDocTypeHandler)            fDocTypeHandler->TextDecl

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?