wfxmlscanner.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,883 行 · 第 1/5 页

CPP
1,883
字号
    (        elemDecl->getElementName()->getPrefix()        , ElemStack::Mode_Element    );    // Now we can update the element stack    fElemStack.setCurrentURI(uriId);    // Tell the document handler about this start tag    if (fDocHandler)    {        fDocHandler->startElement        (            *elemDecl            , uriId            , elemDecl->getElementName()->getPrefix()            , *fAttrList            , attCount            , false            , isRoot        );    }    //  If empty, validate content right now if we are validating and then    //  pop the element stack top. Else, we have to update the current stack    //  top's namespace mapping elements.    if (isEmpty)    {        // Pop the element stack back off since it'll never be used now        fElemStack.popTop();        // If we have a doc handler, tell it about the end tag        if (fDocHandler)        {            fDocHandler->endElement            (                *elemDecl                , uriId                , isRoot                , elemDecl->getElementName()->getPrefix()            );        }        // If the elem stack is empty, then it was an empty root        if (isRoot)            gotData = false;    }    return true;}unsigned intWFXMLScanner::resolveQName(const   XMLCh* const qName                           ,       XMLBuffer&   prefixBuf                           , const short        mode                           ,       int&         prefixColonPos){    //  Lets split out the qName into a URI and name buffer first. The URI    //  can be empty.    prefixColonPos = XMLString::indexOf(qName, chColon);    if (prefixColonPos == -1)    {        //  Its all name with no prefix, so put the whole thing into the name        //  buffer. Then map the empty string to a URI, since the empty string        //  represents the default namespace. This will either return some        //  explicit URI which the default namespace is mapped to, or the        //  the default global namespace.        bool unknown = false;        prefixBuf.reset();        return fElemStack.mapPrefixToURI(XMLUni::fgZeroLenString, (ElemStack::MapModes) mode, unknown);    }    else    {        //  Copy the chars up to but not including the colon into the prefix        //  buffer.        prefixBuf.set(qName, prefixColonPos);        //  Watch for the special namespace prefixes. We always map these to        //  special URIs. 'xml' gets mapped to the official URI that its defined        //  to map to by the NS spec. xmlns gets mapped to a special place holder        //  URI that we define (so that it maps to something checkable.)        const XMLCh* prefixRawBuf = prefixBuf.getRawBuffer();        if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLNSString)) {            // if this is an element, it is an error to have xmlns as prefix            if (mode == ElemStack::Mode_Element)                emitError(XMLErrs::NoXMLNSAsElementPrefix, qName);            return fXMLNSNamespaceId;        }        else if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLString)) {            return  fXMLNamespaceId;        }        else        {            bool unknown = false;            unsigned int uriId = fElemStack.mapPrefixToURI(prefixRawBuf, (ElemStack::MapModes) mode, unknown);            if (unknown)                emitError(XMLErrs::UnknownPrefix, prefixRawBuf);            return uriId;        }    }}// ---------------------------------------------------------------------------//  XMLScanner: Private parsing methods// ---------------------------------------------------------------------------bool WFXMLScanner::scanAttValue(const XMLCh* const attrName                              ,     XMLBuffer&   toFill){    // Reset the target buffer    toFill.reset();    // Get the next char which must be a single or double quote    XMLCh quoteCh;    if (!fReaderMgr.skipIfQuote(quoteCh))        return false;    //  We have to get the current reader because we have to ignore closing    //  quotes until we hit the same reader again.    const unsigned int curReader = fReaderMgr.getCurrentReaderNum();    //  Loop until we get the attribute value. Note that we use a double    //  loop here to avoid the setup/teardown overhead of the exception    //  handler on every round.    XMLCh   nextCh;    XMLCh   secondCh = 0;    bool    gotLeadingSurrogate = false;    bool    escaped;    while (true)    {    try    {        while(true)        {            nextCh = fReaderMgr.getNextChar();            if (!nextCh)                ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);            // Check for our ending quote in the same entity            if (nextCh == quoteCh)            {                if (curReader == fReaderMgr.getCurrentReaderNum())                    return true;                // Watch for spillover into a previous entity                if (curReader > fReaderMgr.getCurrentReaderNum())                {                    emitError(XMLErrs::PartialMarkupInEntity);                    return false;                }            }            //  Check for an entity ref now, before we let it affect our            //  whitespace normalization logic below. We ignore the empty flag            //  in this one.            escaped = false;            if (nextCh == chAmpersand)            {                if (scanEntityRef(true, nextCh, secondCh, escaped) != EntityExp_Returned)                {                    gotLeadingSurrogate = false;                    continue;                }            }            else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))            {                // Deal with surrogate pairs                //  Its a leading surrogate. If we already got one, then                //  issue an error, else set leading flag to make sure that                //  we look for a trailing next time.                if (gotLeadingSurrogate)                {                    emitError(XMLErrs::Expected2ndSurrogateChar);                }                else                    gotLeadingSurrogate = true;            }            else            {                //  If its a trailing surrogate, make sure that we are                //  prepared for that. Else, its just a regular char so make                //  sure that we were not expected a trailing surrogate.                if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))                {                    // Its trailing, so make sure we were expecting it                    if (!gotLeadingSurrogate)                        emitError(XMLErrs::Unexpected2ndSurrogateChar);                }                else                {                    //  Its just a char, so make sure we were not expecting a                    //  trailing surrogate.                    if (gotLeadingSurrogate) {                        emitError(XMLErrs::Expected2ndSurrogateChar);                    }                    // Its got to at least be a valid XML character                    else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))                    {                        XMLCh tmpBuf[9];                        XMLString::binToText                        (                            nextCh                            , tmpBuf                            , 8                            , 16                            , fMemoryManager                        );                        emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);                    }                }                gotLeadingSurrogate = false;            }            //  If its not escaped, then make sure its not a < character, which            //  is not allowed in attribute values.            if (!escaped) {                if (nextCh == chOpenAngle)                    emitError(XMLErrs::BracketInAttrValue, attrName);                else if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))                    nextCh = chSpace;            }            // Else add it to the buffer            toFill.append(nextCh);            if (secondCh)            {                toFill.append(secondCh);                secondCh=0;            }        }    }    catch(const EndOfEntityException&)    {        // Just eat it and continue.        gotLeadingSurrogate = false;        escaped = false;    }    }    return true;}//  This method scans a CDATA section. It collects the character into one//  of the temp buffers and calls the document handler, if any, with the//  characters. It assumes that the <![CDATA string has been scanned before//  this call.void WFXMLScanner::scanCDSection(){    static const XMLCh CDataClose[] =    {            chCloseSquare, chCloseAngle, chNull    };    //  The next character should be the opening square bracket. If not    //  issue an error, but then try to recover by skipping any whitespace    //  and checking again.    if (!fReaderMgr.skippedChar(chOpenSquare))    {        emitError(XMLErrs::ExpectedOpenSquareBracket);        fReaderMgr.skipPastSpaces();        // If we still don't find it, then give up, else keep going        if (!fReaderMgr.skippedChar(chOpenSquare))            return;    }    // Get a buffer for this    XMLBufBid bbCData(&fBufMgr);    //  We just scan forward until we hit the end of CDATA section sequence.    //  CDATA is effectively a big escape mechanism so we don't treat markup    //  characters specially here.    bool            emittedError = false;    bool    gotLeadingSurrogate = false;    while (true)    {        const XMLCh nextCh = fReaderMgr.getNextChar();        // Watch for unexpected end of file        if (!nextCh)        {            emitError(XMLErrs::UnterminatedCDATASection);            ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);        }        //  If this is a close square bracket it could be our closing        //  sequence.        if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose))        {            //  make sure we were not expecting a trailing surrogate.            if (gotLeadingSurrogate)                emitError(XMLErrs::Expected2ndSurrogateChar);            // If we have a doc handler, call it            if (fDocHandler)            {                fDocHandler->docCharacters                (                    bbCData.getRawBuffer()                    , bbCData.getLen()                    , true                );            }            // And we are done            break;        }        //  Make sure its a valid character. But if we've emitted an error        //  already, don't bother with the overhead since we've already told        //  them about it.        if (!emittedError)        {            // Deal with surrogate pairs            if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))            {                //  Its a leading surrogate. If we already got one, then                //  issue an error, else set leading flag to make sure that                //  we look for a trailing next time.                if (gotLeadingSurrogate)                    emitError(XMLErrs::Expected2ndSurrogateChar);                else                    gotLeadingSurrogate = true;            }            else            {                //  If its a trailing surrogate, make sure that we are                //  prepared for that. Else, its just a regular char so make                //  sure that we were not expected a trailing surrogate.                if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))                {                    // Its trailing, so make sure we were expecting it                    if (!gotLeadingSurrogate)                        emitError(XMLErrs::Unexpected2ndSurrogateChar);                }                else                {                    //  Its just a char, so make sure we were not expecting a                    //  trailing surrogate.                    if (gotLeadingSurrogate)                        emitError(XMLErrs::Expected2ndSurrogateChar);                    // Its got to at least be a valid XML character                    else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))                    {                        XMLCh tmpBuf[9];                        XMLString::binToText                        (                            nextCh                            , tmpBuf                            , 8                            , 16                            , fMemoryManager                        );                        emitError(XMLErrs::InvalidCharacter, tmpBuf);                        emittedError = true;                    }                }                gotLeadingSurrogate = false;            }        }        // Add it to the buffer        bbCData.append(nextCh);    }}void WFXMLScanner::scanCharData(XML

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?