dtdscanner.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,936 行 · 第 1/5 页

CPP
1,936
字号
        fDocTypeHandler->attDef(parentElem, *decl, isIgnored);    return decl;}void DTDScanner::scanAttListDecl(){    // Space is required here, so check for a PE ref    if (!checkForPERef(false, true))    {        fScanner->emitError(XMLErrs::ExpectedWhitespace);        fReaderMgr->skipPastChar(chCloseAngle);        return;    }    //    //  Next should be the name of the element it belongs to, so get a buffer    //  and get the name into it.    //    XMLBufBid bbName(fBufMgr);    if (!fReaderMgr->getName(bbName.getBuffer()))    {        fScanner->emitError(XMLErrs::ExpectedElementName);        fReaderMgr->skipPastChar(chCloseAngle);        return;    }    //    //  Find this element's declaration. If it has not been declared yet,    //  we will force one into the list, but not mark it as declared.    //    DTDElementDecl* elemDecl = (DTDElementDecl*) fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bbName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);    if (!elemDecl)    {        //        //  Lets fault in a declaration and add it to the pool. We mark        //  it having been created because of an attlist. Later, if its        //  declared, this will be updated.        //        elemDecl = new (fGrammarPoolMemoryManager) DTDElementDecl        (            bbName.getRawBuffer()            , fEmptyNamespaceId            , DTDElementDecl::Any            , fGrammarPoolMemoryManager        );        elemDecl->setCreateReason(XMLElementDecl::AttList);        elemDecl->setExternalElemDeclaration(isReadingExternalEntity());        fDTDGrammar->putElemDecl((XMLElementDecl*) elemDecl);    }    // If we have a doc type handler, tell it the att list is starting    if (fDocTypeHandler)        fDocTypeHandler->startAttList(*elemDecl);    //    //  Now we loop until we are done with all of the attributes in this    //  list. We need a buffer to use for local processing.    //    XMLBufBid   bbTmp(fBufMgr);    XMLBuffer&  tmpBuf = bbTmp.getBuffer();    bool        seenAnId = false;    while (true)    {        // Get the next char out and see what it tells us to do        const XMLCh nextCh = fReaderMgr->peekNextChar();        // Watch for EOF        if (!nextCh)            ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);        if (nextCh == chCloseAngle)        {            // We are done with this attribute list            fReaderMgr->getNextChar();            break;        }         else if (fReaderMgr->getCurrentReader()->isWhitespace(nextCh))        {            //            //  If advanced callbacks are enabled and we have a doc            //  type handler, then gather up the white space and call            //  back on the doctype handler. Otherwise, just skip            //  whitespace.            //            if (fDocTypeHandler)            {                fReaderMgr->getSpaces(tmpBuf);                fDocTypeHandler->doctypeWhitespace                (                    tmpBuf.getRawBuffer()                    , tmpBuf.getLen()                );            }             else            {                fReaderMgr->skipPastSpaces();            }        }         else if (nextCh == chPercent)        {            // Eat the percent and expand the ref            fReaderMgr->getNextChar();            expandPERef(false, false, true);        }         else        {            //            //  It must be an attribute name, so scan it. We let            //  it use our local buffer for its name scanning.            //            XMLAttDef* attDef = scanAttDef(*elemDecl, tmpBuf);            if (!attDef)            {                fReaderMgr->skipPastChar(chCloseAngle);                break;            }            //            //  If we are validating and its an ID type, then we have to            //  make sure that we have not seen an id attribute yet. Set            //  the flag to say that we've seen one now also.            //            if (fScanner->getDoValidation())            {                if (attDef->getType() == XMLAttDef::ID)                {                    if (seenAnId)                        fScanner->getValidator()->emitError(XMLValid::MultipleIdAttrs, elemDecl->getFullName());                    seenAnId = true;                }            }        }    }    // If we have a doc type handler, tell it the att list is ending    if (fDocTypeHandler)        fDocTypeHandler->endAttList(*elemDecl);}////  This method is called to scan the value of an attribute in content. This//  involves some normalization and replacement of general entity and//  character references.////  End of entity's must be dealt with here. During DTD scan, they can come//  from external entities. During content, they can come from any entity.//  We just eat the end of entity and continue with our scan until we come//  to the closing quote. If an unterminated value causes us to go through//  subsequent entities, that will cause errors back in the calling code,//  but there's little we can do about it here.//bool DTDScanner::scanAttValue(const   XMLCh* const        attrName                                ,       XMLBuffer&          toFill                                , const XMLAttDef::AttTypes type){    enum States    {        InWhitespace        , InContent    };    // Reset the target buffer    toFill.reset();    // Get the next char which must be a single or double quote    XMLCh quoteCh;    if (!fReaderMgr->skipIfQuote(quoteCh))        return false;    //    //  We have to get the current reader because we have to ignore closing    //  quotes until we hit the same reader again.    //    const unsigned int curReader = fReaderMgr->getCurrentReaderNum();    //    //  Loop until we get the attribute value. Note that we use a double    //  loop here to avoid the setup/teardown overhead of the exception    //  handler on every round.    //    XMLCh   nextCh;    XMLCh   secondCh = 0;    States  curState = InContent;    bool    firstNonWS = false;    bool    gotLeadingSurrogate = false;    bool    escaped;    while (true)    {    try    {        while(true)        {            nextCh = fReaderMgr->getNextChar();            if (!nextCh)                ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);            // Check for our ending quote in the same entity            if (nextCh == quoteCh)            {                if (curReader == fReaderMgr->getCurrentReaderNum())                    return true;                // Watch for spillover into a previous entity                if (curReader > fReaderMgr->getCurrentReaderNum())                {                    fScanner->emitError(XMLErrs::PartialMarkupInEntity);                    return false;                }            }            //            //  Check for an entity ref now, before we let it affect our            //  whitespace normalization logic below. We ignore the empty flag            //  in this one.            //            escaped = false;            if (nextCh == chAmpersand)            {                if (scanEntityRef(nextCh, secondCh, escaped) != EntityExp_Returned)                {                    gotLeadingSurrogate = false;                    continue;                }            }            else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))            {                // Check for correct surrogate pairs                if (gotLeadingSurrogate)                    fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);                else                    gotLeadingSurrogate = true;            }             else            {                if (gotLeadingSurrogate)                {                    if ((nextCh < 0xDC00) || (nextCh > 0xDFFF))                        fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);                }                // Its got to at least be a valid XML character                else if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh))                {                    XMLCh tmpBuf[9];                    XMLString::binToText                    (                        nextCh                        , tmpBuf                        , 8                        , 16                        , fMemoryManager                    );                    fScanner->emitError                    (                        XMLErrs::InvalidCharacterInAttrValue                        , attrName                        , tmpBuf                    );                }                gotLeadingSurrogate = false;            }            //            //  If its not escaped, then make sure its not a < character, which            //  is not allowed in attribute values.            //            if (!escaped && (nextCh == chOpenAngle))                fScanner->emitError(XMLErrs::BracketInAttrValue, attrName);            //            //  If the attribute is a CDATA type we do simple replacement of            //  tabs and new lines with spaces, if the character is not escaped            //  by way of a char ref.            //            //  Otherwise, we do the standard non-CDATA normalization of            //  compressing whitespace to single spaces and getting rid of            //  leading and trailing whitespace.            //            if (type == XMLAttDef::CData)            {                if (!escaped)                {                    if ((nextCh == 0x09) || (nextCh == 0x0A) || (nextCh == 0x0D))                        nextCh = chSpace;                }            }             else            {                if (curState == InWhitespace)                {                    if (!fReaderMgr->getCurrentReader()->isWhitespace(nextCh))                    {                        if (firstNonWS)                            toFill.append(chSpace);                        curState = InContent;                        firstNonWS = true;                    }                     else                    {                        continue;                    }                }                 else if (curState == InContent)                {                    if (fReaderMgr->getCurrentReader()->isWhitespace(nextCh))                    {                        curState = InWhitespace;                        continue;                    }                    firstNonWS = true;                }            }            // Else add it to the buffer            toFill.append(nextCh);            if (secondCh)            {                toFill.append(secondCh);                secondCh=0;            }        }    }    catch(const EndOfEntityException&)    {        // Just eat it and continue.        gotLeadingSurrogate = false;        escaped = false;    }    }    return true;}bool DTDScanner::scanCharRef(XMLCh& first, XMLCh& second){    bool gotOne = false;    unsigned int value = 0;    //    //  Set the radix. Its supposed to be a lower case x if hex. But, in    //  order to recover well, we check for an upper and put out an error    //  for that.    //    unsigned int radix = 10;    if (fReaderMgr->skippedChar(chLatin_x))    {        radix = 16;    }     else if (fReaderMgr->skippedChar(chLatin_X))    {        fScanner->emitError(XMLErrs::HexRadixMustBeLowerCase);        radix = 16;    }    while (true)    {        const XMLCh nextCh = fReaderMgr->peekNextChar();        // Watch for EOF        if (!nextCh)            ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);        // Break out on the terminating semicolon        if (nextCh == chSemiColon)        {            fReaderMgr->getNextChar();            break;        }        //        //  Convert this char to a binary value, or bail out if its not        //  one.        //        unsigned int nextVal;        if ((nextCh >= chDigit_0) && (nextCh <= chDigit_9))            nextVal = (unsigned int)(nextCh - chDigit_0);        else if ((nextCh >= chLatin_A) && (nextCh <= chLatin_F))            nextVal= (unsigned int)(10 + (nextCh - chLatin_A));        else if ((nextCh >= chLatin_a) && (nextCh <= chLatin_f))            nextVal = (unsigned int)(10 + (nextCh - chLatin_a));        else

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?