dtdscanner.cpp

来自「IBM的解析XML的工具Xerces的源代码」· C++ 代码 · 共 1,936 行 · 第 1/5 页

CPP
1,936
字号
        {            //            //  If we got at least a sigit, then do an unterminated ref            //  error. Else, do an expected a numerical ref thing.            //            if (gotOne)                fScanner->emitError(XMLErrs::UnterminatedCharRef);            else                fScanner->emitError(XMLErrs::ExpectedNumericalCharRef);            return false;        }        //        //  Make sure its valid for the radix. If not, then just eat the        //  digit and go on after issueing an error. Else, update the        //  running value with this new digit.        //        if (nextVal >= radix)        {            XMLCh tmpStr[2];            tmpStr[0] = nextCh;            tmpStr[1] = chNull;            fScanner->emitError(XMLErrs::BadDigitForRadix, tmpStr);        }         else        {            value = (value * radix) + nextVal;        }        // Indicate that we got at least one good digit        gotOne = true;        // Eat the char we just processed        fReaderMgr->getNextChar();    }    // Return the char (or chars)    // And check if the character expanded is valid or not    if (value >= 0x10000 && value <= 0x10FFFF)    {        value -= 0x10000;        first  = XMLCh((value >> 10) + 0xD800);        second = XMLCh((value & 0x3FF) + 0xDC00);    }    else if (value <= 0xFFFD)    {        first  = XMLCh(value);        second = 0;        if (!fReaderMgr->getCurrentReader()->isXMLChar(first) && !fReaderMgr->getCurrentReader()->isControlChar(first)) {            // Character reference was not in the valid range            fScanner->emitError(XMLErrs::InvalidCharacterRef);            return false;        }    }    else {        // Character reference was not in the valid range        fScanner->emitError(XMLErrs::InvalidCharacterRef);        return false;    }    return true;}ContentSpecNode*DTDScanner::scanChildren(const DTDElementDecl& elemDecl, XMLBuffer& bufToUse){    // Check for a PE ref here, but don't require spaces    
checkForPERef(false, true);    // We have to check entity nesting here    unsigned int curReader;    //    //  We know that the caller just saw an opening parenthesis, so we need    //  to parse until we hit the end of it, recursing for other nested    //  parentheses we see.    //    //  We have to check for one up front, since it could be something like    //  (((a)*)) etc...    //    ContentSpecNode* curNode = 0;    if (fReaderMgr->skippedChar(chOpenParen))    {        curReader = fReaderMgr->getCurrentReaderNum();        // Lets call ourself and get back the resulting node        curNode = scanChildren(elemDecl, bufToUse);        // If that failed, no need to go further, return failure        if (!curNode)            return 0;        if (curReader != fReaderMgr->getCurrentReaderNum() && fScanner->getDoValidation())            fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);    }     else    {        // Not a nested paren, so it must be a leaf node        if (!fReaderMgr->getName(bufToUse))        {            fScanner->emitError(XMLErrs::ExpectedElementName);            return 0;        }        //        //  Create a leaf node for it. If we can find the element id for        //  this element, then use it. Else, we have to fault in an element        //  decl, marked as created because of being in a content model.        
//        XMLElementDecl* decl = fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bufToUse.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);        if (!decl)        {            decl = new (fGrammarPoolMemoryManager) DTDElementDecl            (                bufToUse.getRawBuffer()                , fEmptyNamespaceId                , DTDElementDecl::Any                , fGrammarPoolMemoryManager            );            decl->setCreateReason(XMLElementDecl::InContentModel);            decl->setExternalElemDeclaration(isReadingExternalEntity());            fDTDGrammar->putElemDecl(decl);        }        curNode = new (fGrammarPoolMemoryManager) ContentSpecNode        (            decl->getElementName()            , fGrammarPoolMemoryManager        );        // Check for a PE ref here, but don't require spaces        const bool gotSpaces = checkForPERef(false, true);        // Check for a repetition character after the leaf        const XMLCh repCh = fReaderMgr->peekNextChar();        ContentSpecNode* tmpNode = makeRepNode(repCh, curNode, fGrammarPoolMemoryManager);        if (tmpNode != curNode)        {            if (gotSpaces)            {                if (fScanner->emitErrorWillThrowException(XMLErrs::UnexpectedWhitespace))                {                    delete tmpNode;                }                fScanner->emitError(XMLErrs::UnexpectedWhitespace);            }            fReaderMgr->getNextChar();            curNode = tmpNode;        }    }    // Check for a PE ref here, but don't require spaces    checkForPERef(false, true);    //    //  Ok, the next character tells us what kind of content this particular    //  model this particular parentesized section is. Its either a choice if    //  we see ',', a sequence if we see '|', or a single leaf node if we see    //  a closing paren.    
//    const XMLCh opCh = fReaderMgr->peekNextChar();    if ((opCh != chComma)    &&  (opCh != chPipe)    &&  (opCh != chCloseParen))    {        // Not a legal char, so delete our node and return failure        delete curNode;        fScanner->emitError(XMLErrs::ExpectedSeqChoiceLeaf);        return 0;    }    //    //  Create the head node of the correct type. We need this to remember    //  the top of the local tree. If it was a single subexpr, then just    //  set the head node to the current node. For the others, we'll build    //  the tree off the second child as we move across.    //    ContentSpecNode* headNode = 0;    ContentSpecNode::NodeTypes curType = ContentSpecNode::UnknownType;    if (opCh == chComma)    {        curType = ContentSpecNode::Sequence;        headNode = new (fGrammarPoolMemoryManager) ContentSpecNode        (            curType            , curNode            , 0            , true            , true            , fGrammarPoolMemoryManager        );        curNode = headNode;    }     else if (opCh == chPipe)    {        curType = ContentSpecNode::Choice;        headNode = new (fGrammarPoolMemoryManager) ContentSpecNode        (            curType            , curNode            , 0            , true            , true            , fGrammarPoolMemoryManager        );        curNode = headNode;    }     else    {        headNode = curNode;        fReaderMgr->getNextChar();    }    //    //  If it was a sequence or choice, we just loop until we get to the    //  end of our section, adding each new leaf or sub expression to the    //  right child of the current node, and making that new node the current    //  node.    //    if ((opCh == chComma) || (opCh == chPipe))    {        ContentSpecNode* lastNode = 0;        while (true)        {            //            //  The next thing must either be another | or , character followed            //  by another leaf or subexpression, or a closing parenthesis, or a            //  PE ref.            
//            if (fReaderMgr->lookingAtChar(chPercent))            {                checkForPERef(false, true);            }             else if (fReaderMgr->skippedSpace())            {                // Just skip whitespace                fReaderMgr->skipPastSpaces();            }             else if (fReaderMgr->skippedChar(chCloseParen))            {                //                //  We've hit the end of this section, so break out. But, we                //  need to see if we left a partial sequence of choice node                //  without a second node. If so, we have to undo that and                //  put its left child into the right node of the previous                //  node.                //                if ((curNode->getType() == ContentSpecNode::Choice)                ||  (curNode->getType() == ContentSpecNode::Sequence))                {                    if (!curNode->getSecond())                    {                        ContentSpecNode* saveFirst = curNode->orphanFirst();                        lastNode->setSecond(saveFirst);                        curNode = lastNode;                    }                }                break;            }             else if (fReaderMgr->skippedChar(opCh))            {                // Check for a PE ref here, but don't require spaces                checkForPERef(false, true);                if (fReaderMgr->skippedChar(chOpenParen))                {                    curReader = fReaderMgr->getCurrentReaderNum();                    // Recurse to handle this new guy                    ContentSpecNode* subNode;                    try {                        subNode = scanChildren(elemDecl, bufToUse);                    }                    catch (const XMLErrs::Codes)                    {                        delete headNode;                        throw;                    }                    // If it failed, we are done, clean up here and return failure                    if (!subNode)             
       {                        delete headNode;                        return 0;                    }                    if (curReader != fReaderMgr->getCurrentReaderNum() && fScanner->getDoValidation())                        fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);                    // Else patch it in and make it the new current                    ContentSpecNode* newCur = new (fGrammarPoolMemoryManager) ContentSpecNode                    (                        curType                        , subNode                        , 0                        , true                        , true                        , fGrammarPoolMemoryManager                    );                    curNode->setSecond(newCur);                    lastNode = curNode;                    curNode = newCur;                }                 else                {                    //                    //  Got to be a leaf node, so get a name. If we cannot get                    //  one, then clean up and get outa here.                    //                    if (!fReaderMgr->getName(bufToUse))                    {                        delete headNode;                        fScanner->emitError(XMLErrs::ExpectedElementName);                        return 0;                    }                    //                    //  Create a leaf node for it. If we can find the element                    //  id for this element, then use it. Else, we have to                    //  fault in an element decl, marked as created because                    //  of being in a content model.                    
//                    XMLElementDecl* decl = fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bufToUse.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);                    if (!decl)                    {                        decl = new (fGrammarPoolMemoryManager) DTDElementDecl                        (                            bufToUse.getRawBuffer()                            , fEmptyNamespaceId                            , DTDElementDecl::Any                            , fGrammarPoolMemoryManager                        );                        decl->setCreateReason(XMLElementDecl::InContentModel);                        decl->setExternalElemDeclaration(isReadingExternalEntity());                        fDTDGrammar->putElemDecl(decl);                    }                    ContentSpecNode* tmpLeaf = new (fGrammarPoolMemoryManager) ContentSpecNode                    (                        decl->getElementName()                        , fGrammarPoolMemoryManager                    );                    // Check for a repetition character after the leaf                    const XMLCh repCh = fReaderMgr->peekNextChar();                    ContentSpecNode* tmpLeaf2 = makeRepNode(repCh, tmpLeaf, fGrammarPoolMemoryManager);                    if (tmpLeaf != tmpLeaf2)                        fReaderMgr->getNextChar();                    //                    //  Create a new sequence or choice node, with the leaf                    //  (or rep surrounding it) we just got as its first node.                    //  Make the new node the second node of the current node,                    //  and then make it the current node.                    
//                    ContentSpecNode* newCur = new (fGrammarPoolMemoryManager) ContentSpecNode                    (                        curType                        , tmpLeaf2                        , 0                        , true                        , true                        , fGrammarPoolMemoryManager                    );                    curNode->setSecond(newCur);                    lastNode = curNode;                    curNode = newCur;                }            }             else            {                // Cannot be valid                delete headNode;  // emitError may do a throw so need to clean-up first                if (opCh == chComma)                {                    fScanner->emitError(XMLErrs::ExpectedChoiceOrCloseParen);                }                 else                {                    fScanner->emitError                    (                        XMLErrs::ExpectedSeqOrCloseParen                        , elemDecl.getFullName()                    );                }                                return 0;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?