dtdscanner.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,936 行 · 第 1/5 页
CPP
1,936 行
{ // // If we got at least a sigit, then do an unterminated ref // error. Else, do an expected a numerical ref thing. // if (gotOne) fScanner->emitError(XMLErrs::UnterminatedCharRef); else fScanner->emitError(XMLErrs::ExpectedNumericalCharRef); return false; } // // Make sure its valid for the radix. If not, then just eat the // digit and go on after issueing an error. Else, update the // running value with this new digit. // if (nextVal >= radix) { XMLCh tmpStr[2]; tmpStr[0] = nextCh; tmpStr[1] = chNull; fScanner->emitError(XMLErrs::BadDigitForRadix, tmpStr); } else { value = (value * radix) + nextVal; } // Indicate that we got at least one good digit gotOne = true; // Eat the char we just processed fReaderMgr->getNextChar(); } // Return the char (or chars) // And check if the character expanded is valid or not if (value >= 0x10000 && value <= 0x10FFFF) { value -= 0x10000; first = XMLCh((value >> 10) + 0xD800); second = XMLCh((value & 0x3FF) + 0xDC00); } else if (value <= 0xFFFD) { first = XMLCh(value); second = 0; if (!fReaderMgr->getCurrentReader()->isXMLChar(first) && !fReaderMgr->getCurrentReader()->isControlChar(first)) { // Character reference was not in the valid range fScanner->emitError(XMLErrs::InvalidCharacterRef); return false; } } else { // Character reference was not in the valid range fScanner->emitError(XMLErrs::InvalidCharacterRef); return false; } return true;}ContentSpecNode*DTDScanner::scanChildren(const DTDElementDecl& elemDecl, XMLBuffer& bufToUse){ // Check for a PE ref here, but don't require spaces checkForPERef(false, true); // We have to check entity nesting here unsigned int curReader; // // We know that the caller just saw an opening parenthesis, so we need // to parse until we hit the end of it, recursing for other nested // parentheses we see. // // We have to check for one up front, since it could be something like // (((a)*)) etc... // ContentSpecNode* curNode = 0; if (fReaderMgr->skippedChar(chOpenParen)) { curReader = fReaderMgr->getCurrentReaderNum(); // Lets call ourself and get back the resulting node curNode = scanChildren(elemDecl, bufToUse); // If that failed, no need to go further, return failure if (!curNode) return 0; if (curReader != fReaderMgr->getCurrentReaderNum() && fScanner->getDoValidation()) fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE); } else { // Not a nested paren, so it must be a leaf node if (!fReaderMgr->getName(bufToUse)) { fScanner->emitError(XMLErrs::ExpectedElementName); return 0; } // // Create a leaf node for it. If we can find the element id for // this element, then use it. Else, we have to fault in an element // decl, marked as created because of being in a content model. // XMLElementDecl* decl = fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bufToUse.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE); if (!decl) { decl = new (fGrammarPoolMemoryManager) DTDElementDecl ( bufToUse.getRawBuffer() , fEmptyNamespaceId , DTDElementDecl::Any , fGrammarPoolMemoryManager ); decl->setCreateReason(XMLElementDecl::InContentModel); decl->setExternalElemDeclaration(isReadingExternalEntity()); fDTDGrammar->putElemDecl(decl); } curNode = new (fGrammarPoolMemoryManager) ContentSpecNode ( decl->getElementName() , fGrammarPoolMemoryManager ); // Check for a PE ref here, but don't require spaces const bool gotSpaces = checkForPERef(false, true); // Check for a repetition character after the leaf const XMLCh repCh = fReaderMgr->peekNextChar(); ContentSpecNode* tmpNode = makeRepNode(repCh, curNode, fGrammarPoolMemoryManager); if (tmpNode != curNode) { if (gotSpaces) { if (fScanner->emitErrorWillThrowException(XMLErrs::UnexpectedWhitespace)) { delete tmpNode; } fScanner->emitError(XMLErrs::UnexpectedWhitespace); } fReaderMgr->getNextChar(); curNode = tmpNode; } } // Check for a PE ref here, but don't require spaces checkForPERef(false, true); // // Ok, the next character tells us what kind of content this particular // model this particular parentesized section is. Its either a choice if // we see ',', a sequence if we see '|', or a single leaf node if we see // a closing paren. // const XMLCh opCh = fReaderMgr->peekNextChar(); if ((opCh != chComma) && (opCh != chPipe) && (opCh != chCloseParen)) { // Not a legal char, so delete our node and return failure delete curNode; fScanner->emitError(XMLErrs::ExpectedSeqChoiceLeaf); return 0; } // // Create the head node of the correct type. We need this to remember // the top of the local tree. If it was a single subexpr, then just // set the head node to the current node. For the others, we'll build // the tree off the second child as we move across. // ContentSpecNode* headNode = 0; ContentSpecNode::NodeTypes curType = ContentSpecNode::UnknownType; if (opCh == chComma) { curType = ContentSpecNode::Sequence; headNode = new (fGrammarPoolMemoryManager) ContentSpecNode ( curType , curNode , 0 , true , true , fGrammarPoolMemoryManager ); curNode = headNode; } else if (opCh == chPipe) { curType = ContentSpecNode::Choice; headNode = new (fGrammarPoolMemoryManager) ContentSpecNode ( curType , curNode , 0 , true , true , fGrammarPoolMemoryManager ); curNode = headNode; } else { headNode = curNode; fReaderMgr->getNextChar(); } // // If it was a sequence or choice, we just loop until we get to the // end of our section, adding each new leaf or sub expression to the // right child of the current node, and making that new node the current // node. // if ((opCh == chComma) || (opCh == chPipe)) { ContentSpecNode* lastNode = 0; while (true) { // // The next thing must either be another | or , character followed // by another leaf or subexpression, or a closing parenthesis, or a // PE ref. // if (fReaderMgr->lookingAtChar(chPercent)) { checkForPERef(false, true); } else if (fReaderMgr->skippedSpace()) { // Just skip whitespace fReaderMgr->skipPastSpaces(); } else if (fReaderMgr->skippedChar(chCloseParen)) { // // We've hit the end of this section, so break out. But, we // need to see if we left a partial sequence of choice node // without a second node. If so, we have to undo that and // put its left child into the right node of the previous // node. // if ((curNode->getType() == ContentSpecNode::Choice) || (curNode->getType() == ContentSpecNode::Sequence)) { if (!curNode->getSecond()) { ContentSpecNode* saveFirst = curNode->orphanFirst(); lastNode->setSecond(saveFirst); curNode = lastNode; } } break; } else if (fReaderMgr->skippedChar(opCh)) { // Check for a PE ref here, but don't require spaces checkForPERef(false, true); if (fReaderMgr->skippedChar(chOpenParen)) { curReader = fReaderMgr->getCurrentReaderNum(); // Recurse to handle this new guy ContentSpecNode* subNode; try { subNode = scanChildren(elemDecl, bufToUse); } catch (const XMLErrs::Codes) { delete headNode; throw; } // If it failed, we are done, clean up here and return failure if (!subNode) { delete headNode; return 0; } if (curReader != fReaderMgr->getCurrentReaderNum() && fScanner->getDoValidation()) fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE); // Else patch it in and make it the new current ContentSpecNode* newCur = new (fGrammarPoolMemoryManager) ContentSpecNode ( curType , subNode , 0 , true , true , fGrammarPoolMemoryManager ); curNode->setSecond(newCur); lastNode = curNode; curNode = newCur; } else { // // Got to be a leaf node, so get a name. If we cannot get // one, then clean up and get outa here. // if (!fReaderMgr->getName(bufToUse)) { delete headNode; fScanner->emitError(XMLErrs::ExpectedElementName); return 0; } // // Create a leaf node for it. If we can find the element // id for this element, then use it. Else, we have to // fault in an element decl, marked as created because // of being in a content model. // XMLElementDecl* decl = fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bufToUse.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE); if (!decl) { decl = new (fGrammarPoolMemoryManager) DTDElementDecl ( bufToUse.getRawBuffer() , fEmptyNamespaceId , DTDElementDecl::Any , fGrammarPoolMemoryManager ); decl->setCreateReason(XMLElementDecl::InContentModel); decl->setExternalElemDeclaration(isReadingExternalEntity()); fDTDGrammar->putElemDecl(decl); } ContentSpecNode* tmpLeaf = new (fGrammarPoolMemoryManager) ContentSpecNode ( decl->getElementName() , fGrammarPoolMemoryManager ); // Check for a repetition character after the leaf const XMLCh repCh = fReaderMgr->peekNextChar(); ContentSpecNode* tmpLeaf2 = makeRepNode(repCh, tmpLeaf, fGrammarPoolMemoryManager); if (tmpLeaf != tmpLeaf2) fReaderMgr->getNextChar(); // // Create a new sequence or choice node, with the leaf // (or rep surrounding it) we just got as its first node. // Make the new node the second node of the current node, // and then make it the current node. // ContentSpecNode* newCur = new (fGrammarPoolMemoryManager) ContentSpecNode ( curType , tmpLeaf2 , 0 , true , true , fGrammarPoolMemoryManager ); curNode->setSecond(newCur); lastNode = curNode; curNode = newCur; } } else { // Cannot be valid delete headNode; // emitError may do a throw so need to clean-up first if (opCh == chComma) { fScanner->emitError(XMLErrs::ExpectedChoiceOrCloseParen); } else { fScanner->emitError ( XMLErrs::ExpectedSeqOrCloseParen , elemDecl.getFullName() ); } return 0;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?