dtdscanner.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,936 行 · 第 1/5 页
CPP
1,936 行
} } } // // We saw the terminating parenthesis so lets check for any repetition // character, and create a node for that, making the head node the child // of it. // XMLCh repCh = fReaderMgr->peekNextChar(); ContentSpecNode* retNode = makeRepNode(repCh, headNode, fGrammarPoolMemoryManager); if (retNode != headNode) fReaderMgr->getNextChar(); return retNode;}//// We get here after the '<!--' part of the comment. We scan past the// terminating '-->' It will calls the appropriate handler with the comment// text, if one is provided. A comment can be in either the document or// the DTD, so the fInDocument flag is used to know which handler to send// it to.//void DTDScanner::scanComment(){ enum States { InText , OneDash , TwoDashes }; // Get a buffer for this XMLBufBid bbComment(fBufMgr); // // Get the comment text into a temp buffer. Be sure to use temp buffer // two here, since its to be used for stuff that is potentially longer // than just a name. // bool gotLeadingSurrogate = false; States curState = InText; while (true) { // Get the next character const XMLCh nextCh = fReaderMgr->getNextChar(); // Watch for an end of file if (!nextCh) { fScanner->emitError(XMLErrs::UnterminatedComment); ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager); } // Check for correct surrogate pairs if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) { if (gotLeadingSurrogate) fScanner->emitError(XMLErrs::Expected2ndSurrogateChar); else gotLeadingSurrogate = true; } else { if (gotLeadingSurrogate) { if ((nextCh < 0xDC00) || (nextCh > 0xDFFF)) fScanner->emitError(XMLErrs::Expected2ndSurrogateChar); } // Its got to at least be a valid XML character else if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh)) { XMLCh tmpBuf[9]; XMLString::binToText ( nextCh , tmpBuf , 8 , 16 , fMemoryManager ); fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf); } gotLeadingSurrogate = false; } if (curState == InText) { // If its a dash, go to OneDash state. Otherwise take as text if (nextCh == chDash) curState = OneDash; else bbComment.append(nextCh); } else if (curState == OneDash) { // // If its another dash, then we change to the two dashes states. // Otherwise, we have to put in the deficit dash and the new // character and go back to InText. // if (nextCh == chDash) { curState = TwoDashes; } else { bbComment.append(chDash); bbComment.append(nextCh); curState = InText; } } else if (curState == TwoDashes) { // The next character must be the closing bracket if (nextCh != chCloseAngle) { fScanner->emitError(XMLErrs::IllegalSequenceInComment); fReaderMgr->skipPastChar(chCloseAngle); return; } break; } } // If there is a doc type handler, then pass on the comment stuff if (fDocTypeHandler) fDocTypeHandler->doctypeComment(bbComment.getRawBuffer());}bool DTDScanner::scanContentSpec(DTDElementDecl& toFill){ // // Check for for a couple of the predefined content type strings. If // its not one of these, its got to be a parenthesized reg ex type // expression. // if (fReaderMgr->skippedString(XMLUni::fgEmptyString)) { toFill.setModelType(DTDElementDecl::Empty); return true; } if (fReaderMgr->skippedString(XMLUni::fgAnyString)) { toFill.setModelType(DTDElementDecl::Any); return true; } // Its got to be a parenthesized regular expression if (!fReaderMgr->skippedChar(chOpenParen)) { fScanner->emitError ( XMLErrs::ExpectedContentSpecExpr , toFill.getFullName() ); return false; } // Get the current reader id, so we can test for partial markup const unsigned int curReader = fReaderMgr->getCurrentReaderNum(); // We could have a PE ref here, but don't require space checkForPERef(false, true); // // Now we look for a PCDATA string. If its PCDATA, then it must be a // MIXED model. Otherwise, it must be a regular list of children in // a regular expression perhaps. // bool status; if (fReaderMgr->skippedString(XMLUni::fgPCDATAString)) { // Set the model to mixed toFill.setModelType(DTDElementDecl::Mixed_Simple); status = scanMixed(toFill); // // If we are validating we have to check that there are no multiple // uses of any child elements. // if (fScanner->getDoValidation()) { if (((const MixedContentModel*)toFill.getContentModel())->hasDups()) fScanner->getValidator()->emitError(XMLValid::RepElemInMixed); } } else { // // We have to do a recursive scan of the content model. Create a // buffer for it to use, for efficiency. It returns the top ofthe // content spec node tree, which we set if successful. // toFill.setModelType(DTDElementDecl::Children); XMLBufBid bbTmp(fBufMgr); ContentSpecNode* resNode = scanChildren(toFill, bbTmp.getBuffer()); status = (resNode != 0); if (status) toFill.setContentSpec(resNode); } // Make sure we are on the same reader as where we started if (curReader != fReaderMgr->getCurrentReaderNum() && fScanner->getDoValidation()) fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE); return status;}void DTDScanner::scanDefaultDecl(DTDAttDef& toFill){ if (fReaderMgr->skippedString(XMLUni::fgRequiredString)) { toFill.setDefaultType(XMLAttDef::Required); return; } if (fReaderMgr->skippedString(XMLUni::fgImpliedString)) { toFill.setDefaultType(XMLAttDef::Implied); return; } if (fReaderMgr->skippedString(XMLUni::fgFixedString)) { // // There must be space before the fixed value. If there is not, then // emit an error but keep going. // if (!fReaderMgr->skippedSpace()) fScanner->emitError(XMLErrs::ExpectedWhitespace); else fReaderMgr->skipPastSpaces(); toFill.setDefaultType(XMLAttDef::Fixed); } else { toFill.setDefaultType(XMLAttDef::Default); } // // If we got here, its fixed or default, so we need to get a value. // If we don't, then emit an error but just set the default value to // an empty string and try to keep going. // // Check for PE ref or optional whitespace checkForPERef(false, true); XMLBufBid bbValue(fBufMgr); if (!scanAttValue(toFill.getFullName(), bbValue.getBuffer(), toFill.getType())) fScanner->emitError(XMLErrs::ExpectedDefAttrDecl); toFill.setValue(bbValue.getRawBuffer());}//// This is called after seeing '<!ELEMENT' which indicates that an element// markup is starting. This guy scans the rest of it and adds it to the// element decl pool if it has not already been declared.//void DTDScanner::scanElementDecl(){ // // Space is legal (required actually) here so check for a PE ref. If // we don't get our whitespace, then issue and error, but try to keep // going. // if (!checkForPERef(false, true)) fScanner->emitError(XMLErrs::ExpectedWhitespace); // Get a buffer for the element name and scan in the name XMLBufBid bbName(fBufMgr); if (!fReaderMgr->getName(bbName.getBuffer())) { fScanner->emitError(XMLErrs::ExpectedElementName); fReaderMgr->skipPastChar(chCloseAngle); return; } // Look this guy up in the element decl pool DTDElementDecl* decl = (DTDElementDecl*) fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bbName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE); // // If it does not exist, then we need to create it. If it does and // its marked as declared, then that's an error, but we still need to // scan over the content model so use the dummy declaration that the // parsing code can fill in. // if (decl) { if (decl->isDeclared()) { if (fScanner->getDoValidation()) fScanner->getValidator()->emitError(XMLValid::ElementAlreadyExists, bbName.getRawBuffer()); if (!fDumElemDecl) fDumElemDecl = new (fMemoryManager) DTDElementDecl ( bbName.getRawBuffer() , fEmptyNamespaceId , DTDElementDecl::Any , fMemoryManager ); else fDumElemDecl->setElementName(bbName.getRawBuffer(),fEmptyNamespaceId); } } else { // // Create the new empty declaration to fill in and put it into // the decl pool. // decl = new (fGrammarPoolMemoryManager) DTDElementDecl ( bbName.getRawBuffer() , fEmptyNamespaceId , DTDElementDecl::Any , fGrammarPoolMemoryManager ); fDTDGrammar->putElemDecl(decl); } // Set a flag for whether we will ignore this one const bool isIgnored = (decl == fDumElemDecl); // Mark this one if being externally declared decl->setExternalElemDeclaration(isReadingExternalEntity()); // Mark this one as being declared decl->setCreateReason(XMLElementDecl::Declared); // Another check for a PE ref, with at least required whitespace if (!checkForPERef(false, true)) fScanner->emitError(XMLErrs::ExpectedWhitespace); // And now scan the content model for this guy. if (!scanContentSpec(*decl)) { fReaderMgr->skipPastChar(chCloseAngle); return; } // Another check for a PE ref, but we don't require whitespace here checkForPERef(false, true); // And we should have the ending angle bracket if (!fReaderMgr->skippedChar(chCloseAngle)) { fScanner->emitError(XMLErrs::UnterminatedElementDecl, bbName.getRawBuffer()); fReaderMgr->skipPastChar(chCloseAngle); } // // If we have a DTD handler tell it about the new element decl. We // tell it if its one that can be ignored, cause its an override of a // previously existing decl. If it is being ignored, only call back // if advanced callbacks are enabled. // if (fDocTypeHandler) fDocTypeHandler->elementDecl(*decl, isIgnored);}//// This method will process a general or parameter entity reference. The// entity name and entity text will be stored in the entity pool. The value// of the entity will be scanned for any other parameter entity or char// references which will be expanded. So the stored value can only have// general entity references when done.//void DTDScanner::scanEntityDecl(){ // // Space is required here, but we cannot check for a PE Ref since // there could be a legal (no-ref) percent sign here. Since any // entit
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?