dtdscanner.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,936 行 · 第 1/5 页
CPP
1,936 行
fDocTypeHandler->attDef(parentElem, *decl, isIgnored); return decl;}void DTDScanner::scanAttListDecl(){ // Space is required here, so check for a PE ref if (!checkForPERef(false, true)) { fScanner->emitError(XMLErrs::ExpectedWhitespace); fReaderMgr->skipPastChar(chCloseAngle); return; } // // Next should be the name of the element it belongs to, so get a buffer // and get the name into it. // XMLBufBid bbName(fBufMgr); if (!fReaderMgr->getName(bbName.getBuffer())) { fScanner->emitError(XMLErrs::ExpectedElementName); fReaderMgr->skipPastChar(chCloseAngle); return; } // // Find this element's declaration. If it has not been declared yet, // we will force one into the list, but not mark it as declared. // DTDElementDecl* elemDecl = (DTDElementDecl*) fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bbName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE); if (!elemDecl) { // // Lets fault in a declaration and add it to the pool. We mark // it having been created because of an attlist. Later, if its // declared, this will be updated. // elemDecl = new (fGrammarPoolMemoryManager) DTDElementDecl ( bbName.getRawBuffer() , fEmptyNamespaceId , DTDElementDecl::Any , fGrammarPoolMemoryManager ); elemDecl->setCreateReason(XMLElementDecl::AttList); elemDecl->setExternalElemDeclaration(isReadingExternalEntity()); fDTDGrammar->putElemDecl((XMLElementDecl*) elemDecl); } // If we have a doc type handler, tell it the att list is starting if (fDocTypeHandler) fDocTypeHandler->startAttList(*elemDecl); // // Now we loop until we are done with all of the attributes in this // list. We need a buffer to use for local processing. // XMLBufBid bbTmp(fBufMgr); XMLBuffer& tmpBuf = bbTmp.getBuffer(); bool seenAnId = false; while (true) { // Get the next char out and see what it tells us to do const XMLCh nextCh = fReaderMgr->peekNextChar(); // Watch for EOF if (!nextCh) ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager); if (nextCh == chCloseAngle) { // We are done with this attribute list fReaderMgr->getNextChar(); break; } else if (fReaderMgr->getCurrentReader()->isWhitespace(nextCh)) { // // If advanced callbacks are enabled and we have a doc // type handler, then gather up the white space and call // back on the doctype handler. Otherwise, just skip // whitespace. // if (fDocTypeHandler) { fReaderMgr->getSpaces(tmpBuf); fDocTypeHandler->doctypeWhitespace ( tmpBuf.getRawBuffer() , tmpBuf.getLen() ); } else { fReaderMgr->skipPastSpaces(); } } else if (nextCh == chPercent) { // Eat the percent and expand the ref fReaderMgr->getNextChar(); expandPERef(false, false, true); } else { // // It must be an attribute name, so scan it. We let // it use our local buffer for its name scanning. // XMLAttDef* attDef = scanAttDef(*elemDecl, tmpBuf); if (!attDef) { fReaderMgr->skipPastChar(chCloseAngle); break; } // // If we are validating and its an ID type, then we have to // make sure that we have not seen an id attribute yet. Set // the flag to say that we've seen one now also. // if (fScanner->getDoValidation()) { if (attDef->getType() == XMLAttDef::ID) { if (seenAnId) fScanner->getValidator()->emitError(XMLValid::MultipleIdAttrs, elemDecl->getFullName()); seenAnId = true; } } } } // If we have a doc type handler, tell it the att list is ending if (fDocTypeHandler) fDocTypeHandler->endAttList(*elemDecl);}//// This method is called to scan the value of an attribute in content. This// involves some normalization and replacement of general entity and// character references.//// End of entity's must be dealt with here. During DTD scan, they can come// from external entities. During content, they can come from any entity.// We just eat the end of entity and continue with our scan until we come// to the closing quote. If an unterminated value causes us to go through// subsequent entities, that will cause errors back in the calling code,// but there's little we can do about it here.//bool DTDScanner::scanAttValue(const XMLCh* const attrName , XMLBuffer& toFill , const XMLAttDef::AttTypes type){ enum States { InWhitespace , InContent }; // Reset the target buffer toFill.reset(); // Get the next char which must be a single or double quote XMLCh quoteCh; if (!fReaderMgr->skipIfQuote(quoteCh)) return false; // // We have to get the current reader because we have to ignore closing // quotes until we hit the same reader again. // const unsigned int curReader = fReaderMgr->getCurrentReaderNum(); // // Loop until we get the attribute value. Note that we use a double // loop here to avoid the setup/teardown overhead of the exception // handler on every round. // XMLCh nextCh; XMLCh secondCh = 0; States curState = InContent; bool firstNonWS = false; bool gotLeadingSurrogate = false; bool escaped; while (true) { try { while(true) { nextCh = fReaderMgr->getNextChar(); if (!nextCh) ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager); // Check for our ending quote in the same entity if (nextCh == quoteCh) { if (curReader == fReaderMgr->getCurrentReaderNum()) return true; // Watch for spillover into a previous entity if (curReader > fReaderMgr->getCurrentReaderNum()) { fScanner->emitError(XMLErrs::PartialMarkupInEntity); return false; } } // // Check for an entity ref now, before we let it affect our // whitespace normalization logic below. We ignore the empty flag // in this one. // escaped = false; if (nextCh == chAmpersand) { if (scanEntityRef(nextCh, secondCh, escaped) != EntityExp_Returned) { gotLeadingSurrogate = false; continue; } } else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) { // Check for correct surrogate pairs if (gotLeadingSurrogate) fScanner->emitError(XMLErrs::Expected2ndSurrogateChar); else gotLeadingSurrogate = true; } else { if (gotLeadingSurrogate) { if ((nextCh < 0xDC00) || (nextCh > 0xDFFF)) fScanner->emitError(XMLErrs::Expected2ndSurrogateChar); } // Its got to at least be a valid XML character else if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh)) { XMLCh tmpBuf[9]; XMLString::binToText ( nextCh , tmpBuf , 8 , 16 , fMemoryManager ); fScanner->emitError ( XMLErrs::InvalidCharacterInAttrValue , attrName , tmpBuf ); } gotLeadingSurrogate = false; } // // If its not escaped, then make sure its not a < character, which // is not allowed in attribute values. // if (!escaped && (nextCh == chOpenAngle)) fScanner->emitError(XMLErrs::BracketInAttrValue, attrName); // // If the attribute is a CDATA type we do simple replacement of // tabs and new lines with spaces, if the character is not escaped // by way of a char ref. // // Otherwise, we do the standard non-CDATA normalization of // compressing whitespace to single spaces and getting rid of // leading and trailing whitespace. // if (type == XMLAttDef::CData) { if (!escaped) { if ((nextCh == 0x09) || (nextCh == 0x0A) || (nextCh == 0x0D)) nextCh = chSpace; } } else { if (curState == InWhitespace) { if (!fReaderMgr->getCurrentReader()->isWhitespace(nextCh)) { if (firstNonWS) toFill.append(chSpace); curState = InContent; firstNonWS = true; } else { continue; } } else if (curState == InContent) { if (fReaderMgr->getCurrentReader()->isWhitespace(nextCh)) { curState = InWhitespace; continue; } firstNonWS = true; } } // Else add it to the buffer toFill.append(nextCh); if (secondCh) { toFill.append(secondCh); secondCh=0; } } } catch(const EndOfEntityException&) { // Just eat it and continue. gotLeadingSurrogate = false; escaped = false; } } return true;}bool DTDScanner::scanCharRef(XMLCh& first, XMLCh& second){ bool gotOne = false; unsigned int value = 0; // // Set the radix. Its supposed to be a lower case x if hex. But, in // order to recover well, we check for an upper and put out an error // for that. // unsigned int radix = 10; if (fReaderMgr->skippedChar(chLatin_x)) { radix = 16; } else if (fReaderMgr->skippedChar(chLatin_X)) { fScanner->emitError(XMLErrs::HexRadixMustBeLowerCase); radix = 16; } while (true) { const XMLCh nextCh = fReaderMgr->peekNextChar(); // Watch for EOF if (!nextCh) ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager); // Break out on the terminating semicolon if (nextCh == chSemiColon) { fReaderMgr->getNextChar(); break; } // // Convert this char to a binary value, or bail out if its not // one. // unsigned int nextVal; if ((nextCh >= chDigit_0) && (nextCh <= chDigit_9)) nextVal = (unsigned int)(nextCh - chDigit_0); else if ((nextCh >= chLatin_A) && (nextCh <= chLatin_F)) nextVal= (unsigned int)(10 + (nextCh - chLatin_A)); else if ((nextCh >= chLatin_a) && (nextCh <= chLatin_f)) nextVal = (unsigned int)(10 + (nextCh - chLatin_a)); else
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?