dtdscanner.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,936 行 · 第 1/5 页
CPP
1,936 行
// // If we are in the internal subset and in markup, then this is // an error but we go ahead and do it anyway. // if (fInternalSubset && inMarkup) fScanner->emitError(XMLErrs::PERefInMarkupInIntSubset); if (!fReaderMgr->getName(bbName.getBuffer())) { fScanner->emitError(XMLErrs::ExpectedPEName); // Skip the semicolon if that's what we ended up on fReaderMgr->skippedChar(chSemiColon); return false; } // If no terminating semicolon, emit an error but try to keep going if (!fReaderMgr->skippedChar(chSemiColon)) fScanner->emitError(XMLErrs::UnterminatedEntityRef, bbName.getRawBuffer()); // // Look it up in the PE decl pool and see if it exists. If not, just // emit an error and continue. // XMLEntityDecl* decl = fPEntityDeclPool->getByKey(bbName.getRawBuffer()); if (!decl) { // XML 1.0 Section 4.1 if (fScanner->getStandalone()) { // no need to check fScanner->fHasNoDTD which is for sure false // since we are in expandPERef already fScanner->emitError(XMLErrs::EntityNotFound, bbName.getRawBuffer()); } else { if (fScanner->getDoValidation()) fScanner->getValidator()->emitError(XMLValid::VC_EntityNotFound, bbName.getRawBuffer()); } return false; } // // XML 1.0 Section 2.9 // If we are a standalone document, then it has to have been declared // in the internal subset. Keep going though. // if (fScanner->getDoValidation() && fScanner->getStandalone() && !decl->getDeclaredInIntSubset()) fScanner->getValidator()->emitError(XMLValid::VC_IllegalRefInStandalone, bbName.getRawBuffer()); // // Okee dokee, we found it. So create either a memory stream with // the entity value contents, or a file stream if its an external // entity. // if (decl->isExternal()) { // And now create a reader to read this entity InputSource* srcUsed; XMLReader* reader = fReaderMgr->createReader ( decl->getBaseURI() , decl->getSystemId() , decl->getPublicId() , false , inLiteral ? XMLReader::RefFrom_Literal : XMLReader::RefFrom_NonLiteral , XMLReader::Type_PE , XMLReader::Source_External , srcUsed ); // Put a janitor on the source so its cleaned up on exit Janitor<InputSource> janSrc(srcUsed); // If the creation failed then throw an exception if (!reader) ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenExtEntity, srcUsed->getSystemId(), fMemoryManager); // Set the 'throw at end' flag, to the one we were given reader->setThrowAtEnd(throwEndOfExt); // // Push the reader. If its a recursive expansion, then emit an error // and return an failure. // if (!fReaderMgr->pushReader(reader, decl)) { fScanner->emitError(XMLErrs::RecursiveEntity, decl->getName()); return false; } // // If the caller wants us to scan the external entity, then lets // do that now. // if (scanExternal) { XMLEntityHandler* entHandler = fScanner->getEntityHandler(); // If we have an entity handler, tell it we are starting this entity if (entHandler) entHandler->startInputSource(*srcUsed); // // Scan the external entity now. The parameter tells it that // it is not in an include section. Get the current reader // level so we can catch partial markup errors and be sure // to get back to here if we get an exception out of the // ext subset scan. // const unsigned int readerNum = fReaderMgr->getCurrentReaderNum(); try { scanExtSubsetDecl(false, false); } catch(const OutOfMemoryException&) { throw; } catch(...) { // Pop the reader back to the original level fReaderMgr->cleanStackBackTo(readerNum); // End the input source, even though its not happy if (entHandler) entHandler->endInputSource(*srcUsed); throw; } // If we have an entity handler, tell it we are ending this entity if (entHandler) entHandler->endInputSource(*srcUsed); } else { // If it starts with the XML string, then parse a text decl if (fScanner->checkXMLDecl(true)) scanTextDecl(); } } else { // Create a reader over a memory stream over the entity value XMLReader* valueReader = fReaderMgr->createIntEntReader ( decl->getName() , inLiteral ? XMLReader::RefFrom_Literal : XMLReader::RefFrom_NonLiteral , XMLReader::Type_PE , decl->getValue() , decl->getValueLen() , false ); // // Trt to push the entity reader onto the reader manager stack, // where it will become the subsequent input. If it fails, that // means the entity is recursive, so issue an error. The reader // will have just been discarded, but we just keep going. // if (!fReaderMgr->pushReader(valueReader, decl)) fScanner->emitError(XMLErrs::RecursiveEntity, decl->getName()); } return true;}bool DTDScanner::getQuotedString(XMLBuffer& toFill){ // Reset the target buffer toFill.reset(); // Get the next char which must be a single or double quote XMLCh quoteCh; if (!fReaderMgr->skipIfQuote(quoteCh)) return false; while (true) { // Get another char const XMLCh nextCh = fReaderMgr->getNextChar(); // See if it matches the starting quote char if (nextCh == quoteCh) break; // // We should never get either an end of file null char here. If we // do, just fail. It will be handled more gracefully in the higher // level code that called us. // if (!nextCh) return false; // Else add it to the buffer toFill.append(nextCh); } return true;}XMLAttDef*DTDScanner::scanAttDef(DTDElementDecl& parentElem, XMLBuffer& bufToUse){ // Check for PE ref or optional whitespace checkForPERef(false, true); // Get the name of the attribute if (!fReaderMgr->getName(bufToUse)) { fScanner->emitError(XMLErrs::ExpectedAttrName); return 0; } // // Look up this attribute in the parent element's attribute list. If // it already exists, then use the dummy. // DTDAttDef* decl = parentElem.getAttDef(bufToUse.getRawBuffer()); if (decl) { // It already exists, so put out a warning fScanner->emitError ( XMLErrs::AttListAlreadyExists , bufToUse.getRawBuffer() , parentElem.getFullName() ); // Use the dummy decl to parse into and set its name to the name we got if (!fDumAttDef) { fDumAttDef = new (fMemoryManager) DTDAttDef(fMemoryManager); fDumAttDef->setId(fNextAttrId++); } fDumAttDef->setName(bufToUse.getRawBuffer()); decl = fDumAttDef; } else { // // It does not already exist so create a new one, give it the next // available unique id, and add it // decl = new (fGrammarPoolMemoryManager) DTDAttDef ( bufToUse.getRawBuffer() , XMLAttDef::CData , XMLAttDef::Implied , fGrammarPoolMemoryManager ); decl->setId(fNextAttrId++); decl->setExternalAttDeclaration(isReadingExternalEntity()); parentElem.addAttDef(decl); } // Set a flag to indicate whether we are doing a dummy parse const bool isIgnored = (decl == fDumAttDef); // Space is required here, so check for PE ref, and require space if (!checkForPERef(false, true)) fScanner->emitError(XMLErrs::ExpectedWhitespace); // // Next has to be one of the attribute type strings. This tells us what // is to follow. // if (fReaderMgr->skippedString(XMLUni::fgCDATAString)) { decl->setType(XMLAttDef::CData); } else if (fReaderMgr->skippedString(XMLUni::fgIDString)) { if (!fReaderMgr->skippedString(XMLUni::fgRefString)) decl->setType(XMLAttDef::ID); else if (!fReaderMgr->skippedChar(chLatin_S)) decl->setType(XMLAttDef::IDRef); else decl->setType(XMLAttDef::IDRefs); } else if (fReaderMgr->skippedString(XMLUni::fgEntitString)) { if (fReaderMgr->skippedChar(chLatin_Y)) { decl->setType(XMLAttDef::Entity); } else if (fReaderMgr->skippedString(XMLUni::fgIESString)) { decl->setType(XMLAttDef::Entities); } else { fScanner->emitError ( XMLErrs::ExpectedAttributeType , decl->getFullName() , parentElem.getFullName() ); return 0; } } else if (fReaderMgr->skippedString(XMLUni::fgNmTokenString)) { if (fReaderMgr->skippedChar(chLatin_S)) decl->setType(XMLAttDef::NmTokens); else decl->setType(XMLAttDef::NmToken); } else if (fReaderMgr->skippedString(XMLUni::fgNotationString)) { // Check for PE ref and require space if (!checkForPERef(false, true)) fScanner->emitError(XMLErrs::ExpectedWhitespace); decl->setType(XMLAttDef::Notation); if (!scanEnumeration(*decl, bufToUse, true)) return 0; // Set the value as the enumeration for this decl decl->setEnumeration(bufToUse.getRawBuffer()); } else if (fReaderMgr->skippedChar(chOpenParen)) { decl->setType(XMLAttDef::Enumeration); if (!scanEnumeration(*decl, bufToUse, false)) return 0; // Set the value as the enumeration for this decl decl->setEnumeration(bufToUse.getRawBuffer()); } else { fScanner->emitError ( XMLErrs::ExpectedAttributeType , decl->getFullName() , parentElem.getFullName() ); return 0; } // Space is required here, so check for PE ref, and require space if (!checkForPERef(false, true)) fScanner->emitError(XMLErrs::ExpectedWhitespace); // And then scan for the optional default value declaration scanDefaultDecl(*decl); // If validating, then do a couple of validation constraints if (fScanner->getDoValidation()) { if (decl->getType() == XMLAttDef::ID) { if ((decl->getDefaultType() != XMLAttDef::Implied) && (decl->getDefaultType() != XMLAttDef::Required)) { fScanner->getValidator()->emitError(XMLValid::BadIDAttrDefType, decl->getFullName()); } } // if attdef is xml:space, check correct enumeration (default|preserve) const XMLCh fgXMLSpace[] = { chLatin_x, chLatin_m, chLatin_l, chColon, chLatin_s, chLatin_p, chLatin_a, chLatin_c, chLatin_e, chNull }; if (XMLString::equals(decl->getFullName(),fgXMLSpace)) { const XMLCh fgPreserve[] = { chLatin_p, chLatin_r, chLatin_e, chLatin_s, chLatin_e, chLatin_r, chLatin_v, chLatin_e, chNull }; const XMLCh fgDefault[] = { chLatin_d, chLatin_e, chLatin_f, chLatin_a, chLatin_u, chLatin_l, chLatin_t, chNull }; bool ok = false; if (decl->getType() == XMLAttDef::Enumeration) { BaseRefVectorOf<XMLCh>* enumVector = XMLString::tokenizeString(decl->getEnumeration(), fMemoryManager); int size = enumVector->size(); ok = (size == 1 && (XMLString::equals(enumVector->elementAt(0), fgDefault) || XMLString::equals(enumVector->elementAt(0), fgPreserve))) || (size == 2 && (XMLString::equals(enumVector->elementAt(0), fgDefault) && XMLString::equals(enumVector->elementAt(1), fgPreserve))) || (size == 2 && (XMLString::equals(enumVector->elementAt(1), fgDefault) && XMLString::equals(enumVector->elementAt(0), fgPreserve))); delete enumVector; } if (!ok) fScanner->getValidator()->emitError(XMLValid::IllegalXMLSpace); } } // If we have a doc type handler, tell it about this attdef. if (fDocTypeHandler)
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?