dtdscanner.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,936 行 · 第 1/5 页

CPP
1,936
字号
    //    //  If we are in the internal subset and in markup, then this is    //  an error but we go ahead and do it anyway.    //    if (fInternalSubset && inMarkup)        fScanner->emitError(XMLErrs::PERefInMarkupInIntSubset);    if (!fReaderMgr->getName(bbName.getBuffer()))    {        fScanner->emitError(XMLErrs::ExpectedPEName);        // Skip the semicolon if that's what we ended up on        fReaderMgr->skippedChar(chSemiColon);        return false;    }    // If no terminating semicolon, emit an error but try to keep going    if (!fReaderMgr->skippedChar(chSemiColon))        fScanner->emitError(XMLErrs::UnterminatedEntityRef, bbName.getRawBuffer());    //    //  Look it up in the PE decl pool and see if it exists. If not, just    //  emit an error and continue.    //    XMLEntityDecl* decl = fPEntityDeclPool->getByKey(bbName.getRawBuffer());    if (!decl)    {        // XML 1.0 Section 4.1        if (fScanner->getStandalone()) {            // no need to check fScanner->fHasNoDTD which is for sure false            // since we are in expandPERef already            fScanner->emitError(XMLErrs::EntityNotFound, bbName.getRawBuffer());        }        else {            if (fScanner->getDoValidation())                fScanner->getValidator()->emitError(XMLValid::VC_EntityNotFound, bbName.getRawBuffer());        }        return false;    }    //    // XML 1.0 Section 2.9    //  If we are a standalone document, then it has to have been declared    //  in the internal subset. Keep going though.    //    if (fScanner->getDoValidation() && fScanner->getStandalone() && !decl->getDeclaredInIntSubset())        fScanner->getValidator()->emitError(XMLValid::VC_IllegalRefInStandalone, bbName.getRawBuffer());    //    //  Okee dokee, we found it. So create either a memory stream with    //  the entity value contents, or a file stream if its an external    //  entity.    //    if (decl->isExternal())    {        // And now create a reader to read this entity        InputSource* srcUsed;        XMLReader* reader = fReaderMgr->createReader        (            decl->getBaseURI()            , decl->getSystemId()            , decl->getPublicId()            , false            , inLiteral ? XMLReader::RefFrom_Literal : XMLReader::RefFrom_NonLiteral            , XMLReader::Type_PE            , XMLReader::Source_External            , srcUsed        );        // Put a janitor on the source so its cleaned up on exit        Janitor<InputSource> janSrc(srcUsed);        // If the creation failed then throw an exception        if (!reader)            ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenExtEntity, srcUsed->getSystemId(), fMemoryManager);        // Set the 'throw at end' flag, to the one we were given        reader->setThrowAtEnd(throwEndOfExt);        //        //  Push the reader. If its a recursive expansion, then emit an error        //  and return an failure.        //        if (!fReaderMgr->pushReader(reader, decl))        {            fScanner->emitError(XMLErrs::RecursiveEntity, decl->getName());            return false;        }        //        //  If the caller wants us to scan the external entity, then lets        //  do that now.        //        if (scanExternal)        {            XMLEntityHandler* entHandler = fScanner->getEntityHandler();            // If we have an entity handler, tell it we are starting this entity            if (entHandler)                entHandler->startInputSource(*srcUsed);            //            //  Scan the external entity now. The parameter tells it that            //  it is not in an include section. Get the current reader            //  level so we can catch partial markup errors and be sure            //  to get back to here if we get an exception out of the            //  ext subset scan.            //            const unsigned int readerNum = fReaderMgr->getCurrentReaderNum();            try            {                scanExtSubsetDecl(false, false);            }            catch(const OutOfMemoryException&)            {                throw;            }            catch(...)            {                // Pop the reader back to the original level                fReaderMgr->cleanStackBackTo(readerNum);                // End the input source, even though its not happy                if (entHandler)                    entHandler->endInputSource(*srcUsed);                throw;            }            // If we have an entity handler, tell it we are ending this entity            if (entHandler)                entHandler->endInputSource(*srcUsed);        }        else {            // If it starts with the XML string, then parse a text decl            if (fScanner->checkXMLDecl(true))                scanTextDecl();        }    }     else    {        // Create a reader over a memory stream over the entity value        XMLReader* valueReader = fReaderMgr->createIntEntReader        (            decl->getName()            , inLiteral ? XMLReader::RefFrom_Literal : XMLReader::RefFrom_NonLiteral            , XMLReader::Type_PE            , decl->getValue()            , decl->getValueLen()            , false        );        //        //  Trt to push the entity reader onto the reader manager stack,        //  where it will become the subsequent input. If it fails, that        //  means the entity is recursive, so issue an error. The reader        //  will have just been discarded, but we just keep going.        //        if (!fReaderMgr->pushReader(valueReader, decl))            fScanner->emitError(XMLErrs::RecursiveEntity, decl->getName());    }    return true;}bool DTDScanner::getQuotedString(XMLBuffer& toFill){    // Reset the target buffer    toFill.reset();    // Get the next char which must be a single or double quote    XMLCh quoteCh;    if (!fReaderMgr->skipIfQuote(quoteCh))        return false;    while (true)    {        // Get another char        const XMLCh nextCh = fReaderMgr->getNextChar();        // See if it matches the starting quote char        if (nextCh == quoteCh)            break;        //        //  We should never get either an end of file null char here. If we        //  do, just fail. It will be handled more gracefully in the higher        //  level code that called us.        //        if (!nextCh)            return false;        // Else add it to the buffer        toFill.append(nextCh);    }    return true;}XMLAttDef*DTDScanner::scanAttDef(DTDElementDecl& parentElem, XMLBuffer& bufToUse){    // Check for PE ref or optional whitespace    checkForPERef(false, true);    // Get the name of the attribute    if (!fReaderMgr->getName(bufToUse))    {        fScanner->emitError(XMLErrs::ExpectedAttrName);        return 0;    }    //    //  Look up this attribute in the parent element's attribute list. If    //  it already exists, then use the dummy.    //    DTDAttDef* decl = parentElem.getAttDef(bufToUse.getRawBuffer());    if (decl)    {        // It already exists, so put out a warning        fScanner->emitError        (            XMLErrs::AttListAlreadyExists            , bufToUse.getRawBuffer()            , parentElem.getFullName()        );        // Use the dummy decl to parse into and set its name to the name we got        if (!fDumAttDef)        {            fDumAttDef = new (fMemoryManager) DTDAttDef(fMemoryManager);            fDumAttDef->setId(fNextAttrId++);        }        fDumAttDef->setName(bufToUse.getRawBuffer());        decl = fDumAttDef;    }     else    {        //        //  It does not already exist so create a new one, give it the next        //  available unique id, and add it        //        decl = new (fGrammarPoolMemoryManager) DTDAttDef        (            bufToUse.getRawBuffer()            , XMLAttDef::CData            , XMLAttDef::Implied            , fGrammarPoolMemoryManager        );        decl->setId(fNextAttrId++);        decl->setExternalAttDeclaration(isReadingExternalEntity());        parentElem.addAttDef(decl);    }    // Set a flag to indicate whether we are doing a dummy parse    const bool isIgnored = (decl == fDumAttDef);    // Space is required here, so check for PE ref, and require space    if (!checkForPERef(false, true))        fScanner->emitError(XMLErrs::ExpectedWhitespace);    //    //  Next has to be one of the attribute type strings. This tells us what    //  is to follow.    //    if (fReaderMgr->skippedString(XMLUni::fgCDATAString))    {        decl->setType(XMLAttDef::CData);    }     else if (fReaderMgr->skippedString(XMLUni::fgIDString))    {        if (!fReaderMgr->skippedString(XMLUni::fgRefString))            decl->setType(XMLAttDef::ID);        else if (!fReaderMgr->skippedChar(chLatin_S))            decl->setType(XMLAttDef::IDRef);        else            decl->setType(XMLAttDef::IDRefs);    }     else if (fReaderMgr->skippedString(XMLUni::fgEntitString))    {        if (fReaderMgr->skippedChar(chLatin_Y))        {            decl->setType(XMLAttDef::Entity);        }         else if (fReaderMgr->skippedString(XMLUni::fgIESString))        {            decl->setType(XMLAttDef::Entities);        }         else        {            fScanner->emitError            (                XMLErrs::ExpectedAttributeType                , decl->getFullName()                , parentElem.getFullName()            );            return 0;        }    }     else if (fReaderMgr->skippedString(XMLUni::fgNmTokenString))    {        if (fReaderMgr->skippedChar(chLatin_S))            decl->setType(XMLAttDef::NmTokens);        else            decl->setType(XMLAttDef::NmToken);    }     else if (fReaderMgr->skippedString(XMLUni::fgNotationString))    {        // Check for PE ref and require space        if (!checkForPERef(false, true))            fScanner->emitError(XMLErrs::ExpectedWhitespace);        decl->setType(XMLAttDef::Notation);        if (!scanEnumeration(*decl, bufToUse, true))            return 0;        // Set the value as the enumeration for this decl        decl->setEnumeration(bufToUse.getRawBuffer());    }     else if (fReaderMgr->skippedChar(chOpenParen))    {        decl->setType(XMLAttDef::Enumeration);        if (!scanEnumeration(*decl, bufToUse, false))            return 0;        // Set the value as the enumeration for this decl        decl->setEnumeration(bufToUse.getRawBuffer());    }     else    {        fScanner->emitError        (            XMLErrs::ExpectedAttributeType            , decl->getFullName()            , parentElem.getFullName()        );        return 0;    }    // Space is required here, so check for PE ref, and require space    if (!checkForPERef(false, true))        fScanner->emitError(XMLErrs::ExpectedWhitespace);    // And then scan for the optional default value declaration    scanDefaultDecl(*decl);    // If validating, then do a couple of validation constraints    if (fScanner->getDoValidation())    {        if (decl->getType() == XMLAttDef::ID)        {            if ((decl->getDefaultType() != XMLAttDef::Implied)            &&  (decl->getDefaultType() != XMLAttDef::Required))            {                fScanner->getValidator()->emitError(XMLValid::BadIDAttrDefType, decl->getFullName());            }        }        // if attdef is xml:space, check correct enumeration (default|preserve)        const XMLCh fgXMLSpace[] = { chLatin_x, chLatin_m, chLatin_l, chColon, chLatin_s, chLatin_p, chLatin_a, chLatin_c, chLatin_e, chNull };        if (XMLString::equals(decl->getFullName(),fgXMLSpace)) {            const XMLCh fgPreserve[] = { chLatin_p, chLatin_r, chLatin_e, chLatin_s, chLatin_e, chLatin_r, chLatin_v, chLatin_e, chNull };            const XMLCh fgDefault[] = { chLatin_d, chLatin_e, chLatin_f, chLatin_a, chLatin_u, chLatin_l, chLatin_t, chNull };            bool ok = false;            if (decl->getType() == XMLAttDef::Enumeration) {                BaseRefVectorOf<XMLCh>* enumVector = XMLString::tokenizeString(decl->getEnumeration(), fMemoryManager);                int size = enumVector->size();                ok = (size == 1 &&                     (XMLString::equals(enumVector->elementAt(0), fgDefault) ||                      XMLString::equals(enumVector->elementAt(0), fgPreserve))) ||                     (size == 2 &&                     (XMLString::equals(enumVector->elementAt(0), fgDefault) &&                      XMLString::equals(enumVector->elementAt(1), fgPreserve))) ||                     (size == 2 &&                     (XMLString::equals(enumVector->elementAt(1), fgDefault) &&                      XMLString::equals(enumVector->elementAt(0), fgPreserve)));                delete enumVector;            }            if (!ok)                fScanner->getValidator()->emitError(XMLValid::IllegalXMLSpace);        }    }    // If we have a doc type handler, tell it about this attdef.    if (fDocTypeHandler)

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?