dgxmlscanner.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,902 行 · 第 1/5 页

CPP
1,902
字号
//  This method handles the high level logic of scanning the DOCType//  declaration. This calls the DTDScanner and kicks off both the scanning of//  the internal subset and the scanning of the external subset, if any.////  When we get here the '<!DOCTYPE' part has already been scanned, which is//  what told us that we had a doc type decl to parse.void DGXMLScanner::scanDocTypeDecl(){    if (fDocTypeHandler)        fDocTypeHandler->resetDocType();    // There must be some space after DOCTYPE    if (!fReaderMgr.skipPastSpaces())    {        emitError(XMLErrs::ExpectedWhitespace);        // Just skip the Doctype declaration and return        fReaderMgr.skipPastChar(chCloseAngle);        return;    }    // Get a buffer for the root element    XMLBufBid bbRootName(&fBufMgr);    //  Get a name from the input, which should be the name of the root    //  element of the upcoming content.    fReaderMgr.getName(bbRootName.getBuffer());    if (bbRootName.isEmpty())    {        emitError(XMLErrs::NoRootElemInDOCTYPE);        fReaderMgr.skipPastChar(chCloseAngle);        return;    }    //  Store the root element name for later check    setRootElemName(bbRootName.getRawBuffer());    //  This element obviously is not going to exist in the element decl    //  pool yet, but we need to call docTypeDecl. So force it into    //  the element decl pool, marked as being there because it was in    //  the DOCTYPE. Later, when its declared, the status will be updated.    //    //  Only do this if we are not reusing the validator! If we are reusing,    //  then look it up instead. It has to exist!    DTDElementDecl* rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl    (        bbRootName.getRawBuffer()        , fEmptyNamespaceId        , DTDElementDecl::Any        , fGrammarPoolMemoryManager    );     Janitor<DTDElementDecl> rootDeclJanitor(rootDecl);        rootDecl->setCreateReason(DTDElementDecl::AsRootElem);    rootDecl->setExternalElemDeclaration(true);    if(!fUseCachedGrammar)     {        // this will break getRootElemId on DTDGrammar when        // cached grammars are in use, but         // why would one use this anyway???        ((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl));        rootDeclJanitor.release();    } else     {        // put this in the undeclared pool so it gets deleted...        XMLElementDecl* elemDecl = fDTDElemNonDeclPool->getByKey(bbRootName.getRawBuffer());        if (elemDecl)        {            rootDecl->setId(elemDecl->getId());        }        else        {            rootDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)rootDecl));            rootDeclJanitor.release();        }    }    // Skip any spaces after the name    fReaderMgr.skipPastSpaces();    //  And now if we are looking at a >, then we are done. It is not    //  required to have an internal or external subset, though why you    //  would not escapes me.    if (fReaderMgr.skippedChar(chCloseAngle)) {        //  If we have a doc type handler and advanced callbacks are enabled,        //  call the doctype event.        if (fDocTypeHandler)            fDocTypeHandler->doctypeDecl(*rootDecl, 0, 0, false);        return;    }    // either internal/external subset    if (fValScheme == Val_Auto && !fValidate)        fValidate = true;    bool    hasIntSubset = false;    bool    hasExtSubset = false;    XMLCh*  sysId = 0;    XMLCh*  pubId = 0;    DTDScanner dtdScanner    (        (DTDGrammar*) fGrammar        , fDocTypeHandler        , fGrammarPoolMemoryManager        , fMemoryManager    );    dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);    //  If the next character is '[' then we have no external subset cause    //  there is no system id, just the opening character of the internal    //  subset. Else, has to be an id.    //    // Just look at the next char, don't eat it.    if (fReaderMgr.peekNextChar() == chOpenSquare)    {        hasIntSubset = true;    }    else    {        // Indicate we have an external subset        hasExtSubset = true;        fHasNoDTD = false;        // Get buffers for the ids        XMLBufBid bbPubId(&fBufMgr);        XMLBufBid bbSysId(&fBufMgr);        // Get the external subset id        if (!dtdScanner.scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), DTDScanner::IDType_External))        {            fReaderMgr.skipPastChar(chCloseAngle);            return;        }        // Get copies of the ids we got        pubId = XMLString::replicate(bbPubId.getRawBuffer(), fMemoryManager);        sysId = XMLString::replicate(bbSysId.getRawBuffer(), fMemoryManager);        // Skip spaces and check again for the opening of an internal subset        fReaderMgr.skipPastSpaces();        // Just look at the next char, don't eat it.        if (fReaderMgr.peekNextChar() == chOpenSquare) {            hasIntSubset = true;        }    }    // Insure that the ids get cleaned up, if they got allocated    ArrayJanitor<XMLCh> janSysId(sysId, fMemoryManager);    ArrayJanitor<XMLCh> janPubId(pubId, fMemoryManager);    //  If we have a doc type handler and advanced callbacks are enabled,    //  call the doctype event.    if (fDocTypeHandler)        fDocTypeHandler->doctypeDecl(*rootDecl, pubId, sysId, hasIntSubset, hasExtSubset);    //  Ok, if we had an internal subset, we are just past the [ character    //  and need to parse that first.    if (hasIntSubset)    {        // Eat the opening square bracket        fReaderMgr.getNextChar();        checkInternalDTD(hasExtSubset, sysId);        //  And try to scan the internal subset. If we fail, try to recover        //  by skipping forward tot he close angle and returning.        if (!dtdScanner.scanInternalSubset())        {            fReaderMgr.skipPastChar(chCloseAngle);            return;        }        //  Do a sanity check that some expanded PE did not propogate out of        //  the doctype. This could happen if it was terminated early by bad        //  syntax.        if (fReaderMgr.getReaderDepth() > 1)        {            emitError(XMLErrs::PEPropogated);            // Ask the reader manager to pop back down to the main level            fReaderMgr.cleanStackBackTo(1);        }        fReaderMgr.skipPastSpaces();    }    // And that should leave us at the closing > of the DOCTYPE line    if (!fReaderMgr.skippedChar(chCloseAngle))    {        //  Do a special check for the common scenario of an extra ] char at        //  the end. This is easy to recover from.        if (fReaderMgr.skippedChar(chCloseSquare)        &&  fReaderMgr.skippedChar(chCloseAngle))        {            emitError(XMLErrs::ExtraCloseSquare);        }         else        {            emitError(XMLErrs::UnterminatedDOCTYPE);            fReaderMgr.skipPastChar(chCloseAngle);        }    }    //  If we had an external subset, then we need to deal with that one    //  next. If we are reusing the validator, then don't scan it.    if (hasExtSubset) {        if (fUseCachedGrammar)        {            InputSource* sysIdSrc = resolveSystemId(sysId);            Janitor<InputSource> janSysIdSrc(sysIdSrc);            Grammar* grammar = fGrammarResolver->getGrammar(sysIdSrc->getSystemId());            if (grammar && grammar->getGrammarType() == Grammar::DTDGrammarType) {                fDTDGrammar = (DTDGrammar*) grammar;                fGrammar = fDTDGrammar;                fValidator->setGrammar(fGrammar);                // we *cannot* identify the root element on                 // cached grammars; else we risk breaking multithreaded                // applications.  - NG                /*******                rootDecl = (DTDElementDecl*) fGrammar->getElemDecl(fEmptyNamespaceId, 0, bbRootName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);                if (rootDecl)                    ((DTDGrammar*)fGrammar)->setRootElemId(rootDecl->getId());                else {                    rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl                    (                        bbRootName.getRawBuffer()                        , fEmptyNamespaceId                        , DTDElementDecl::Any                        , fGrammarPoolMemoryManager                    );                    rootDecl->setCreateReason(DTDElementDecl::AsRootElem);                    rootDecl->setExternalElemDeclaration(true);                    ((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl));                }                *********/                return;            }        }        if (fLoadExternalDTD || fValidate)        {            // And now create a reader to read this entity            InputSource* srcUsed;            XMLReader* reader = fReaderMgr.createReader            (                sysId                , pubId                , false                , XMLReader::RefFrom_NonLiteral                , XMLReader::Type_General                , XMLReader::Source_External                , srcUsed                , fCalculateSrcOfs            );            // Put a janitor on the input source            Janitor<InputSource> janSrc(srcUsed);            //  If it failed then throw an exception            if (!reader)                ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenDTD, srcUsed->getSystemId(), fMemoryManager);            if (fToCacheGrammar) {                unsigned int stringId = fGrammarResolver->getStringPool()->addOrFind(srcUsed->getSystemId());                const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(stringId);                fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);                ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr);                fGrammarResolver->putGrammar(fGrammar);            }            //  In order to make the processing work consistently, we have to            //  make this look like an external entity. So create an entity            //  decl and fill it in and push it with the reader, as happens            //  with an external entity. Put a janitor on it to insure it gets            //  cleaned up. The reader manager does not adopt them.            const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };            DTDEntityDecl* declDTD = new (fGrammarPoolMemoryManager) DTDEntityDecl(gDTDStr, false, fGrammarPoolMemoryManager);            declDTD->setSystemId(sysId);            Janitor<DTDEntityDecl> janDecl(declDTD);            // Mark this one as a throw at end            reader->setThrowAtEnd(true);            // And push it onto the stack, with its pseudo name            fReaderMgr.pushReader(reader, declDTD);            // Tell it its not in an include section            dtdScanner.scanExtSubsetDecl(false, true);        }    }}bool DGXMLScanner::scanStartTag(bool& gotData){    //  Assume we will still have data until proven otherwise. It will only    //  ever be false if this is the root and its empty.    gotData = true;    //  Get the QName. In this case, we are not doing namespaces, so we just    //  use it as is and don't have to break it into parts.    if (!fReaderMgr.getName(fQNameBuf))    {        emitError(XMLErrs::ExpectedElementName);        fReaderMgr.skipToChar(chOpenAngle);        return false;    }    // Assume it won't be an empty tag    bool isEmpty = false;    // See if its the root element    const bool isRoot = fElemStack.isEmpty();    //  Lets try to look up the element in the validator's element decl pool    //  We can pass bogus values for the URI id and the base name. We know that    //  this can only be called if we are doing a DTD style validator and that    //  he will only look at the QName.    //    //  We *do not* tell him to fault in a decl if he does not find one - NG.    bool wasAdded = false;    const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();     XMLElementDecl* elemDecl = fGrammar->getElemDecl    (        fEmptyNamespaceId        , 0        , qnameRawBuf        , Grammar::TOP_LEVEL_SCOPE    );    // look in the undeclared pool:    if(!elemDecl)     {        elemDecl = fDTDElemNonDeclPool->getByKey(qnameRawBuf);    }    if(!elemDecl)     {        wasAdded = true;        elemDecl = new (fMemoryManager) DTDElementDecl         (            qnameRawBuf            , fEmptyNamespaceId            , DTDElementDecl::Any            , fMemoryManager        );        elemDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)elemDecl));    }    if (fValidate) {        if (wasAdded)        {            // This is to tell the reuse Validator that this element was            // faulted-in, was not an element in the validator pool originally            elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);            fValidator->emitError            (                XMLValid::ElementNotDefined                , qnameRawBuf            );        }        // If its not marked declared, then emit an error        else if (!elemDecl->isDeclared())        {            fValidator->emitError            (                XMLValid::ElementNotDefined                , qnameRawBuf            );        }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?