dtdscanner.cpp

来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,936 行 · 第 1/5 页

CPP
1,936
字号
            }        }    }    //    //  We saw the terminating parenthesis so lets check for any repetition    //  character, and create a node for that, making the head node the child    //  of it.    //    XMLCh repCh = fReaderMgr->peekNextChar();    ContentSpecNode* retNode = makeRepNode(repCh, headNode, fGrammarPoolMemoryManager);    if (retNode != headNode)        fReaderMgr->getNextChar();    return retNode;}////  We get here after the '<!--' part of the comment. We scan past the//  terminating '-->' It will calls the appropriate handler with the comment//  text, if one is provided. A comment can be in either the document or//  the DTD, so the fInDocument flag is used to know which handler to send//  it to.//void DTDScanner::scanComment(){    enum States    {        InText        , OneDash        , TwoDashes    };    // Get a buffer for this    XMLBufBid bbComment(fBufMgr);    //    //  Get the comment text into a temp buffer. Be sure to use temp buffer    //  two here, since its to be used for stuff that is potentially longer    //  than just a name.    //    bool   gotLeadingSurrogate = false;    States curState = InText;    while (true)    {        // Get the next character        const XMLCh nextCh = fReaderMgr->getNextChar();        //  Watch for an end of file        if (!nextCh)        {            fScanner->emitError(XMLErrs::UnterminatedComment);            ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);        }        // Check for correct surrogate pairs        if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))        {            if (gotLeadingSurrogate)                fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);            else                gotLeadingSurrogate = true;        }        else        {            if (gotLeadingSurrogate)            {                if ((nextCh < 0xDC00) || (nextCh > 0xDFFF))                    fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);            }            // Its got to at least be a valid XML character            else if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh)) {                XMLCh tmpBuf[9];                XMLString::binToText                (                    nextCh                    , tmpBuf                    , 8                    , 16                    , fMemoryManager                );                fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf);            }            gotLeadingSurrogate = false;        }        if (curState == InText)        {            // If its a dash, go to OneDash state. Otherwise take as text            if (nextCh == chDash)                curState = OneDash;            else                bbComment.append(nextCh);        }         else if (curState == OneDash)        {            //            //  If its another dash, then we change to the two dashes states.            //  Otherwise, we have to put in the deficit dash and the new            //  character and go back to InText.            //            if (nextCh == chDash)            {                curState = TwoDashes;            }             else            {                bbComment.append(chDash);                bbComment.append(nextCh);                curState = InText;            }        }         else if (curState == TwoDashes)        {            // The next character must be the closing bracket            if (nextCh != chCloseAngle)            {                fScanner->emitError(XMLErrs::IllegalSequenceInComment);                fReaderMgr->skipPastChar(chCloseAngle);                return;            }            break;        }    }    // If there is a doc type handler, then pass on the comment stuff    if (fDocTypeHandler)        fDocTypeHandler->doctypeComment(bbComment.getRawBuffer());}bool DTDScanner::scanContentSpec(DTDElementDecl& toFill){    //    //  Check for for a couple of the predefined content type strings. If    //  its not one of these, its got to be a parenthesized reg ex type    //  expression.    //    if (fReaderMgr->skippedString(XMLUni::fgEmptyString))    {        toFill.setModelType(DTDElementDecl::Empty);        return true;    }    if (fReaderMgr->skippedString(XMLUni::fgAnyString))    {        toFill.setModelType(DTDElementDecl::Any);        return true;    }    // Its got to be a parenthesized regular expression    if (!fReaderMgr->skippedChar(chOpenParen))    {        fScanner->emitError        (            XMLErrs::ExpectedContentSpecExpr            , toFill.getFullName()        );        return false;    }    // Get the current reader id, so we can test for partial markup    const unsigned int curReader = fReaderMgr->getCurrentReaderNum();    // We could have a PE ref here, but don't require space    checkForPERef(false, true);    //    //  Now we look for a PCDATA string. If its PCDATA, then it must be a    //  MIXED model. Otherwise, it must be a regular list of children in    //  a regular expression perhaps.    //    bool status;    if (fReaderMgr->skippedString(XMLUni::fgPCDATAString))    {        // Set the model to mixed        toFill.setModelType(DTDElementDecl::Mixed_Simple);        status = scanMixed(toFill);        //        //  If we are validating we have to check that there are no multiple        //  uses of any child elements.        //        if (fScanner->getDoValidation())        {            if (((const MixedContentModel*)toFill.getContentModel())->hasDups())                fScanner->getValidator()->emitError(XMLValid::RepElemInMixed);        }    }     else    {        //        //  We have to do a recursive scan of the content model. Create a        //  buffer for it to use, for efficiency. It returns the top ofthe        //  content spec node tree, which we set if successful.        //        toFill.setModelType(DTDElementDecl::Children);        XMLBufBid bbTmp(fBufMgr);        ContentSpecNode* resNode = scanChildren(toFill, bbTmp.getBuffer());        status = (resNode != 0);        if (status)            toFill.setContentSpec(resNode);    }    // Make sure we are on the same reader as where we started    if (curReader != fReaderMgr->getCurrentReaderNum() && fScanner->getDoValidation())        fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);    return status;}void DTDScanner::scanDefaultDecl(DTDAttDef& toFill){    if (fReaderMgr->skippedString(XMLUni::fgRequiredString))    {        toFill.setDefaultType(XMLAttDef::Required);        return;    }    if (fReaderMgr->skippedString(XMLUni::fgImpliedString))    {        toFill.setDefaultType(XMLAttDef::Implied);        return;    }    if (fReaderMgr->skippedString(XMLUni::fgFixedString))    {        //        //  There must be space before the fixed value. If there is not, then        //  emit an error but keep going.        //        if (!fReaderMgr->skippedSpace())            fScanner->emitError(XMLErrs::ExpectedWhitespace);        else            fReaderMgr->skipPastSpaces();        toFill.setDefaultType(XMLAttDef::Fixed);    }     else    {        toFill.setDefaultType(XMLAttDef::Default);    }    //    //  If we got here, its fixed or default, so we need to get a value.    //  If we don't, then emit an error but just set the default value to    //  an empty string and try to keep going.    //    // Check for PE ref or optional whitespace    checkForPERef(false, true);    XMLBufBid bbValue(fBufMgr);    if (!scanAttValue(toFill.getFullName(), bbValue.getBuffer(), toFill.getType()))        fScanner->emitError(XMLErrs::ExpectedDefAttrDecl);    toFill.setValue(bbValue.getRawBuffer());}////  This is called after seeing '<!ELEMENT' which indicates that an element//  markup is starting. This guy scans the rest of it and adds it to the//  element decl pool if it has not already been declared.//void DTDScanner::scanElementDecl(){    //    //  Space is legal (required actually) here so check for a PE ref. If    //  we don't get our whitespace, then issue and error, but try to keep    //  going.    //    if (!checkForPERef(false, true))        fScanner->emitError(XMLErrs::ExpectedWhitespace);    // Get a buffer for the element name and scan in the name    XMLBufBid bbName(fBufMgr);    if (!fReaderMgr->getName(bbName.getBuffer()))    {        fScanner->emitError(XMLErrs::ExpectedElementName);        fReaderMgr->skipPastChar(chCloseAngle);        return;    }    // Look this guy up in the element decl pool    DTDElementDecl* decl = (DTDElementDecl*) fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bbName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);    //    //  If it does not exist, then we need to create it. If it does and    //  its marked as declared, then that's an error, but we still need to    //  scan over the content model so use the dummy declaration that the    //  parsing code can fill in.    //    if (decl)    {        if (decl->isDeclared())        {            if (fScanner->getDoValidation())                fScanner->getValidator()->emitError(XMLValid::ElementAlreadyExists, bbName.getRawBuffer());            if (!fDumElemDecl)                fDumElemDecl = new (fMemoryManager) DTDElementDecl                (                    bbName.getRawBuffer()                    , fEmptyNamespaceId                    , DTDElementDecl::Any                    , fMemoryManager                );            else                fDumElemDecl->setElementName(bbName.getRawBuffer(),fEmptyNamespaceId);        }    }     else    {        //        //  Create the new empty declaration to fill in and put it into        //  the decl pool.        //        decl = new (fGrammarPoolMemoryManager) DTDElementDecl        (            bbName.getRawBuffer()            , fEmptyNamespaceId            , DTDElementDecl::Any            , fGrammarPoolMemoryManager        );        fDTDGrammar->putElemDecl(decl);    }    // Set a flag for whether we will ignore this one    const bool isIgnored = (decl == fDumElemDecl);    // Mark this one if being externally declared    decl->setExternalElemDeclaration(isReadingExternalEntity());    // Mark this one as being declared    decl->setCreateReason(XMLElementDecl::Declared);    // Another check for a PE ref, with at least required whitespace    if (!checkForPERef(false, true))        fScanner->emitError(XMLErrs::ExpectedWhitespace);    // And now scan the content model for this guy.    if (!scanContentSpec(*decl))    {        fReaderMgr->skipPastChar(chCloseAngle);        return;    }    // Another check for a PE ref, but we don't require whitespace here    checkForPERef(false, true);    // And we should have the ending angle bracket    if (!fReaderMgr->skippedChar(chCloseAngle))    {        fScanner->emitError(XMLErrs::UnterminatedElementDecl, bbName.getRawBuffer());        fReaderMgr->skipPastChar(chCloseAngle);    }    //    //  If we have a DTD handler tell it about the new element decl. We    //  tell it if its one that can be ignored, cause its an override of a    //  previously existing decl. If it is being ignored, only call back    //  if advanced callbacks are enabled.    //    if (fDocTypeHandler)        fDocTypeHandler->elementDecl(*decl, isIgnored);}////  This method will process a general or parameter entity reference. The//  entity name and entity text will be stored in the entity pool. The value//  of the entity will be scanned for any other parameter entity or char//  references which will be expanded. So the stored value can only have//  general entity references when done.//void DTDScanner::scanEntityDecl(){    //    //  Space is required here, but we cannot check for a PE Ref since    //  there could be a legal (no-ref) percent sign here. Since any    //  entit

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?