wfxmlscanner.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,883 行 · 第 1/5 页
CPP
1,883 行
( elemDecl->getElementName()->getPrefix() , ElemStack::Mode_Element ); // Now we can update the element stack fElemStack.setCurrentURI(uriId); // Tell the document handler about this start tag if (fDocHandler) { fDocHandler->startElement ( *elemDecl , uriId , elemDecl->getElementName()->getPrefix() , *fAttrList , attCount , false , isRoot ); } // If empty, validate content right now if we are validating and then // pop the element stack top. Else, we have to update the current stack // top's namespace mapping elements. if (isEmpty) { // Pop the element stack back off since it'll never be used now fElemStack.popTop(); // If we have a doc handler, tell it about the end tag if (fDocHandler) { fDocHandler->endElement ( *elemDecl , uriId , isRoot , elemDecl->getElementName()->getPrefix() ); } // If the elem stack is empty, then it was an empty root if (isRoot) gotData = false; } return true;}unsigned intWFXMLScanner::resolveQName(const XMLCh* const qName , XMLBuffer& prefixBuf , const short mode , int& prefixColonPos){ // Lets split out the qName into a URI and name buffer first. The URI // can be empty. prefixColonPos = XMLString::indexOf(qName, chColon); if (prefixColonPos == -1) { // Its all name with no prefix, so put the whole thing into the name // buffer. Then map the empty string to a URI, since the empty string // represents the default namespace. This will either return some // explicit URI which the default namespace is mapped to, or the // the default global namespace. bool unknown = false; prefixBuf.reset(); return fElemStack.mapPrefixToURI(XMLUni::fgZeroLenString, (ElemStack::MapModes) mode, unknown); } else { // Copy the chars up to but not including the colon into the prefix // buffer. prefixBuf.set(qName, prefixColonPos); // Watch for the special namespace prefixes. We always map these to // special URIs. 'xml' gets mapped to the official URI that its defined // to map to by the NS spec. xmlns gets mapped to a special place holder // URI that we define (so that it maps to something checkable.) const XMLCh* prefixRawBuf = prefixBuf.getRawBuffer(); if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLNSString)) { // if this is an element, it is an error to have xmlns as prefix if (mode == ElemStack::Mode_Element) emitError(XMLErrs::NoXMLNSAsElementPrefix, qName); return fXMLNSNamespaceId; } else if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLString)) { return fXMLNamespaceId; } else { bool unknown = false; unsigned int uriId = fElemStack.mapPrefixToURI(prefixRawBuf, (ElemStack::MapModes) mode, unknown); if (unknown) emitError(XMLErrs::UnknownPrefix, prefixRawBuf); return uriId; } }}// ---------------------------------------------------------------------------// XMLScanner: Private parsing methods// ---------------------------------------------------------------------------bool WFXMLScanner::scanAttValue(const XMLCh* const attrName , XMLBuffer& toFill){ // Reset the target buffer toFill.reset(); // Get the next char which must be a single or double quote XMLCh quoteCh; if (!fReaderMgr.skipIfQuote(quoteCh)) return false; // We have to get the current reader because we have to ignore closing // quotes until we hit the same reader again. const unsigned int curReader = fReaderMgr.getCurrentReaderNum(); // Loop until we get the attribute value. Note that we use a double // loop here to avoid the setup/teardown overhead of the exception // handler on every round. XMLCh nextCh; XMLCh secondCh = 0; bool gotLeadingSurrogate = false; bool escaped; while (true) { try { while(true) { nextCh = fReaderMgr.getNextChar(); if (!nextCh) ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager); // Check for our ending quote in the same entity if (nextCh == quoteCh) { if (curReader == fReaderMgr.getCurrentReaderNum()) return true; // Watch for spillover into a previous entity if (curReader > fReaderMgr.getCurrentReaderNum()) { emitError(XMLErrs::PartialMarkupInEntity); return false; } } // Check for an entity ref now, before we let it affect our // whitespace normalization logic below. We ignore the empty flag // in this one. escaped = false; if (nextCh == chAmpersand) { if (scanEntityRef(true, nextCh, secondCh, escaped) != EntityExp_Returned) { gotLeadingSurrogate = false; continue; } } else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) { // Deal with surrogate pairs // Its a leading surrogate. If we already got one, then // issue an error, else set leading flag to make sure that // we look for a trailing next time. if (gotLeadingSurrogate) { emitError(XMLErrs::Expected2ndSurrogateChar); } else gotLeadingSurrogate = true; } else { // If its a trailing surrogate, make sure that we are // prepared for that. Else, its just a regular char so make // sure that we were not expected a trailing surrogate. if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF)) { // Its trailing, so make sure we were expecting it if (!gotLeadingSurrogate) emitError(XMLErrs::Unexpected2ndSurrogateChar); } else { // Its just a char, so make sure we were not expecting a // trailing surrogate. if (gotLeadingSurrogate) { emitError(XMLErrs::Expected2ndSurrogateChar); } // Its got to at least be a valid XML character else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) { XMLCh tmpBuf[9]; XMLString::binToText ( nextCh , tmpBuf , 8 , 16 , fMemoryManager ); emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf); } } gotLeadingSurrogate = false; } // If its not escaped, then make sure its not a < character, which // is not allowed in attribute values. if (!escaped) { if (nextCh == chOpenAngle) emitError(XMLErrs::BracketInAttrValue, attrName); else if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) nextCh = chSpace; } // Else add it to the buffer toFill.append(nextCh); if (secondCh) { toFill.append(secondCh); secondCh=0; } } } catch(const EndOfEntityException&) { // Just eat it and continue. gotLeadingSurrogate = false; escaped = false; } } return true;}// This method scans a CDATA section. It collects the character into one// of the temp buffers and calls the document handler, if any, with the// characters. It assumes that the <![CDATA string has been scanned before// this call.void WFXMLScanner::scanCDSection(){ static const XMLCh CDataClose[] = { chCloseSquare, chCloseAngle, chNull }; // The next character should be the opening square bracket. If not // issue an error, but then try to recover by skipping any whitespace // and checking again. if (!fReaderMgr.skippedChar(chOpenSquare)) { emitError(XMLErrs::ExpectedOpenSquareBracket); fReaderMgr.skipPastSpaces(); // If we still don't find it, then give up, else keep going if (!fReaderMgr.skippedChar(chOpenSquare)) return; } // Get a buffer for this XMLBufBid bbCData(&fBufMgr); // We just scan forward until we hit the end of CDATA section sequence. // CDATA is effectively a big escape mechanism so we don't treat markup // characters specially here. bool emittedError = false; bool gotLeadingSurrogate = false; while (true) { const XMLCh nextCh = fReaderMgr.getNextChar(); // Watch for unexpected end of file if (!nextCh) { emitError(XMLErrs::UnterminatedCDATASection); ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager); } // If this is a close square bracket it could be our closing // sequence. if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose)) { // make sure we were not expecting a trailing surrogate. if (gotLeadingSurrogate) emitError(XMLErrs::Expected2ndSurrogateChar); // If we have a doc handler, call it if (fDocHandler) { fDocHandler->docCharacters ( bbCData.getRawBuffer() , bbCData.getLen() , true ); } // And we are done break; } // Make sure its a valid character. But if we've emitted an error // already, don't bother with the overhead since we've already told // them about it. if (!emittedError) { // Deal with surrogate pairs if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) { // Its a leading surrogate. If we already got one, then // issue an error, else set leading flag to make sure that // we look for a trailing next time. if (gotLeadingSurrogate) emitError(XMLErrs::Expected2ndSurrogateChar); else gotLeadingSurrogate = true; } else { // If its a trailing surrogate, make sure that we are // prepared for that. Else, its just a regular char so make // sure that we were not expected a trailing surrogate. if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF)) { // Its trailing, so make sure we were expecting it if (!gotLeadingSurrogate) emitError(XMLErrs::Unexpected2ndSurrogateChar); } else { // Its just a char, so make sure we were not expecting a // trailing surrogate. if (gotLeadingSurrogate) emitError(XMLErrs::Expected2ndSurrogateChar); // Its got to at least be a valid XML character else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) { XMLCh tmpBuf[9]; XMLString::binToText ( nextCh , tmpBuf , 8 , 16 , fMemoryManager ); emitError(XMLErrs::InvalidCharacter, tmpBuf); emittedError = true; } } gotLeadingSurrogate = false; } } // Add it to the buffer bbCData.append(nextCh); }}void WFXMLScanner::scanCharData(XML
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?