sgxmlscanner.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,881 行 · 第 1/5 页
CPP
1,881 行
case Token_PI : scanPI(); break; case Token_StartTag : scanStartTag(gotData); break; default : fReaderMgr.skipToChar(chOpenAngle); break; } if (orgReader != fReaderMgr.getCurrentReaderNum()) emitError(XMLErrs::PartialMarkupInEntity); // If we hit the end, then do the miscellaneous part if (!gotData) { // Do post-parse validation if required if (fValidate) { // We handle ID reference semantics at this level since // its required by XML 1.0. checkIDRefs(); // Then allow the validator to do any extra stuff it wants// fValidator->postParseValidation(); } // That went ok, so scan for any miscellaneous stuff scanMiscellaneous(); if (toCheckIdentityConstraint()) fICHandler->endDocument(); if (fDocHandler) fDocHandler->endDocument(); } } } // NOTE: // // In all of the error processing below, the emitError() call MUST come // before the flush of the reader mgr, or it will fail because it tries // to find out the position in the XML source of the error. catch(const XMLErrs::Codes) { // This is a 'first failure' exception, so reset and return failure fReaderMgr.reset(); return false; } catch(const XMLValid::Codes) { // This is a 'first fatal error' type exit, so reset and reuturn failure fReaderMgr.reset(); return false; } catch(const XMLException& excToCatch) { // Emit the error and catch any user exception thrown from here. Make // sure in all cases we flush the reader manager. fInException = true; try { if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning) emitError ( XMLErrs::XMLException_Warning , excToCatch.getType() , excToCatch.getMessage() ); else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal) emitError ( XMLErrs::XMLException_Fatal , excToCatch.getType() , excToCatch.getMessage() ); else emitError ( XMLErrs::XMLException_Error , excToCatch.getType() , excToCatch.getMessage() ); } catch(const OutOfMemoryException&) { throw; } catch(...) { // Reset and rethrow user error fReaderMgr.reset(); throw; } // Reset and return failure fReaderMgr.reset(); return false; } catch(const OutOfMemoryException&) { throw; } catch(...) { // Reset and rethrow original error fReaderMgr.reset(); throw; } // If we hit the end, then flush the reader manager if (!retVal) fReaderMgr.reset(); return retVal;}// ---------------------------------------------------------------------------// SGXMLScanner: Private scanning methods// ---------------------------------------------------------------------------// This method is called from scanStartTag() to handle the very raw initial// scan of the attributes. It just fills in the passed collection with// key/value pairs for each attribute. No processing is done on them at all.unsigned intSGXMLScanner::rawAttrScan(const XMLCh* const elemName , RefVectorOf<KVStringPair>& toFill , bool& isEmpty){ // Keep up with how many attributes we've seen so far, and how many // elements are available in the vector. This way we can reuse old // elements until we run out and then expand it. unsigned int attCount = 0; unsigned int curVecSize = toFill.size(); // Assume it is not empty isEmpty = false; // We loop until we either see a /> or >, handling key/value pairs util // we get there. We place them in the passed vector, which we will expand // as required to hold them. while (true) { // Get the next character, which should be non-space XMLCh nextCh = fReaderMgr.peekNextChar(); // If the next character is not a slash or closed angle bracket, // then it must be whitespace, since whitespace is required // between the end of the last attribute and the name of the next // one. // if (attCount) { if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle)) { if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) { // Ok, skip by them and get another char fReaderMgr.getNextChar(); fReaderMgr.skipPastSpaces(); nextCh = fReaderMgr.peekNextChar(); } else { // Emit the error but keep on going emitError(XMLErrs::ExpectedWhitespace); } } } // Ok, here we first check for any of the special case characters. // If its not one, then we do the normal case processing, which // assumes that we've hit an attribute value, Otherwise, we do all // the special case checks. if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh)) { // Assume its going to be an attribute, so get a name from // the input. if (!fReaderMgr.getName(fAttNameBuf)) { emitError(XMLErrs::ExpectedAttrName); fReaderMgr.skipPastChar(chCloseAngle); return attCount; } // And next must be an equal sign if (!scanEq()) { static const XMLCh tmpList[] = { chSingleQuote, chDoubleQuote, chCloseAngle , chOpenAngle, chForwardSlash, chNull }; emitError(XMLErrs::ExpectedEqSign); // Try to sync back up by skipping forward until we either // hit something meaningful. const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList); if ((chFound == chCloseAngle) || (chFound == chForwardSlash)) { // Jump back to top for normal processing of these continue; } else if ((chFound == chSingleQuote) || (chFound == chDoubleQuote) || fReaderMgr.getCurrentReader()->isWhitespace(chFound)) { // Just fall through assuming that the value is to follow } else if (chFound == chOpenAngle) { // Assume a malformed tag and that new one is starting emitError(XMLErrs::UnterminatedStartTag, elemName); return attCount; } else { // Something went really wrong return attCount; } } // Next should be the quoted attribute value. We just do a simple // and stupid scan of this value. The only thing we do here // is to expand entity references. if (!basicAttrValueScan(fAttNameBuf.getRawBuffer(), fAttValueBuf)) { static const XMLCh tmpList[] = { chCloseAngle, chOpenAngle, chForwardSlash, chNull }; emitError(XMLErrs::ExpectedAttrValue); // It failed, so lets try to get synced back up. We skip // forward until we find some whitespace or one of the // chars in our list. const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList); if ((chFound == chCloseAngle) || (chFound == chForwardSlash) || fReaderMgr.getCurrentReader()->isWhitespace(chFound)) { // Just fall through and process this attribute, though // the value will be "". } else if (chFound == chOpenAngle) { // Assume a malformed tag and that new one is starting emitError(XMLErrs::UnterminatedStartTag, elemName); return attCount; } else { // Something went really wrong return attCount; } } // Make sure that the name is basically well formed for namespace // enabled rules. It either has no colons, or it has one which // is neither the first or last char. const int colonFirst = XMLString::indexOf(fAttNameBuf.getRawBuffer(), chColon); if (colonFirst != -1) { const int colonLast = XMLString::lastIndexOf(fAttNameBuf.getRawBuffer(), chColon); if (colonFirst != colonLast) { emitError(XMLErrs::TooManyColonsInName); continue; } else if ((colonFirst == 0) || (colonLast == (int)fAttNameBuf.getLen() - 1)) { emitError(XMLErrs::InvalidColonPos); continue; } } // And now lets add it to the passed collection. If we have not // filled it up yet, then we use the next element. Else we add // a new one. KVStringPair* curPair = 0; if (attCount >= curVecSize) { curPair = new (fMemoryManager) KVStringPair ( fAttNameBuf.getRawBuffer() , fAttValueBuf.getRawBuffer() , fMemoryManager ); toFill.addElement(curPair); } else { curPair = toFill.elementAt(attCount); curPair->set(fAttNameBuf.getRawBuffer(), fAttValueBuf.getRawBuffer()); } // And bump the count of attributes we've gotten attCount++; // And go to the top again for another attribute continue; } // It was some special case character so do all of the checks and // deal with it. if (!nextCh) ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager); if (nextCh == chForwardSlash) { fReaderMgr.getNextChar(); isEmpty = true; if (!fReaderMgr.skippedChar(chCloseAngle)) emitError(XMLErrs::UnterminatedStartTag, elemName); break; } else if (nextCh == chCloseAngle) { fReaderMgr.getNextChar(); break; } else if (nextCh == chOpenAngle) { // Check for this one specially, since its going to be common // and it is kind of auto-recovering since we've already hit the // next open bracket, which is what we would have seeked to (and // skipped this whole tag.) emitError(XMLErrs::UnterminatedStartTag, elemName); break; } else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote)) { // Check for this one specially, which is probably a missing // attribute name, e.g. ="value". Just issue expected name // error and eat the quoted string, then jump back to the // top again. emitError(XMLErrs::ExpectedAttrName); fReaderMgr.getNextChar(); fReaderMgr.skipQuotedString(nextCh); fReaderMgr.skipPastSpaces(); continue; } } return attCount;}// This method will kick off the scanning of the primary content of the// document, i.e. the elements.bool SGXMLScanner::scanContent(){ // Go into a loop until we hit the end of the root element, or we fall // out because there is no root element. // // We have to do kind of a deeply nested double loop here in order to // avoid doing the setup/teardown of the exception handler on each // round. Doing it this way we only do it when an exception actually // occurs. bool gotData = true; bool inMarkup = false; while (gotData) { try { while (gotData) { // Sense what the next top level token is. According to what
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?