wfxmlscanner.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,883 行 · 第 1/5 页
CPP
1,883 行
{ // Just skips over it // REVISIT: Should we issue a warning static const XMLCh doctypeIE[] = { chOpenSquare, chCloseAngle, chNull }; XMLCh nextCh = fReaderMgr.skipUntilIn(doctypeIE); if (nextCh == chOpenSquare) fReaderMgr.skipPastChar(chCloseSquare); fReaderMgr.skipPastChar(chCloseAngle);}bool WFXMLScanner::scanStartTag(bool& gotData){ // Assume we will still have data until proven otherwise. It will only // ever be false if this is the root and its empty. gotData = true; // Get the QName. In this case, we are not doing namespaces, so we just // use it as is and don't have to break it into parts. if (!fReaderMgr.getName(fQNameBuf)) { emitError(XMLErrs::ExpectedElementName); fReaderMgr.skipToChar(chOpenAngle); return false; } // Assume it won't be an empty tag bool isEmpty = false; // See if its the root element const bool isRoot = fElemStack.isEmpty(); // Lets try to look up the element const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer(); XMLElementDecl* elemDecl = fElementLookup->get(qnameRawBuf); if (!elemDecl) { if (fElementIndex < fElements->size()) { elemDecl = fElements->elementAt(fElementIndex); } else { elemDecl = new (fGrammarPoolMemoryManager) DTDElementDecl ( fGrammarPoolMemoryManager ); fElements->addElement(elemDecl); } elemDecl->setElementName(XMLUni::fgZeroLenString, qnameRawBuf, fEmptyNamespaceId); fElementLookup->put((void*)elemDecl->getFullName(), elemDecl); fElementIndex++; } // Expand the element stack and add the new element fElemStack.addLevel(elemDecl, fReaderMgr.getCurrentReaderNum()); // Skip any whitespace after the name fReaderMgr.skipPastSpaces(); // We loop until we either see a /> or >, handling attribute/value // pairs until we get there. unsigned int attCount = 0; unsigned int curAttListSize = fAttrList->size(); while (true) { // And get the next non-space character XMLCh nextCh = fReaderMgr.peekNextChar(); // If the next character is not a slash or closed angle bracket, // then it must be whitespace, since whitespace is required // between the end of the last attribute and the name of the next // one. if (attCount) { if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle)) { if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) { // Ok, skip by them and peek another char fReaderMgr.skipPastSpaces(); nextCh = fReaderMgr.peekNextChar(); } else { // Emit the error but keep on going emitError(XMLErrs::ExpectedWhitespace); } } } // Ok, here we first check for any of the special case characters. // If its not one, then we do the normal case processing, which // assumes that we've hit an attribute value, Otherwise, we do all // the special case checks. if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh)) { // Assume its going to be an attribute, so get a name from // the input. if (!fReaderMgr.getName(fAttNameBuf)) { emitError(XMLErrs::ExpectedAttrName); fReaderMgr.skipPastChar(chCloseAngle); return false; } // And next must be an equal sign if (!scanEq()) { static const XMLCh tmpList[] = { chSingleQuote, chDoubleQuote, chCloseAngle , chOpenAngle, chForwardSlash, chNull }; emitError(XMLErrs::ExpectedEqSign); // Try to sync back up by skipping forward until we either // hit something meaningful. const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList); if ((chFound == chCloseAngle) || (chFound == chForwardSlash)) { // Jump back to top for normal processing of these continue; } else if ((chFound == chSingleQuote) || (chFound == chDoubleQuote) || fReaderMgr.getCurrentReader()->isWhitespace(chFound)) { // Just fall through assuming that the value is to follow } else if (chFound == chOpenAngle) { // Assume a malformed tag and that new one is starting emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf); return false; } else { // Something went really wrong return false; } } // See if this attribute is declared more than one for this element. const XMLCh* attNameRawBuf = fAttNameBuf.getRawBuffer(); unsigned int attNameHash = XMLString::hash(attNameRawBuf, 109, fMemoryManager); if (attCount) { for (unsigned int k=0; k < attCount; k++) { if (fAttrNameHashList->elementAt(k) == attNameHash) { if ( XMLString::equals ( fAttrList->elementAt(k)->getName() , attNameRawBuf ) ) { emitError ( XMLErrs::AttrAlreadyUsedInSTag , attNameRawBuf , qnameRawBuf ); break; } } } } // Skip any whitespace before the value and then scan the att // value. This will come back normalized with entity refs and // char refs expanded. fReaderMgr.skipPastSpaces(); if (!scanAttValue(attNameRawBuf, fAttValueBuf)) { static const XMLCh tmpList[] = { chCloseAngle, chOpenAngle, chForwardSlash, chNull }; emitError(XMLErrs::ExpectedAttrValue); // It failed, so lets try to get synced back up. We skip // forward until we find some whitespace or one of the // chars in our list. const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList); if ((chFound == chCloseAngle) || (chFound == chForwardSlash) || fReaderMgr.getCurrentReader()->isWhitespace(chFound)) { // Just fall through and process this attribute, though // the value will be "". } else if (chFound == chOpenAngle) { // Assume a malformed tag and that new one is starting emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf); return false; } else { // Something went really wrong return false; } } // Add this attribute to the attribute list that we use to // pass them to the handler. We reuse its existing elements // but expand it as required. XMLAttr* curAtt; if (attCount >= curAttListSize) { curAtt = new (fMemoryManager) XMLAttr ( -1 , attNameRawBuf , XMLUni::fgZeroLenString , fAttValueBuf.getRawBuffer() , XMLAttDef::CData , true , fMemoryManager ); fAttrList->addElement(curAtt); fAttrNameHashList->addElement(attNameHash); } else { curAtt = fAttrList->elementAt(attCount); curAtt->set ( -1 , attNameRawBuf , XMLUni::fgZeroLenString , fAttValueBuf.getRawBuffer() ); curAtt->setSpecified(true); fAttrNameHashList->setElementAt(attNameHash, attCount); } attCount++; // And jump back to the top of the loop continue; } // It was some special case character so do all of the checks and // deal with it. if (!nextCh) ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager); if (nextCh == chForwardSlash) { fReaderMgr.getNextChar(); isEmpty = true; if (!fReaderMgr.skippedChar(chCloseAngle)) emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf); break; } else if (nextCh == chCloseAngle) { fReaderMgr.getNextChar(); break; } else if (nextCh == chOpenAngle) { // Check for this one specially, since its going to be common // and it is kind of auto-recovering since we've already hit the // next open bracket, which is what we would have seeked to (and // skipped this whole tag.) emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName()); break; } else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote)) { // Check for this one specially, which is probably a missing // attribute name, e.g. ="value". Just issue expected name // error and eat the quoted string, then jump back to the // top again. emitError(XMLErrs::ExpectedAttrName); fReaderMgr.getNextChar(); fReaderMgr.skipQuotedString(nextCh); fReaderMgr.skipPastSpaces(); continue; } } // If empty, validate content right now if we are validating and then // pop the element stack top. Else, we have to update the current stack // top's namespace mapping elements. if (isEmpty) { // Pop the element stack back off since it'll never be used now fElemStack.popTop(); // If the elem stack is empty, then it was an empty root if (isRoot) gotData = false; } // If we have a document handler, then tell it about this start tag. We // don't have any URI id to send along, so send fEmptyNamespaceId. We also do not send // any prefix since its just one big name if we are not doing namespaces. if (fDocHandler) { fDocHandler->startElement ( *elemDecl , fEmptyNamespaceId , 0 , *fAttrList , attCount , isEmpty , isRoot ); } return true;}// This method is called to scan a start tag when we are processing// namespaces. There are two different versions of this method, one for// namespace aware processing an done for non-namespace aware processing.//// This method is called after we've scanned the < of a start tag. So we// have to get the element name, then scan the attributes, after which// we are either going to see >, />, or attributes followed by one of those// sequences.bool WFXMLScanner::scanStartTagNS(bool& gotData){ // Assume we will still have data until proven otherwise. It will only // ever be false if this is the root and its empty. gotData = true; // The current position is after the open bracket, so we need to read in // in the element name. if (!fReaderMgr.getName(fQNameBuf)) { emitError(XMLErrs::ExpectedElementName); fReaderMgr.skipToChar(chOpenAngle); return false; } // See if its the root element const bool isRoot = fElemStack.isEmpty(); // Assume it won't be an empty tag bool isEmpty = false; // Skip any whitespace after the name fReaderMgr.skipPastSpaces(); // Lets try to look up the element const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer(); XMLElementDecl* elemDecl = fElementLookup->get(qnameRawBuf); if (!elemDecl) { if (!XMLString::compareNString(qnameRawBuf, XMLUni::fgXMLNSColonString, 6)) emitError(XMLErrs::NoXMLNSAsElementPrefix, qnameRawBuf); if (fElementIndex < fElements->size()) { elemDecl = fElements->elementAt(fElementIndex); } else { elemDecl = new (fGrammarPoolMemoryManager) DTDElementDecl ( fGrammarPoolMemoryManager );
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?