igxmlscanner.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,855 行 · 第 1/5 页
CPP
1,855 行
fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString); ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr); fGrammarResolver->putGrammar(fGrammar); } // In order to make the processing work consistently, we have to // make this look like an external entity. So create an entity // decl and fill it in and push it with the reader, as happens // with an external entity. Put a janitor on it to insure it gets // cleaned up. The reader manager does not adopt them. const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull }; DTDEntityDecl* declDTD = new (fGrammarPoolMemoryManager) DTDEntityDecl(gDTDStr, false, fGrammarPoolMemoryManager); declDTD->setSystemId(sysId); Janitor<DTDEntityDecl> janDecl(declDTD); // Mark this one as a throw at end reader->setThrowAtEnd(true); // And push it onto the stack, with its pseudo name fReaderMgr.pushReader(reader, declDTD); // Tell it its not in an include section dtdScanner.scanExtSubsetDecl(false, true); } }}bool IGXMLScanner::scanStartTag(bool& gotData){ // Assume we will still have data until proven otherwise. It will only // ever be false if this is the root and its empty. gotData = true; // Get the QName. In this case, we are not doing namespaces, so we just // use it as is and don't have to break it into parts. if (!fReaderMgr.getName(fQNameBuf)) { emitError(XMLErrs::ExpectedElementName); fReaderMgr.skipToChar(chOpenAngle); return false; } // Assume it won't be an empty tag bool isEmpty = false; // Lets try to look up the element in the validator's element decl pool // We can pass bogus values for the URI id and the base name. We know that // this can only be called if we are doing a DTD style validator and that // he will only look at the QName. // // We tell him to fault in a decl if he does not find one. // Actually, we *don't* tell him to fault in a decl if he does not find one- NG bool wasAdded = false; const XMLCh *rawQName = fQNameBuf.getRawBuffer(); XMLElementDecl* elemDecl = fGrammar->getElemDecl ( fEmptyNamespaceId , 0 , rawQName , Grammar::TOP_LEVEL_SCOPE ); // look for it in the undeclared pool: if(!elemDecl) { elemDecl = fDTDElemNonDeclPool->getByKey(rawQName); } if(!elemDecl) { // we're assuming this must be a DTD element. DTD's can be // used with or without namespaces, but schemas cannot be used without // namespaces. wasAdded = true; elemDecl = new (fMemoryManager) DTDElementDecl ( rawQName , fEmptyNamespaceId , DTDElementDecl::Any , fMemoryManager ); elemDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)elemDecl)); } // We do something different here according to whether we found the // element or not. if (wasAdded) { // If validating then emit an error if (fValidate) { // This is to tell the reuse Validator that this element was // faulted-in, was not an element in the validator pool originally elemDecl->setCreateReason(XMLElementDecl::JustFaultIn); fValidator->emitError ( XMLValid::ElementNotDefined , elemDecl->getFullName() ); } } else { // If its not marked declared and validating, then emit an error if (fValidate && !elemDecl->isDeclared()) { fValidator->emitError ( XMLValid::ElementNotDefined , elemDecl->getFullName() ); } } // See if its the root element const bool isRoot = fElemStack.isEmpty(); // Expand the element stack and add the new element fElemStack.addLevel(elemDecl, fReaderMgr.getCurrentReaderNum()); fElemStack.setValidationFlag(fValidate); // Validate the element if (fValidate) fValidator->validateElement(elemDecl); // If this is the first element and we are validating, check the root // element. if (isRoot) { fRootGrammar = fGrammar; if (fValidate) { // If a DocType exists, then check if it matches the root name there. if (fRootElemName && !XMLString::equals(fQNameBuf.getRawBuffer(), fRootElemName)) fValidator->emitError(XMLValid::RootElemNotLikeDocType); // Some validators may also want to check the root, call the // XMLValidator::checkRootElement if (fValidatorFromUser && !fValidator->checkRootElement(elemDecl->getId())) fValidator->emitError(XMLValid::RootElemNotLikeDocType); } } else { // If the element stack is not empty, then add this element as a // child of the previous top element. If its empty, this is the root // elem and is not the child of anything. fElemStack.addChild(elemDecl->getElementName(), true); } // Ask the element decl to clear out the 'provided' flag on all of its // att defs. elemDecl->resetDefs(); // Skip any whitespace after the name fReaderMgr.skipPastSpaces(); // We loop until we either see a /> or >, handling attribute/value // pairs until we get there. unsigned int attCount = 0; unsigned int curAttListSize = fAttrList->size(); wasAdded = false; fElemCount++; while (true) { // And get the next non-space character XMLCh nextCh = fReaderMgr.peekNextChar(); // If the next character is not a slash or closed angle bracket, // then it must be whitespace, since whitespace is required // between the end of the last attribute and the name of the next // one. if (attCount) { if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle)) { if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) { // Ok, skip by them and peek another char fReaderMgr.skipPastSpaces(); nextCh = fReaderMgr.peekNextChar(); } else { // Emit the error but keep on going emitError(XMLErrs::ExpectedWhitespace); } } } // Ok, here we first check for any of the special case characters. // If its not one, then we do the normal case processing, which // assumes that we've hit an attribute value, Otherwise, we do all // the special case checks. if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh)) { // Assume its going to be an attribute, so get a name from // the input. if (!fReaderMgr.getName(fAttNameBuf)) { emitError(XMLErrs::ExpectedAttrName); fReaderMgr.skipPastChar(chCloseAngle); return false; } // And next must be an equal sign if (!scanEq()) { static const XMLCh tmpList[] = { chSingleQuote, chDoubleQuote, chCloseAngle , chOpenAngle, chForwardSlash, chNull }; emitError(XMLErrs::ExpectedEqSign); // Try to sync back up by skipping forward until we either // hit something meaningful. const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList); if ((chFound == chCloseAngle) || (chFound == chForwardSlash)) { // Jump back to top for normal processing of these continue; } else if ((chFound == chSingleQuote) || (chFound == chDoubleQuote) || fReaderMgr.getCurrentReader()->isWhitespace(chFound)) { // Just fall through assuming that the value is to follow } else if (chFound == chOpenAngle) { // Assume a malformed tag and that new one is starting emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName()); return false; } else { // Something went really wrong return false; } } // See if this attribute is declared for this element. If we are // not validating of course it will not be at first, but we will // fault it into the pool (to avoid lots of redundant errors.) XMLCh * namePtr = fAttNameBuf.getRawBuffer(); XMLAttDef* attDef = ((DTDElementDecl *)elemDecl)->getAttDef(namePtr); // Add this attribute to the attribute list that we use to // pass them to the handler. We reuse its existing elements // but expand it as required. // Note that we want to this first since this will // make a copy of the namePtr; we can then make use of // that copy in the hashtable lookup that checks // for duplicates. This will mean we may have to update // the type of the XMLAttr later. XMLAttr* curAtt; if (attCount >= curAttListSize) { curAtt = new (fMemoryManager) XMLAttr ( -1 , namePtr , XMLUni::fgZeroLenString , XMLUni::fgZeroLenString , (attDef)?attDef->getType():XMLAttDef::CData , true , fMemoryManager ); fAttrList->addElement(curAtt); } else { curAtt = fAttrList->elementAt(attCount); curAtt->set ( -1 , namePtr , XMLUni::fgZeroLenString , XMLUni::fgZeroLenString , (attDef)?attDef->getType():XMLAttDef::CData ); curAtt->setSpecified(true); } // reset namePtr so it refers to newly-allocated memory namePtr = (XMLCh *)curAtt->getName(); if (!attDef) { // If there is a validation handler, then we are validating // so emit an error. if (fValidate) { fValidator->emitError ( XMLValid::AttNotDefinedForElement , fAttNameBuf.getRawBuffer() , elemDecl->getFullName() ); } unsigned int *curCountPtr = fUndeclaredAttrRegistry->get(namePtr); if(!curCountPtr) { curCountPtr = getNewUIntPtr(); *curCountPtr = fElemCount; fUndeclaredAttrRegistry->put((void *)namePtr, curCountPtr); } else if(*curCountPtr < fElemCount) *curCountPtr = fElemCount; else { emitError ( XMLErrs::AttrAlreadyUsedInSTag , namePtr , elemDecl->getFullName() ); } } else { // prepare for duplicate detection unsigned int *curCountPtr = fAttDefRegistry->get(attDef); if(!curCountPtr) { curCountPtr = getNewUIntPtr(); *curCountPtr = fElemCount; fAttDefRegistry->put(attDef, curCountPtr); } else if(*curCountPtr < fElemCount) *curCountPtr = fElemCount; else { emitError ( XMLErrs::AttrAlreadyUsedInSTag , attDef->getFullName() , elemDecl->getFullName() ); } } // Skip any whitespace before the value and then scan the att // value. This will come back normalized with entity refs and // char refs expanded. fReaderMgr.skipPastSpaces(); if (!scanAttValue(attDef, namePtr, fAttValueBuf)) { static const XMLCh tmpList[] = { chCloseAngle, chOpenAngle, chForwardSlash, chNull }; emitError(XMLErrs::ExpectedAttrValue); // It failed, so lets try to get synced back up. We skip // forward until we find some whitespace or one of the // chars in our list. const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList); if ((chFound == chCloseAngle) || (chFound == chForwardSlash) || fReaderMgr.getCurrentReader()->isWhitespace(chFound)) { // Just fall through and process this attribute, though // the value will be "". }
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?