xmlscanner.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,855 行 · 第 1/5 页
CPP
1,855 行
bbTarget.append(nextCh); } } else { // No target, but make sure its terminated ok if (!fReaderMgr.skippedChar(chQuestion)) { emitError(XMLErrs::UnterminatedPI); fReaderMgr.skipPastChar(chCloseAngle); return; } if (!fReaderMgr.skippedChar(chCloseAngle)) { emitError(XMLErrs::UnterminatedPI); fReaderMgr.skipPastChar(chCloseAngle); return; } } // Point the target pointer at the raw data targetPtr = bbTarget.getRawBuffer(); // If we have a handler, then call it if (fDocHandler) { fDocHandler->docPI ( namePtr , targetPtr ); } //mark PI is seen within the current element if (! fElemStack.isEmpty()) fElemStack.setCommentOrPISeen();}// Scans all the input from the start of the file to the root element.// There does not have to be anything in the prolog necessarily, but usually// there is at least an XMLDecl.//// On exit from here we are either at the end of the file or about to read// the opening < of the root element.void XMLScanner::scanProlog(){ // Get a buffer for whitespace processing XMLBufBid bbCData(&fBufMgr); // Loop through the prolog. If there is no content, this could go all // the way to the end of the file. try { while (true) { const XMLCh nextCh = fReaderMgr.peekNextChar(); if (nextCh == chOpenAngle) { // Ok, it could be the xml decl, a comment, the doc type line, // or the start of the root element. if (checkXMLDecl(true)) { // There shall be at lease --ONE-- space in between // the tag '<?xml' and the VersionInfo. // // If we are not at line 1, col 6, then the decl was not // the first text, so its invalid. const XMLReader* curReader = fReaderMgr.getCurrentReader(); if ((curReader->getLineNumber() != 1) || (curReader->getColumnNumber() != 7)) { emitError(XMLErrs::XMLDeclMustBeFirst); } scanXMLDecl(Decl_XML); } else if (fReaderMgr.skippedString(XMLUni::fgPIString)) { scanPI(); } else if (fReaderMgr.skippedString(XMLUni::fgCommentString)) { scanComment(); } else if (fReaderMgr.skippedString(XMLUni::fgDocTypeString)) { scanDocTypeDecl(); // if reusing grammar, this has been validated already in first scan // skip for performance if (fValidate && !fGrammar->getValidated()) { // validate the DTD scan so far fValidator->preContentValidation(fUseCachedGrammar, true); } } else { // Assume its the start of the root element return; } } else if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) { // If we have a document handler then gather up the // whitespace and call back. Otherwise just skip over spaces. if (fDocHandler) { fReaderMgr.getSpaces(bbCData.getBuffer()); fDocHandler->ignorableWhitespace ( bbCData.getRawBuffer() , bbCData.getLen() , false ); } else { fReaderMgr.skipPastSpaces(); } } else { emitError(XMLErrs::InvalidDocumentStructure); // Watch for end of file and break out if (!nextCh) break; else fReaderMgr.skipPastChar(chCloseAngle); } } } catch(const EndOfEntityException&) { // We should never get an end of entity here. They should only // occur within the doc type scanning method, and not leak out to // here. emitError ( XMLErrs::UnexpectedEOE , "in prolog" ); }}// Scans the <?xml .... ?> line. This stuff is all sequential so we don't// do any state machine loop here. We just bull straight through it. It ends// past the closing bracket. If there is a document handler, then its called// on the XMLDecl callback.//// On entry, the <?xml has been scanned, and we pick it up from there.//// NOTE: In order to provide good recovery from bad XML here, we try to be// very flexible. No matter what order the stuff is in, we'll keep going// though we'll issue errors.//// The parameter tells us which type of decl we should expect, Text or XML.// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'// [77] TextDecl::= '<?xml' VersionInfo? EncodingDecl S? '?>'void XMLScanner::scanXMLDecl(const DeclTypes type){ // Get us some buffers to use XMLBufBid bbVersion(&fBufMgr); XMLBufBid bbEncoding(&fBufMgr); XMLBufBid bbStand(&fBufMgr); XMLBufBid bbDummy(&fBufMgr); XMLBufBid bbName(&fBufMgr); // We use this little enum and array to keep up with what we found // and what order we found them in. This lets us get them free form // without too much overhead, but still know that they were in the // wrong order. enum Strings { VersionString , EncodingString , StandaloneString , UnknownString , StringCount }; int flags[StringCount] = { -1, -1, -1, -1 }; // Also set up a list of buffers in the right order so that we know // where to put stuff. XMLBuffer* buffers[StringCount] ; buffers[0] = &bbVersion.getBuffer(); buffers[1] = &bbEncoding.getBuffer(); buffers[2] = &bbStand.getBuffer(); buffers[3] = &bbDummy.getBuffer(); int curCount = 0; Strings curString; XMLBuffer& nameBuf = bbName.getBuffer(); while (true) { // Skip any spaces const unsigned int spaceCount = fReaderMgr.skipPastSpaces(true); // If we are looking at a question mark, then break out if (fReaderMgr.lookingAtChar(chQuestion)) break; // If this is not the first string, then we require the spaces if (!spaceCount && curCount) emitError(XMLErrs::ExpectedWhitespace); // Get characters up to the next whitespace or equal's sign. if (!scanUpToWSOr(nameBuf, chEqual)) emitError(XMLErrs::ExpectedDeclString); // See if it matches any of our expected strings if (XMLString::equals(nameBuf.getRawBuffer(), XMLUni::fgVersionString)) curString = VersionString; else if (XMLString::equals(nameBuf.getRawBuffer(), XMLUni::fgEncodingString)) curString = EncodingString; else if (XMLString::equals(nameBuf.getRawBuffer(), XMLUni::fgStandaloneString)) curString = StandaloneString; else curString = UnknownString; // If its an unknown string, then give that error. Else check to // see if this one has been done already and give that error. if (curString == UnknownString) emitError(XMLErrs::ExpectedDeclString, nameBuf.getRawBuffer()); else if (flags[curString] != -1) emitError(XMLErrs::DeclStringRep, nameBuf.getRawBuffer()); else if (flags[curString] == -1) flags[curString] = ++curCount; // Scan for an equal's sign. If we don't find it, issue an error // but keep trying to go on. if (!scanEq(true)) emitError(XMLErrs::ExpectedEqSign); // Get a quote string into the buffer for the string that we are // currently working on. if (!getQuotedString(*buffers[curString])) { emitError(XMLErrs::ExpectedQuotedString); fReaderMgr.skipPastChar(chCloseAngle); return; } // And validate the value according which one it was const XMLCh* rawValue = buffers[curString]->getRawBuffer(); if (curString == VersionString) { if (XMLString::equals(rawValue, XMLUni::fgVersion1_1)) { if (type == Decl_XML) { fXMLVersion = XMLReader::XMLV1_1; fReaderMgr.setXMLVersion(XMLReader::XMLV1_1); } else { if (fXMLVersion != XMLReader::XMLV1_1) emitError(XMLErrs::UnsupportedXMLVersion, rawValue); } } else if (XMLString::equals(rawValue, XMLUni::fgVersion1_0)) { if (type == Decl_XML) { fXMLVersion = XMLReader::XMLV1_0; fReaderMgr.setXMLVersion(XMLReader::XMLV1_0); } } else emitError(XMLErrs::UnsupportedXMLVersion, rawValue); } else if (curString == EncodingString) { if (!XMLString::isValidEncName(rawValue)) emitError(XMLErrs::BadXMLEncoding, rawValue); } else if (curString == StandaloneString) { if (XMLString::equals(rawValue, XMLUni::fgYesString)) fStandalone = true; else if (XMLString::equals(rawValue, XMLUni::fgNoString)) fStandalone = false; else { emitError(XMLErrs::BadStandalone); if (!XMLString::compareIString(rawValue, XMLUni::fgYesString)) fStandalone = true; else if (!XMLString::compareIString(rawValue, XMLUni::fgNoString)) fStandalone = false; } } } // Make sure that the strings present are in order. We don't care about // which ones are present at this point, just that any there are in the // right order. int curTop = 0; for (int index = VersionString; index < StandaloneString; index++) { if (flags[index] != -1) { if (flags[index] != curTop + 1) { emitError(XMLErrs::DeclStringsInWrongOrder); break; } curTop = flags[index]; } } // If its an XML decl, the version must be present. // If its a Text decl, then encoding must be present AND standalone must not be present. if ((type == Decl_XML) && (flags[VersionString] == -1)) emitError(XMLErrs::XMLVersionRequired); else if (type == Decl_Text) { if (flags[StandaloneString] != -1) emitError(XMLErrs::StandaloneNotLegal); if (flags[EncodingString] == -1) emitError(XMLErrs::EncodingRequired); } if (!fReaderMgr.skippedChar(chQuestion)) { emitError(XMLErrs::UnterminatedXMLDecl); fReaderMgr.skipPastChar(chCloseAngle); } else if (!fReaderMgr.skippedChar(chCloseAngle)) { emitError(XMLErrs::UnterminatedXMLDecl); fReaderMgr.skipPastChar(chCloseAngle); } // Do this before we possibly update the reader with the // actual encoding string. Otherwise, we will pass the wrong thing // for the last parameter! const XMLCh* actualEnc = fReaderMgr.getCurrentEncodingStr(); // Ok, we've now seen the real encoding string, if there was one, so // lets call back on the current reader and tell it what the real // encoding string was. If it fails, that's because it represents some // sort of contradiction with the autosensed format, and it keeps the // original encoding. // // NOTE: This can fail for a number of reasons, such as a bogus encoding // name or because its in flagrant contradiction of the auto-sensed // format. if (flags[EncodingString] != -1) { if (!fReaderMgr.getCurrentReader()->setEncoding(bbEncoding.getRawBuffer())) emitError(XMLErrs::ContradictoryEncoding, bbEncoding.getRawBuffer()); else actualEnc = bbEncoding.getRawBuffer(); } // If we have a document handler then call the XML Decl callback. if (type == Decl_XML) { if (fDocHandler) fDocHandler->XMLDecl ( bbVersion.getRawBuffer() , bbEncoding.getRawBuffer() , bbStand.getRawBuffer() , actualEnc ); } else if (type == Decl_Text) { if (fDocTypeHandler) fDocTypeHandler->TextDecl
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?