xmlscanner.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,855 行 · 第 1/5 页
CPP
1,855 行
( bbVersion.getRawBuffer() , bbEncoding.getRawBuffer() ); }}const XMLCh* XMLScanner::getURIText(const unsigned int uriId) const{ if (fURIStringPool->exists(uriId)) { // Look up the URI in the string pool and return its id const XMLCh* value = fURIStringPool->getValueForId(uriId); if (!value) return XMLUni::fgZeroLenString; return value; } else return XMLUni::fgZeroLenString;}bool XMLScanner::getURIText( const unsigned int uriId , XMLBuffer& uriBufToFill) const{ if (fURIStringPool->exists(uriId)) { // Look up the URI in the string pool and return its id const XMLCh* value = fURIStringPool->getValueForId(uriId); if (!value) return false; uriBufToFill.set(value); return true; } else return false;}bool XMLScanner::checkXMLDecl(bool startWithAngle) { // [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' // [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') // // [3] S ::= (#x20 | #x9 | #xD | #xA)+ if (startWithAngle) { if (fReaderMgr.peekString(XMLUni::fgXMLDeclString)) { if (fReaderMgr.skippedString(XMLUni::fgXMLDeclStringSpace) || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringHTab) || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringLF) || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringCR)) { return true; } else if (fReaderMgr.skippedString(XMLUni::fgXMLDeclStringSpaceU) || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringHTabU) || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringLFU) || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringCRU)) { // Just in case, check for upper case. If found, issue // an error, but keep going. emitError(XMLErrs::XMLDeclMustBeLowerCase); return true; } } } else { if (fReaderMgr.peekString(XMLUni::fgXMLString)) { if (fReaderMgr.skippedString(XMLUni::fgXMLStringSpace) || fReaderMgr.skippedString(XMLUni::fgXMLStringHTab) || fReaderMgr.skippedString(XMLUni::fgXMLStringLF) || fReaderMgr.skippedString(XMLUni::fgXMLStringCR)) { return true; } else if (fReaderMgr.skippedString(XMLUni::fgXMLStringSpaceU) || fReaderMgr.skippedString(XMLUni::fgXMLStringHTabU) || fReaderMgr.skippedString(XMLUni::fgXMLStringLFU) || fReaderMgr.skippedString(XMLUni::fgXMLStringCRU)) { // Just in case, check for upper case. If found, issue // an error, but keep going. emitError(XMLErrs::XMLDeclMustBeLowerCase); return true; } } } return false;}// ---------------------------------------------------------------------------// XMLScanner: Grammar preparsing// ---------------------------------------------------------------------------Grammar* XMLScanner::loadGrammar(const XMLCh* const systemId , const short grammarType , const bool toCache){ InputSource* srcToUse = 0; if (fEntityHandler){ ReaderMgr::LastExtEntityInfo lastInfo; fReaderMgr.getLastExtEntityInfo(lastInfo); XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::ExternalEntity, systemId, 0, XMLUni::fgZeroLenString, lastInfo.systemId); srcToUse = fEntityHandler->resolveEntity(&resourceIdentifier); } // First we try to parse it as a URL. If that fails, we assume its // a file and try it that way. if (!srcToUse) { try { // Create a temporary URL. Since this is the primary document, // it has to be fully qualified. If not, then assume we are just // mistaking a file for a URL. XMLURL tmpURL(fMemoryManager); if (XMLURL::parse(systemId, tmpURL)) { if (tmpURL.isRelative()) { if (!fStandardUriConformant) srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager); else { // since this is the top of the try/catch, cannot call ThrowXMLwithMemMgr // emit the error directly MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_NoProtocolPresent, fMemoryManager); fInException = true; emitError ( XMLErrs::XMLException_Fatal , e.getType() , e.getMessage() ); return 0; } } else { if (fStandardUriConformant && tmpURL.hasInvalidChar()) { MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL, fMemoryManager); fInException = true; emitError ( XMLErrs::XMLException_Fatal , e.getType() , e.getMessage() ); return 0; } srcToUse = new (fMemoryManager) URLInputSource(tmpURL, fMemoryManager); } } else { if (!fStandardUriConformant) srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager); else { // since this is the top of the try/catch, cannot call ThrowXMLwithMemMgr // emit the error directly // lazy bypass ... since all MalformedURLException are fatal, no need to check the type MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL); fInException = true; emitError ( XMLErrs::XMLException_Fatal , e.getType() , e.getMessage() ); return 0; } } } catch(const XMLException& excToCatch) { // For any other XMLException, // emit the error and catch any user exception thrown from here. fInException = true; if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning) emitError ( XMLErrs::XMLException_Warning , excToCatch.getType() , excToCatch.getMessage() ); else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal) emitError ( XMLErrs::XMLException_Fatal , excToCatch.getType() , excToCatch.getMessage() ); else emitError ( XMLErrs::XMLException_Error , excToCatch.getType() , excToCatch.getMessage() ); return 0; } } Janitor<InputSource> janSrc(srcToUse); return loadGrammar(*srcToUse, grammarType, toCache);}Grammar* XMLScanner::loadGrammar(const char* const systemId , const short grammarType , const bool toCache){ // We just delegate this to the XMLCh version after transcoding XMLCh* tmpBuf = XMLString::transcode(systemId, fMemoryManager); ArrayJanitor<XMLCh> janBuf(tmpBuf, fMemoryManager); return loadGrammar(tmpBuf, grammarType, toCache);}// ---------------------------------------------------------------------------// XMLScanner: Setter methods// ---------------------------------------------------------------------------void XMLScanner::setURIStringPool(XMLStringPool* const stringPool){ fURIStringPool = stringPool; fEmptyNamespaceId = fURIStringPool->addOrFind(XMLUni::fgZeroLenString); fUnknownNamespaceId = fURIStringPool->addOrFind(XMLUni::fgUnknownURIName); fXMLNamespaceId = fURIStringPool->addOrFind(XMLUni::fgXMLURIName); fXMLNSNamespaceId = fURIStringPool->addOrFind(XMLUni::fgXMLNSURIName);}// ---------------------------------------------------------------------------// XMLScanner: Private helper methods// ---------------------------------------------------------------------------/*** * In reusing grammars (cacheing grammar from parse, or use cached grammar), internal * dtd is allowed conditionally. * * In the case of cacheing grammar from parse, it is NOT allowed. * * In the case of use cached grammar, * if external dtd is present and it is parsed before, then it is not allowed, * otherwise it is allowed. * ***/void XMLScanner::checkInternalDTD(bool hasExtSubset, const XMLCh* const sysId){ if (fToCacheGrammar) ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Val_CantHaveIntSS, fMemoryManager); if (fUseCachedGrammar && hasExtSubset ) { InputSource* sysIdSrc = resolveSystemId(sysId); Janitor<InputSource> janSysIdSrc(sysIdSrc); Grammar* grammar = fGrammarResolver->getGrammar(sysIdSrc->getSystemId()); if (grammar && grammar->getGrammarType() == Grammar::DTDGrammarType) { ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Val_CantHaveIntSS, fMemoryManager); } }}// This method is called after the content scan to insure that all the// ID/IDREF attributes match up (i.e. that all IDREFs refer to IDs.) This is// an XML 1.0 rule, so we can do here in the core.void XMLScanner::checkIDRefs(){ // Iterate the id ref list. If we find any entries here which are used // but not declared, then that's an error. RefHashTableOfEnumerator<XMLRefInfo> refEnum(fValidationContext->getIdRefList(), false, fMemoryManager); while (refEnum.hasMoreElements()) { // Get a ref to the current element const XMLRefInfo& curRef = refEnum.nextElement(); // If its used but not declared, then its an error if (!curRef.getDeclared() && curRef.getUsed() && fValidate) fValidator->emitError(XMLValid::IDNotDeclared, curRef.getRefName()); }}// This just does a simple check that the passed progressive scan token is// legal for this scanner.bool XMLScanner::isLegalToken(const XMLPScanToken& toCheck){ return ((fScannerId == toCheck.fScannerId) && (fSequenceId == toCheck.fSequenceId));}// This method will handle figuring out what the next top level token is// in the input stream. It will return an enumerated value that indicates// what it believes the next XML level token must be. It will eat as many// chars are required to figure out what is next.XMLScanner::XMLTokens XMLScanner::senseNextToken(unsigned int& orgReader){ // Get the next character and use it to guesstimate what the next token // is going to be. We turn on end of entity exceptions when we do this // in order to catch the scenario where the current entity ended at // the > of some markup. XMLCh nextCh; { ThrowEOEJanitor janMgr(&fReaderMgr, true); nextCh = fReaderMgr.peekNextChar(); } // Check for special chars. Start with the most // obvious end of file, which should be legal here at top level. if (!nextCh) return Token_EOF; // If it's not a '<' we must be in content. // // This includes entity references '&' of some sort. These must // be character data because that's the only place a reference can // occur in content. if (nextCh != chOpenAngle) return Token_CharData; // Ok it had to have been a '<' character. So get it out of the reader // and store the reader number where we saw it, passing it back to the // caller. fReaderMgr.getNextChar(); orgReader = fReaderMgr.getCurrentReaderNum(); // Ok, so lets go through the things that it could be at this point which // are all some form of markup. nextCh = fReaderMgr.peekNextChar(); if (nextCh == chForwardSlash) { fReaderMgr.getNextChar(); return Token_EndTag; } else if (nextCh == chBang) { static const XMLCh gCDATAStr[] = { chBang, chOpenSquare, chLatin_C, chLatin_D, chLatin_A , chLatin_T, chLatin_A, chNull }; static const XMLCh gCommentString[] = { chBang, chDash, chDash, chNull }; if (fReaderMgr.skippedString(gCDATAStr)) return Token_CData; if (fReaderMgr.skippedString(gCommentString)) return Token_Comment; emitError(XMLErrs::ExpectedCommentOrCDATA); return Token_Unknown; } else if (nextCh == chQuestion) { // It must be a PI fReaderMgr.getNextChar(); return Token_PI; } // Assume its an element name, so return with a start tag token. If it // turns out not to be, then it will fail when it can
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?