xmlreader.cpp
来自「IBM的解析xml的工具Xerces的源代码」· C++ 代码 · 共 1,501 行 · 第 1/4 页
CPP
1,501 行
/* * Copyright 1999-2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* * $Id: XMLReader.cpp,v 1.25 2004/09/29 00:24:01 knoaman Exp $ */// ---------------------------------------------------------------------------// Includes// ---------------------------------------------------------------------------#include <xercesc/internal/XMLReader.hpp>#include <xercesc/util/BitOps.hpp>#include <xercesc/util/BinInputStream.hpp>#include <xercesc/util/PlatformUtils.hpp>#include <xercesc/util/RuntimeException.hpp>#include <xercesc/util/TransService.hpp>#include <xercesc/util/XMLEBCDICTranscoder.hpp>#include <xercesc/util/XMLString.hpp>#include <xercesc/util/Janitor.hpp>XERCES_CPP_NAMESPACE_BEGIN// ---------------------------------------------------------------------------// XMLReader: Query Methods// ---------------------------------------------------------------------------// Checks whether all of the chars in the passed buffer are whitespace or// not. Breaks out on the first non-whitespace.//bool XMLReader::isAllSpaces(const XMLCh* const toCheck , const unsigned int count){ const XMLCh* curCh = toCheck; const XMLCh* endPtr = toCheck + count; while (curCh < endPtr) { if (!(fgCharCharsTable[*curCh++] & gWhitespaceCharMask)) return false; } return true;}//// Checks whether at least one of the chars in the passed buffer are whitespace or// not.//bool XMLReader::containsWhiteSpace(const XMLCh* const toCheck , const unsigned int count){ const XMLCh* curCh = toCheck; const XMLCh* endPtr = toCheck + count; while (curCh < endPtr) { if (fgCharCharsTable[*curCh++] & gWhitespaceCharMask) return true; } return false;}//// This one is not called terribly often, so call the XMLChar utility//bool XMLReader::isPublicIdChar(const XMLCh toCheck){ if (fXMLVersion == XMLV1_1) return XMLChar1_1::isPublicIdChar(toCheck); else return XMLChar1_0::isPublicIdChar(toCheck);}// ---------------------------------------------------------------------------// XMLReader: Constructors and Destructor// ---------------------------------------------------------------------------XMLReader::XMLReader(const XMLCh* const pubId , const XMLCh* const sysId , BinInputStream* const streamToAdopt , const RefFrom from , const Types type , const Sources source , const bool throwAtEnd , const bool calculateSrcOfs , const XMLVersion version , MemoryManager* const manager) : fCharIndex(0) , fCharsAvail(0) , fCurCol(1) , fCurLine(1) , fEncodingStr(0) , fForcedEncoding(false) , fNoMore(false) , fPublicId(XMLString::replicate(pubId, manager)) , fRawBufIndex(0) , fRawBytesAvail(0) , fReaderNum(0xFFFFFFFF) , fRefFrom(from) , fSentTrailingSpace(false) , fSource(source) , fSrcOfsBase(0) , fSrcOfsSupported(false) , fCalculateSrcOfs(calculateSrcOfs) , fSystemId(XMLString::replicate(sysId, manager)) , fStream(streamToAdopt) , fSwapped(false) , fThrowAtEnd(throwAtEnd) , fTranscoder(0) , fType(type) , fMemoryManager(manager){ setXMLVersion(version); // Do an initial load of raw bytes refreshRawBuffer(); // Ask the transcoding service if it supports src offset info fSrcOfsSupported = XMLPlatformUtils::fgTransService->supportsSrcOfs(); // // Use the recognizer class to get a basic sense of what family of // encodings this file is in. We'll start off with a reader of that // type, and update it later if needed when we read the XMLDecl line. // fEncoding = XMLRecognizer::basicEncodingProbe(fRawByteBuf, fRawBytesAvail); #if defined(XERCES_DEBUG) if ((fEncoding < XMLRecognizer::Encodings_Min) || (fEncoding > XMLRecognizer::Encodings_Max)) { ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Reader_BadAutoEncoding, fMemoryManager); } #endif fEncodingStr = XMLString::replicate(XMLRecognizer::nameForEncoding(fEncoding, fMemoryManager), fMemoryManager); // Check whether the fSwapped flag should be set or not checkForSwapped(); // // This will check to see if the first line is an XMLDecl and, if // so, decode that first line manually one character at a time. This // leaves enough characters in the buffer that the high level code // can get through the Decl and call us back with the real encoding. // doInitDecode(); // // NOTE: We won't create a transcoder until we either get a call to // setEncoding() or we get a call to refreshCharBuffer() and no // transcoder has been set yet. //}XMLReader::XMLReader(const XMLCh* const pubId , const XMLCh* const sysId , BinInputStream* const streamToAdopt , const XMLCh* const encodingStr , const RefFrom from , const Types type , const Sources source , const bool throwAtEnd , const bool calculateSrcOfs , const XMLVersion version , MemoryManager* const manager) : fCharIndex(0) , fCharsAvail(0) , fCurCol(1) , fCurLine(1) , fEncoding(XMLRecognizer::UTF_8) , fEncodingStr(0) , fForcedEncoding(true) , fNoMore(false) , fPublicId(XMLString::replicate(pubId, manager)) , fRawBufIndex(0) , fRawBytesAvail(0) , fReaderNum(0xFFFFFFFF) , fRefFrom(from) , fSentTrailingSpace(false) , fSource(source) , fSrcOfsBase(0) , fSrcOfsSupported(false) , fCalculateSrcOfs(calculateSrcOfs) , fSystemId(XMLString::replicate(sysId, manager)) , fStream(streamToAdopt) , fSwapped(false) , fThrowAtEnd(throwAtEnd) , fTranscoder(0) , fType(type) , fMemoryManager(manager){ setXMLVersion(version); // Do an initial load of raw bytes refreshRawBuffer(); // Copy the encoding string to our member fEncodingStr = XMLString::replicate(encodingStr, fMemoryManager); XMLString::upperCase(fEncodingStr); // Ask the transcoding service if it supports src offset info fSrcOfsSupported = XMLPlatformUtils::fgTransService->supportsSrcOfs(); // // Map the passed encoding name to one of our enums. If it does not // match one of the intrinsic encodings, it will come back 'other', // which tells us to create a transcoder based reader. // fEncoding = XMLRecognizer::encodingForName(fEncodingStr); // Check whether the fSwapped flag should be set or not checkForSwapped(); // // Create a transcoder for the encoding. Since the encoding has been // forced, this will be the one we will use, period. // XMLTransService::Codes failReason; if (fEncoding == XMLRecognizer::OtherEncoding) { // // fEncodingStr not pre-recognized, use it // directly for transcoder // fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor ( fEncodingStr , failReason , kCharBufSize , fMemoryManager ); } else { // // Use the recognized fEncoding to create the transcoder // fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor ( fEncoding , failReason , kCharBufSize , fMemoryManager ); } if (!fTranscoder) { ThrowXMLwithMemMgr1 ( TranscodingException , XMLExcepts::Trans_CantCreateCvtrFor , fEncodingStr , fMemoryManager ); } // // Note that, unlike above, we do not do an initial decode of the // first line. We take the caller's word that the encoding is correct // and just assume that the first bulk decode (kicked off by the first // get of a character) will work. // // So we do here the slipping in of the leading space if required. // if ((fType == Type_PE) && (fRefFrom == RefFrom_NonLiteral)) { // This represents no data from the source fCharSizeBuf[fCharsAvail] = 0; fCharOfsBuf[fCharsAvail] = 0; fCharBuf[fCharsAvail++] = chSpace; }}XMLReader::XMLReader(const XMLCh* const pubId , const XMLCh* const sysId , BinInputStream* const streamToAdopt , XMLRecognizer::Encodings encodingEnum , const RefFrom from , const Types type , const Sources source , const bool throwAtEnd , const bool calculateSrcOfs , const XMLVersion version , MemoryManager* const manager) : fCharIndex(0) , fCharsAvail(0) , fCurCol(1) , fCurLine(1) , fEncoding(XMLRecognizer::UTF_8) , fEncodingStr(0) , fForcedEncoding(true) , fNoMore(false) , fPublicId(XMLString::replicate(pubId, manager)) , fRawBufIndex(0) , fRawBytesAvail(0) , fReaderNum(0xFFFFFFFF) , fRefFrom(from) , fSentTrailingSpace(false) , fSource(source) , fSrcOfsBase(0) , fSrcOfsSupported(false) , fCalculateSrcOfs(calculateSrcOfs) , fSystemId(XMLString::replicate(sysId, manager)) , fStream(streamToAdopt) , fSwapped(false) , fThrowAtEnd(throwAtEnd) , fTranscoder(0) , fType(type) , fMemoryManager(manager){ setXMLVersion(version); // Do an initial load of raw bytes refreshRawBuffer(); // Ask the transcoding service if it supports src offset info fSrcOfsSupported = XMLPlatformUtils::fgTransService->supportsSrcOfs(); // // Use the passed encoding code // fEncoding = encodingEnum; fEncodingStr = XMLString::replicate(XMLRecognizer::nameForEncoding(fEncoding, fMemoryManager), fMemoryManager); // Check whether the fSwapped flag should be set or not checkForSwapped(); // // Create a transcoder for the encoding. Since the encoding has been // forced, this will be the one we will use, period. // XMLTransService::Codes failReason; fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor ( fEncoding , failReason , kCharBufSize , fMemoryManager ); if (!fTranscoder) { ThrowXMLwithMemMgr1 ( TranscodingException , XMLExcepts::Trans_CantCreateCvtrFor , fEncodingStr , fMemoryManager ); } // // Note that, unlike above, we do not do an initial decode of the // first line. We take the caller's word that the encoding is correct // and just assume that the first bulk decode (kicked off by the first // get of a character) will work. // // So we do here the slipping in of the leading space if required. // if ((fType == Type_PE) && (fRefFrom == RefFrom_NonLiteral)) { // This represents no data from the source
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?