📄 xmlreader.hpp
字号:
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* * $Id: XMLReader.hpp 568078 2007-08-21 11:43:25Z amassari $ */#if !defined(XMLREADER_HPP)#define XMLREADER_HPP#include <xercesc/util/XMLChar.hpp>#include <xercesc/framework/XMLRecognizer.hpp>#include <xercesc/framework/XMLBuffer.hpp>#include <xercesc/util/TranscodingException.hpp>XERCES_CPP_NAMESPACE_BEGINclass InputSource;class BinInputStream;class ReaderMgr;class XMLScanner;class XMLTranscoder;// ---------------------------------------------------------------------------// Instances of this class are used to manage the content of entities. The// scanner maintains a stack of these, one for each entity (this means entity// in the sense of any parsed file or internal entity) currently being// scanned. This class, given a binary input stream will handle reading in// the data and decoding it from its external decoding into the internal// Unicode format. Once internallized, this class provides the access// methods to read in the data in various ways, maintains line and column// information, and provides high performance character attribute checking// methods.//// This is NOT to be derived from.//// ---------------------------------------------------------------------------class XMLPARSER_EXPORT XMLReader : public XMemory{public: // ----------------------------------------------------------------------- // Public types // ----------------------------------------------------------------------- enum Types { Type_PE , Type_General }; enum Sources { Source_Internal , Source_External }; enum RefFrom { RefFrom_Literal , RefFrom_NonLiteral }; enum XMLVersion { XMLV1_0 , XMLV1_1 , XMLV_Unknown }; // ----------------------------------------------------------------------- // Public, query methods // ----------------------------------------------------------------------- bool isAllSpaces ( const XMLCh* const toCheck , const unsigned int count ) const; bool containsWhiteSpace ( const XMLCh* const toCheck , const unsigned int count ) const; bool isXMLLetter(const XMLCh toCheck) const; bool isFirstNameChar(const XMLCh toCheck) const; bool isNameChar(const XMLCh toCheck) const; bool isPlainContentChar(const XMLCh toCheck) const; bool isSpecialStartTagChar(const XMLCh toCheck) const; bool isXMLChar(const XMLCh toCheck) const; bool isWhitespace(const XMLCh toCheck) const; bool isControlChar(const XMLCh toCheck) const; bool isPublicIdChar(const XMLCh toCheck) const; bool isFirstNCNameChar(const XMLCh toCheck) const; bool isNCNameChar(const XMLCh toCheck) const; // ----------------------------------------------------------------------- // Constructors and Destructor // ----------------------------------------------------------------------- XMLReader ( const XMLCh* const pubId , const XMLCh* const sysId , BinInputStream* const streamToAdopt , const RefFrom from , const Types type , const Sources source , const bool throwAtEnd = false , const bool calculateSrcOfs = true , const XMLVersion xmlVersion = XMLV1_0 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager ); XMLReader ( const XMLCh* const pubId , const XMLCh* const sysId , BinInputStream* const streamToAdopt , const XMLCh* const encodingStr , const RefFrom from , const Types type , const Sources source , const bool throwAtEnd = false , const bool calculateSrcOfs = true , const XMLVersion xmlVersion = XMLV1_0 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager ); XMLReader ( const XMLCh* const pubId , const XMLCh* const sysId , BinInputStream* const streamToAdopt , XMLRecognizer::Encodings encodingEnum , const RefFrom from , const Types type , const Sources source , const bool throwAtEnd = false , const bool calculateSrcOfs = true , const XMLVersion xmlVersion = XMLV1_0 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager ); ~XMLReader(); // ----------------------------------------------------------------------- // Character buffer management methods // ----------------------------------------------------------------------- unsigned long charsLeftInBuffer() const; bool refreshCharBuffer(); // ----------------------------------------------------------------------- // Scanning methods // ----------------------------------------------------------------------- bool getName(XMLBuffer& toFill, const bool token); bool getQName(XMLBuffer& toFill, int* colonPosition); bool getNextChar(XMLCh& chGotten); bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten); void movePlainContentChars(XMLBuffer &dest); bool getSpaces(XMLBuffer& toFill); bool getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck); bool peekNextChar(XMLCh& chGotten); bool skipIfQuote(XMLCh& chGotten); bool skipSpaces(bool& skippedSomething, bool inDecl = false); bool skippedChar(const XMLCh toSkip); bool skippedSpace(); bool skippedString(const XMLCh* const toSkip); bool peekString(const XMLCh* const toPeek); // ----------------------------------------------------------------------- // Getter methods // ----------------------------------------------------------------------- XMLSSize_t getColumnNumber() const; const XMLCh* getEncodingStr() const; XMLSSize_t getLineNumber() const; bool getNoMoreFlag() const; const XMLCh* getPublicId() const; unsigned int getReaderNum() const; RefFrom getRefFrom() const; Sources getSource() const; unsigned int getSrcOffset() const; const XMLCh* getSystemId() const; bool getThrowAtEnd() const; Types getType() const; // ----------------------------------------------------------------------- // Setter methods // ----------------------------------------------------------------------- bool setEncoding ( const XMLCh* const newEncoding ); void setReaderNum(const unsigned int newNum); void setThrowAtEnd(const bool newValue); void setXMLVersion(const XMLVersion version);private: // ----------------------------------------------------------------------- // Unimplemented constructors and operators // ----------------------------------------------------------------------- XMLReader(const XMLReader&); XMLReader& operator=(const XMLReader&); // --------------------------------------------------------------------------- // Class Constants // // kCharBufSize // The size of the character spool buffer that we use. Its not terribly // large because its just getting filled with data from a raw byte // buffer as we go along. We don't want to decode all the text at // once before we find out that there is an error. // // NOTE: This is a size in characters, not bytes. // // kRawBufSize // The size of the raw buffer from which raw bytes are spooled out // as we transcode chunks of data. As it is emptied, it is filled back // in again from the source stream. // --------------------------------------------------------------------------- enum Constants { kCharBufSize = 16 * 1024 , kRawBufSize = 48 * 1024 }; // ----------------------------------------------------------------------- // Private helper methods // ----------------------------------------------------------------------- void checkForSwapped(); void doInitCharSizeChecks(); void doInitDecode(); XMLByte getNextRawByte ( const bool eoiOk ); void refreshRawBuffer(); void setTranscoder ( const XMLCh* const newEncoding ); unsigned int xcodeMoreChars ( XMLCh* const bufToFill , unsigned char* const charSizes , const unsigned int maxChars ); void handleEOL ( XMLCh& curCh , bool inDecl = false ); // ----------------------------------------------------------------------- // Data members // // fCharIndex // The index into the character buffer. When this hits fCharsAvail // then its time to refill. // // fCharBuf // A buffer that the reader manager fills up with transcoded // characters a small amount at a time. // // fCharsAvail // The characters currently available in the character buffer. // // fCharSizeBuf // This buffer is an array that contains the number of source chars // eaten to create each char in the fCharBuf buffer. So the entry // fCharSizeBuf[x] is the number of source chars that were eaten // to make the internalized char fCharBuf[x]. This only contains // useful data if fSrcOfsSupported is true. // // fCharOfsBuf // This buffer is an array that contains the offset in the // fRawByteBuf buffer of each char in the fCharBuf buffer. It // only contains useful data if fSrcOfsSupported is true. // // fCurCol // fCurLine // The current line and column that we are in within this reader's // text. // // fEncoding // This is the rough encoding setting. This enum is set during // construction and just tells us the rough family of encoding that // we are doing. // // fEncodingStr // This is the name of the encoding we are using. It will be // provisionally set during construction, from the auto-sensed // encoding. But it might be overridden when the XMLDecl is finally // seen by the scanner. It can also be forced to a particular // encoding, in which case fForcedEncoding is set. // // fForcedEncoding // If the encoding if forced then this is set and all other // information will be ignored. This encoding will be taken as // gospel. This is done by calling an alternate constructor. // // fNoMore // This is set when the source text is exhausted. It lets us know // quickly that no more text is available. // // fRawBufIndex // The current index into the raw byte buffer. When its equal to // fRawBytesAvail then we need to read another buffer. // // fRawByteBuf // This is the raw byte buffer that is used to spool out bytes // from into the fCharBuf buffer, as we transcode in blocks. // // fRawBytesAvail // The number of bytes currently available in the raw buffer. This // helps deal with the last buffer's worth, which will usually not // be a full one. // // fReaderNum // Each reader from a particular reader manager (which means from a // particular document) is given a unique number. The reader manager // sets these numbers. They are used to catch things like partial // markup errors. // // fRefFrom // This flag is provided in the ctor, and tells us if we represent // some entity being expanded inside a literal. Sometimes things // happen differently inside and outside literals. // // fPublicId // fSystemId // These are the system and public ids of the source that this // reader is reading. // // fSentTrailingSpace // If we are a PE entity being read and we not referenced from a // literal, then a leading and trailing space must be faked into the // data. This lets us know we've done the trailing space already (so // we don't just keep doing it again and again.) // // fSource // Indicates whether the content this reader is spooling as already // been internalized. This will prevent multiple processing of // whitespace when an already internalized entity is being spooled // out. // // fSpareChar // Some encodings can create two chars in an atomic way, e.g. // surrogate pairs. We might not be able to store both, so we store // it here until the next buffer transcoding operation. // // fSrcOfsBase // This is the base offset within the source of this entity. Values // in the curent fCharSizeBuf array are relative to this value. // // fSrcOfsSupported // This flag is set to indicate whether source byte offset info // is supported. For intrinsic encodings, its always set since we // can always support it. For transcoder based encodings, we ask // the transcoder if it supports it or not. // // fStream // This is the input stream that provides the data for the reader. // Its always treated as a raw byte stream. The derived class will
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -