📄 xmlreader.hpp
字号:
bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten); void movePlainContentChars(XMLBuffer &dest); bool getSpaces(XMLBuffer& toFill); bool getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck); bool peekNextChar(XMLCh& chGotten); bool skipIfQuote(XMLCh& chGotten); bool skipSpaces(bool& skippedSomething); bool skippedChar(const XMLCh toSkip); bool skippedSpace(); bool skippedString(const XMLCh* const toSkip); bool peekString(const XMLCh* const toPeek); // ----------------------------------------------------------------------- // Getter methods // ----------------------------------------------------------------------- XMLSSize_t getColumnNumber() const; const XMLCh* getEncodingStr() const; XMLSSize_t getLineNumber() const; bool getNoMoreFlag() const; const XMLCh* getPublicId() const; unsigned int getReaderNum() const; RefFrom getRefFrom() const; Sources getSource() const; unsigned int getSrcOffset() const; const XMLCh* getSystemId() const; bool getThrowAtEnd() const; Types getType() const; // ----------------------------------------------------------------------- // Setter methods // ----------------------------------------------------------------------- bool setEncoding ( const XMLCh* const newEncoding ); void setReaderNum(const unsigned int newNum); void setThrowAtEnd(const bool newValue); void setXMLVersion(const XMLVersion version);private: // ----------------------------------------------------------------------- // Unimplemented constructors and operators // ----------------------------------------------------------------------- XMLReader(const XMLReader&); XMLReader& operator=(const XMLReader&); // --------------------------------------------------------------------------- // Class Constants // // kCharBufSize // The size of the character spool buffer that we use. Its not terribly // large because its just getting filled with data from a raw byte // buffer as we go along. 
We don't want to decode all the text at // once before we find out that there is an error. // // NOTE: This is a size in characters, not bytes. // // kRawBufSize // The size of the raw buffer from which raw bytes are spooled out // as we transcode chunks of data. As it is emptied, it is filled back // in again from the source stream. // --------------------------------------------------------------------------- enum Constants { kCharBufSize = 16 * 1024 , kRawBufSize = 48 * 1024 }; // ----------------------------------------------------------------------- // Private helper methods // ----------------------------------------------------------------------- void checkForSwapped(); void doInitCharSizeChecks(); void doInitDecode(); XMLByte getNextRawByte ( const bool eoiOk ); void refreshRawBuffer(); void setTranscoder ( const XMLCh* const newEncoding ); unsigned int xcodeMoreChars ( XMLCh* const bufToFill , unsigned char* const charSizes , const unsigned int maxChars ); // ----------------------------------------------------------------------- // Data members // // fCharIndex // The index into the character buffer. When this hits fCharsAvail // then its time to refill. // // fCharBuf // A buffer that the reader manager fills up with transcoded // characters a small amount at a time. // // fCharsAvail // The characters currently available in the character buffer. // // fCharSizeBuf // This buffer is an array that contains the number of source chars // eaten to create each char in the fCharBuf buffer. So the entry // fCharSizeBuf[x] is the number of source chars that were eaten // to make the internalized char fCharBuf[x]. This only contains // useful data if fSrcOfsSupported is true. // // fCurCol // fCurLine // The current line and column that we are in within this reader's // text. // // fEncoding // This is the rough encoding setting. This enum is set during // construction and just tells us the rough family of encoding that // we are doing. 
//
//  fEncodingStr
//      This is the name of the encoding we are using. It will be
//      provisionally set during construction, from the auto-sensed
//      encoding. But it might be overridden when the XMLDecl is finally
//      seen by the scanner. It can also be forced to a particular
//      encoding, in which case fForcedEncoding is set.
//
//  fForcedEncoding
//      If the encoding is forced then this is set and all other
//      information will be ignored. This encoding will be taken as
//      gospel. This is done by calling an alternate constructor.
//
//  fNoMore
//      This is set when the source text is exhausted. It lets us know
//      quickly that no more text is available.
//
//  fRawBufIndex
//      The current index into the raw byte buffer. When it's equal to
//      fRawBytesAvail then we need to read another buffer.
//
//  fRawByteBuf
//      This is the raw byte buffer from which bytes are spooled out
//      into the fCharBuf buffer, as we transcode in blocks.
//
//  fRawBytesAvail
//      The number of bytes currently available in the raw buffer. This
//      helps deal with the last buffer's worth, which will usually not
//      be a full one.
//
//  fReaderNum
//      Each reader from a particular reader manager (which means from a
//      particular document) is given a unique number. The reader manager
//      sets these numbers. They are used to catch things like partial
//      markup errors.
//
//  fRefFrom
//      This flag is provided in the ctor, and tells us if we represent
//      some entity being expanded inside a literal. Sometimes things
//      happen differently inside and outside literals.
//
//  fPublicId
//  fSystemId
//      These are the system and public ids of the source that this
//      reader is reading.
//
//  fSentTrailingSpace
//      If we are a PE entity being read and we are not referenced from a
//      literal, then a leading and trailing space must be faked into the
//      data. This lets us know we've done the trailing space already (so
//      we don't just keep doing it again and again.)
//
//  fSource
//      Indicates whether the content this reader is spooling has already
//      been internalized. This will prevent multiple processing of
//      whitespace when an already internalized entity is being spooled
//      out.
//
//  fSpareChar
//      Some encodings can create two chars in an atomic way, e.g.
//      surrogate pairs. We might not be able to store both, so we store
//      it here until the next buffer transcoding operation.
//
//  fSrcOfsBase
//      This is the base offset within the source of this entity. Values
//      in the current fCharSizeBuf array are relative to this value.
//
//  fSrcOfsSupported
//      This flag is set to indicate whether source byte offset info
//      is supported. For intrinsic encodings, it's always set since we
//      can always support it. For transcoder based encodings, we ask
//      the transcoder if it supports it or not.
//
//  fStream
//      This is the input stream that provides the data for the reader.
//      It's always treated as a raw byte stream. The derived class will
//      ask for buffers of text from it and will handle making some
//      sense of it.
//
//  fSwapped
//      If the encoding is one of the ones we do intrinsically, and it's
//      in a different byte order from our native order, then this is
//      set to remind us to byte swap it during transcoding.
//
//  fThrowAtEnd
//      Indicates whether the reader manager should throw an end of entity
//      exception at the end of this reader instance. This is usually
//      set for top level external entity references. It overrides the
//      reader manager's global flag that controls throwing at the end
//      of entities. Defaults to false.
//
//  fTranscoder
//      If the encoding is not one that we handle intrinsically, then
//      we use an external transcoder to do it. This class is an
//      abstraction that allows us to use pluggable external transcoding
//      services (via XMLTransService in util.)
//
//  fType
//      Indicates whether this reader represents a PE or not.
If this // flag is true and the fInLiteral flag is false, then we will put // out an extra space at the end. // // fgCharCharsTable; // Pointer to XMLChar table, depends on XML version // // fNEL // Boolean indicates if NEL and LSEP should be recognized as NEL // // fXMLVersion // Enum to indicate if this Reader is conforming to XML 1.0 or XML 1.1 // ----------------------------------------------------------------------- unsigned int fCharIndex; XMLCh fCharBuf[kCharBufSize]; unsigned int fCharsAvail; unsigned char fCharSizeBuf[kCharBufSize]; XMLSSize_t fCurCol; XMLSSize_t fCurLine; XMLRecognizer::Encodings fEncoding; XMLCh* fEncodingStr; bool fForcedEncoding; bool fNoMore; XMLCh* fPublicId; unsigned int fRawBufIndex; XMLByte fRawByteBuf[kRawBufSize]; unsigned int fRawBytesAvail; unsigned int fReaderNum; RefFrom fRefFrom; bool fSentTrailingSpace; Sources fSource; unsigned int fSrcOfsBase; bool fSrcOfsSupported; bool fCalculateSrcOfs; XMLCh* fSystemId; BinInputStream* fStream; bool fSwapped; bool fThrowAtEnd; XMLTranscoder* fTranscoder; Types fType; XMLByte* fgCharCharsTable; bool fNEL; XMLVersion fXMLVersion; MemoryManager* fMemoryManager;};// ---------------------------------------------------------------------------// XMLReader: Public, query methods// ---------------------------------------------------------------------------inline bool XMLReader::isNameChar(const XMLCh toCheck){ return ((fgCharCharsTable[toCheck] & gNameCharMask) != 0);}inline bool XMLReader::isPlainContentChar(const XMLCh toCheck){ return ((fgCharCharsTable[toCheck] & gPlainContentCharMask) != 0);}inline bool XMLReader::isFirstNameChar(const XMLCh toCheck){ return ((fgCharCharsTable[toCheck] & gFirstNameCharMask) != 0);}inline bool XMLReader::isSpecialStartTagChar(const XMLCh toCheck){ return ((fgCharCharsTable[toCheck] & gSpecialStartTagCharMask) != 0);}inline bool XMLReader::isXMLChar(const XMLCh toCheck){ return ((fgCharCharsTable[toCheck] & gXMLCharMask) != 0);}inline bool 
XMLReader::isXMLLetter(const XMLCh toCheck){ return ((fgCharCharsTable[toCheck] & gLetterCharMask) != 0);}inline bool XMLReader::isWhitespace(const XMLCh toCheck){ return ((fgCharCharsTable[toCheck] & gWhitespaceCharMask) != 0);}inline bool XMLReader::isControlChar(const XMLCh toCheck){ return ((fgCharCharsTable[toCheck] & gControlCharMask) != 0);}// ---------------------------------------------------------------------------// XMLReader: Buffer management methods// ---------------------------------------------------------------------------
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -