📄 xmlcodingsystem.cxx
字号:
// Copyright (c) 1994, 1997 James Clark// See the file COPYING for copying permission.#ifdef __GNUG__#pragma implementation#endif#include "splib.h"#ifdef SP_MULTI_BYTE#include "XMLCodingSystem.h"#include "UTF8CodingSystem.h"#include "CodingSystemKit.h"#include "Boolean.h"#include "Owner.h"#include "macros.h"#include <stddef.h>#include <string.h>#ifdef SP_DECLARE_MEMMOVEextern "C" { void *memmove(void *, const void *, size_t);}#endif#ifdef SP_NAMESPACEnamespace SP_NAMESPACE {#endifconst Char ISO646_TAB = 0x9;const Char ISO646_LF = 0xA;const Char ISO646_CR = 0xD;const Char ISO646_SPACE = 0x20;const Char ISO646_QUOT = 0x22;const Char ISO646_APOS = 0x27;const Char ISO646_LT = 0x3C;const Char ISO646_EQUAL = 0x3D;const Char ISO646_GT = 0x3E;const Char ISO646_QUEST = 0x3F;const Char ISO646_LETTER_a = 0x61;const Char ISO646_LETTER_c = 0x63;const Char ISO646_LETTER_d = 0x64;const Char ISO646_LETTER_e = 0x65;const Char ISO646_LETTER_g = 0x67;const Char ISO646_LETTER_i = 0x69;const Char ISO646_LETTER_l = 0x6C;const Char ISO646_LETTER_m = 0x6D;const Char ISO646_LETTER_n = 0x6E;const Char ISO646_LETTER_o = 0x6F;const Char ISO646_LETTER_x = 0x78;class XMLDecoder : public Decoder {public: XMLDecoder(const InputCodingSystemKit *); size_t decode(Char *to, const char *from, size_t fromLen, const char **rest); Boolean convertOffset(unsigned long &offset) const;private: class UCS2 : public Decoder { public: UCS2(Boolean swapBytes); size_t decode(Char *to, const char *from, size_t fromLen, const char **rest); Boolean convertOffset(unsigned long &offset) const; private: Boolean swapBytes_; }; // Don't keep parsing a PI longer than this. // We want to avoid reading some enormous file into memory just because // some quote was left off. enum { piMaxSize = 1024*32 }; void initDecoderDefault(); void initDecoderPI(); Boolean extractEncoding(StringC &name); static Boolean isWS(Char); enum DetectPhase { phaseInit, phasePI, phaseFinish }; DetectPhase phase_; Boolean byteOrderMark_; Boolean lsbFirst_; int guessBytesPerChar_; Owner<Decoder> subDecoder_; // Contains all the characters passed to caller that were // not produced by subDecoder_. StringC pi_; Char piLiteral_; const InputCodingSystemKit *kit_;};XMLCodingSystem::XMLCodingSystem(const InputCodingSystemKit *kit): kit_(kit){}Decoder *XMLCodingSystem::makeDecoder() const{ return new XMLDecoder(kit_);}Encoder *XMLCodingSystem::makeEncoder() const{ UTF8CodingSystem utf8; return utf8.makeEncoder();}XMLDecoder::XMLDecoder(const InputCodingSystemKit *kit): Decoder(1), kit_(kit), phase_(phaseInit), byteOrderMark_(0), lsbFirst_(0), guessBytesPerChar_(1), piLiteral_(0){}size_t XMLDecoder::decode(Char *to, const char *from, size_t fromLen, const char **rest){ if (phase_ == phaseFinish) return subDecoder_->decode(to, from, fromLen, rest); if (phase_ == phaseInit) { if (fromLen == 0) { *rest = from; return 0; } switch ((unsigned char)*from) { case 0x00: case 0x3C: case 0xFF: case 0xFE: if (fromLen < 2) { *rest = from; return 0; } switch (((unsigned char)from[0] << 8) | (unsigned char)from[1]) { case 0xFEFF: phase_ = phasePI; byteOrderMark_ = 1; guessBytesPerChar_ = 2; from += 2; fromLen -= 2; break; case 0xFFFE: lsbFirst_ = 1; phase_ = phasePI; byteOrderMark_ = 1; guessBytesPerChar_ = 2; from += 2; fromLen -= 2; break; case 0x3C3F: phase_ = phasePI; break; case 0x3C00: lsbFirst_ = 1; phase_ = phasePI; guessBytesPerChar_ = 2; break; case 0x003C: phase_ = phasePI; guessBytesPerChar_ = 2; break; default: break; } if (phase_ == phasePI) break; // fall through default: phase_ = phaseFinish; guessBytesPerChar_ = 1; initDecoderDefault(); return subDecoder_->decode(to, from, fromLen, rest); } } ASSERT(phase_ == phasePI); Char *p = to; for (; fromLen > guessBytesPerChar_; fromLen -= guessBytesPerChar_, from += guessBytesPerChar_) { if (!piLiteral_ && pi_.size() > 0 && pi_[pi_.size() - 1] == ISO646_GT) { initDecoderPI(); phase_ = phaseFinish; return (p - to) + subDecoder_->decode(p, from, fromLen, rest); } Char c = (unsigned char)from[0]; if (guessBytesPerChar_ > 1) { if (lsbFirst_) c |= (unsigned char)from[1] << 8; else { c <<= 8; c |= (unsigned char)from[1]; } } static const Char startBytes[] = { ISO646_LT, ISO646_QUEST, ISO646_LETTER_x, ISO646_LETTER_m, ISO646_LETTER_l }; // Stop accumulating the PI if we get characters that are illegal in the PI. if (c == 0 || c >= 0x7F || (pi_.size() > 0 && c == ISO646_LT) || pi_.size() > piMaxSize || (pi_.size() < 5 && c != startBytes[pi_.size()]) || (pi_.size() == 5 && !isWS(c))) { initDecoderDefault(); phase_ = phaseFinish; break; } *p++ = c; pi_ += c; if (piLiteral_) { if (c == piLiteral_) piLiteral_ = 0; } else if (c == ISO646_QUOT || c == ISO646_APOS) piLiteral_ = c; } size_t n = p - to; if (phase_ == phaseFinish && fromLen > 0) n += subDecoder_->decode(p, from, fromLen, rest); else *rest = from; return n;}Boolean XMLDecoder::convertOffset(unsigned long &n) const{ if (n <= pi_.size()) n *= guessBytesPerChar_; else { if (!subDecoder_) return 0; unsigned long tem = n - pi_.size(); if (!subDecoder_->convertOffset(tem)) return 0; n = tem + pi_.size() * guessBytesPerChar_; } if (byteOrderMark_) n += 2; return 1;}void XMLDecoder::initDecoderDefault(){ if (guessBytesPerChar_ == 1) { UTF8CodingSystem utf8; subDecoder_ = utf8.makeDecoder(); } else { unsigned short n = 0x1; minBytesPerChar_ = 2; subDecoder_ = new UCS2((*(char *)&n == 0x1) != lsbFirst_); }}void XMLDecoder::initDecoderPI(){ StringC name; if (!extractEncoding(name)) initDecoderDefault(); const char *dummy; static const UnivCharsetDesc::Range range = { 0, 128, 0 }; CharsetInfo piCharset(UnivCharsetDesc(&range, 1)); const InputCodingSystem *ics = kit_->makeInputCodingSystem(name, piCharset, 0, dummy); if (ics) { subDecoder_ = ics->makeDecoder(); minBytesPerChar_ = subDecoder_->minBytesPerChar(); } if (!subDecoder_) initDecoderDefault();}Boolean XMLDecoder::isWS(Char c){ switch (c) { case ISO646_CR: case ISO646_LF: case ISO646_SPACE: case ISO646_TAB: return 1; } return 0;}Boolean XMLDecoder::extractEncoding(StringC &name){ Char lit = 0; for (size_t i = 5; i < pi_.size(); i++) { if (!lit) { if (pi_[i] == ISO646_APOS || pi_[i] == ISO646_QUOT) lit = pi_[i]; else if (pi_[i] == ISO646_EQUAL) { size_t j = i; for (; j > 0; j--) { if (!isWS(pi_[j - 1])) break; } size_t nameEnd = j; for (; j > 0; j--) { if (isWS(pi_[j - 1]) || pi_[j - 1] == ISO646_QUOT || pi_[j - 1] == ISO646_APOS) break; } static const Char encodingName[] = { ISO646_LETTER_e, ISO646_LETTER_n, ISO646_LETTER_c, ISO646_LETTER_o, ISO646_LETTER_d, ISO646_LETTER_i, ISO646_LETTER_n, ISO646_LETTER_g, 0 }; const Char *s = encodingName; for (; *s && j < nameEnd; j++, s++) if (pi_[j] != *s) break; if (j == nameEnd && *s == 0) { size_t j = i + 1; for (; j < pi_.size(); j++) { if (!isWS(pi_[j])) break; } if (pi_[j] == ISO646_QUOT || pi_[j] == ISO646_APOS) { Char lit = pi_[j]; size_t nameStart = j + 1; for (++j; j < pi_.size(); j++) { if (pi_[j] == lit) { if (j > nameStart) { name.assign(&pi_[nameStart], j - nameStart); return 1; } break; } } } return 0; } } } else if (pi_[i] == lit) lit = 0; } return 0;}XMLDecoder::UCS2::UCS2(Boolean swapBytes): swapBytes_(swapBytes){}size_t XMLDecoder::UCS2::decode(Char *to, const char *from, size_t fromLen, const char **rest){ union U { unsigned short word; char bytes[2]; }; fromLen &= ~1; *rest = from + fromLen; if (sizeof(Char) == 2) { if (!swapBytes_) { if (from != (char *)to) memmove(to, from, fromLen); return fromLen/2; } } if (swapBytes_) { for (size_t n = fromLen; n > 0; n -= 2) { U u; u.bytes[1] = *from++; u.bytes[0] = *from++; *to++ = u.word; } } else { for (size_t n = fromLen; n > 0; n -= 2) { U u; u.bytes[0] = *from++; u.bytes[1] = *from++; *to++ = u.word; } } return fromLen/2;}Boolean XMLDecoder::UCS2::convertOffset(unsigned long &n) const{ n *= 2; return 1;}#ifdef SP_NAMESPACE}#endif#else /* not SP_MULTI_BYTE */#ifndef __GNUG__static char non_empty_translation_unit; // sigh#endif#endif /* not SP_MULTI_BYTE */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -