⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xmlcodingsystem.cxx

📁 SP是一个基于GNU C++编译器
💻 CXX
字号:
// Copyright (c) 1994, 1997 James Clark// See the file COPYING for copying permission.#ifdef __GNUG__#pragma implementation#endif#include "splib.h"#ifdef SP_MULTI_BYTE#include "XMLCodingSystem.h"#include "UTF8CodingSystem.h"#include "CodingSystemKit.h"#include "Boolean.h"#include "Owner.h"#include "macros.h"#include <stddef.h>#include <string.h>#ifdef SP_DECLARE_MEMMOVEextern "C" {  void *memmove(void *, const void *, size_t);}#endif#ifdef SP_NAMESPACEnamespace SP_NAMESPACE {#endifconst Char ISO646_TAB = 0x9;const Char ISO646_LF = 0xA;const Char ISO646_CR = 0xD;const Char ISO646_SPACE = 0x20;const Char ISO646_QUOT = 0x22;const Char ISO646_APOS = 0x27;const Char ISO646_LT = 0x3C;const Char ISO646_EQUAL = 0x3D;const Char ISO646_GT = 0x3E;const Char ISO646_QUEST = 0x3F;const Char ISO646_LETTER_a = 0x61;const Char ISO646_LETTER_c = 0x63;const Char ISO646_LETTER_d = 0x64;const Char ISO646_LETTER_e = 0x65;const Char ISO646_LETTER_g = 0x67;const Char ISO646_LETTER_i = 0x69;const Char ISO646_LETTER_l = 0x6C;const Char ISO646_LETTER_m = 0x6D;const Char ISO646_LETTER_n = 0x6E;const Char ISO646_LETTER_o = 0x6F;const Char ISO646_LETTER_x = 0x78;class XMLDecoder : public Decoder {public:  XMLDecoder(const InputCodingSystemKit *);  size_t decode(Char *to, const char *from, size_t fromLen,		const char **rest);  Boolean convertOffset(unsigned long &offset) const;private:  class UCS2 : public Decoder {  public:    UCS2(Boolean swapBytes);    size_t decode(Char *to, const char *from, size_t fromLen,		  const char **rest);    Boolean convertOffset(unsigned long &offset) const;  private:    Boolean swapBytes_;  };  // Don't keep parsing a PI longer than this.  // We want to avoid reading some enormous file into memory just because  // some quote was left off.  enum { piMaxSize = 1024*32 };  void initDecoderDefault();  void initDecoderPI();  Boolean extractEncoding(StringC &name);  static Boolean isWS(Char);  enum DetectPhase {    phaseInit,    phasePI,    phaseFinish  };  DetectPhase phase_;  Boolean byteOrderMark_;  Boolean lsbFirst_;  int guessBytesPerChar_;  Owner<Decoder> subDecoder_;  // Contains all the characters passed to caller that were  // not produced by subDecoder_.  StringC pi_;  Char piLiteral_;  const InputCodingSystemKit *kit_;};XMLCodingSystem::XMLCodingSystem(const InputCodingSystemKit *kit): kit_(kit){}Decoder *XMLCodingSystem::makeDecoder() const{  return new XMLDecoder(kit_);}Encoder *XMLCodingSystem::makeEncoder() const{  UTF8CodingSystem utf8;  return utf8.makeEncoder();}XMLDecoder::XMLDecoder(const InputCodingSystemKit *kit): Decoder(1),  kit_(kit),  phase_(phaseInit),  byteOrderMark_(0),  lsbFirst_(0),  guessBytesPerChar_(1),  piLiteral_(0){}size_t XMLDecoder::decode(Char *to, const char *from, size_t fromLen,			  const char **rest){  if (phase_ == phaseFinish)    return subDecoder_->decode(to, from, fromLen, rest);  if (phase_ == phaseInit) {    if (fromLen == 0) {      *rest = from;      return 0;    }    switch ((unsigned char)*from) {    case 0x00:    case 0x3C:    case 0xFF:    case 0xFE:      if (fromLen < 2) {	*rest = from;	return 0;      }      switch (((unsigned char)from[0] << 8) | (unsigned char)from[1]) {      case 0xFEFF:	phase_ = phasePI;	byteOrderMark_ = 1;	guessBytesPerChar_ = 2;	from += 2;	fromLen -= 2;	break;      case 0xFFFE:	lsbFirst_ = 1;	phase_ = phasePI;	byteOrderMark_ = 1;	guessBytesPerChar_ = 2;	from += 2;	fromLen -= 2;	break;      case 0x3C3F:	phase_ = phasePI;	break;      case 0x3C00:	lsbFirst_ = 1;	phase_ = phasePI;	guessBytesPerChar_ = 2;	break;      case 0x003C:	phase_ = phasePI;	guessBytesPerChar_ = 2;	break;      default:	break;      }      if (phase_ == phasePI)	break;      // fall through    default:      phase_ = phaseFinish;      guessBytesPerChar_ = 1;      initDecoderDefault();      return subDecoder_->decode(to, from, fromLen, rest);    }  }  ASSERT(phase_ == phasePI);  Char *p = to;  for (; fromLen > guessBytesPerChar_;       fromLen -= guessBytesPerChar_, from += guessBytesPerChar_) {    if (!piLiteral_ && pi_.size() > 0 && pi_[pi_.size() - 1] == ISO646_GT) {      initDecoderPI();      phase_ = phaseFinish;      return (p - to) + subDecoder_->decode(p, from, fromLen, rest);    }    Char c = (unsigned char)from[0];    if (guessBytesPerChar_ > 1) {      if (lsbFirst_)	c |= (unsigned char)from[1] << 8;      else {	c <<= 8;	c |= (unsigned char)from[1];      }    }    static const Char startBytes[] = {      ISO646_LT, ISO646_QUEST, ISO646_LETTER_x, ISO646_LETTER_m, ISO646_LETTER_l    };    // Stop accumulating the PI if we get characters that are illegal in the PI.    if (c == 0        || c >= 0x7F	|| (pi_.size() > 0 && c == ISO646_LT)	|| pi_.size() > piMaxSize	|| (pi_.size() < 5 && c != startBytes[pi_.size()])	|| (pi_.size() == 5 && !isWS(c))) {      initDecoderDefault();      phase_ = phaseFinish;      break;    }    *p++ = c;    pi_ += c;    if (piLiteral_) {      if (c == piLiteral_)	piLiteral_ = 0;    }    else if (c == ISO646_QUOT || c == ISO646_APOS)      piLiteral_ = c;  }  size_t n = p - to;  if (phase_ == phaseFinish && fromLen > 0)    n += subDecoder_->decode(p, from, fromLen, rest);  else    *rest = from;  return n;}Boolean XMLDecoder::convertOffset(unsigned long &n) const{  if (n <= pi_.size())    n *= guessBytesPerChar_;  else {    if (!subDecoder_)      return 0;    unsigned long tem = n - pi_.size();    if (!subDecoder_->convertOffset(tem))      return 0;    n = tem + pi_.size() * guessBytesPerChar_;  }  if (byteOrderMark_)    n += 2;  return 1;}void XMLDecoder::initDecoderDefault(){  if (guessBytesPerChar_ == 1) {    UTF8CodingSystem utf8;    subDecoder_ = utf8.makeDecoder();  }  else {    unsigned short n = 0x1;    minBytesPerChar_ = 2;    subDecoder_ = new UCS2((*(char *)&n == 0x1) != lsbFirst_);  }}void XMLDecoder::initDecoderPI(){  StringC name;  if (!extractEncoding(name))    initDecoderDefault();  const char *dummy;  static const UnivCharsetDesc::Range range = { 0, 128, 0 };  CharsetInfo piCharset(UnivCharsetDesc(&range, 1));  const InputCodingSystem *ics    = kit_->makeInputCodingSystem(name,				  piCharset,				  0,				  dummy);  if (ics) {    subDecoder_ = ics->makeDecoder();    minBytesPerChar_ = subDecoder_->minBytesPerChar();  }  if (!subDecoder_)    initDecoderDefault();}Boolean XMLDecoder::isWS(Char c){  switch (c) {  case ISO646_CR:  case ISO646_LF:  case ISO646_SPACE:  case ISO646_TAB:    return 1;  }  return 0;}Boolean XMLDecoder::extractEncoding(StringC &name){  Char lit = 0;  for (size_t i = 5; i < pi_.size(); i++) {    if (!lit) {      if (pi_[i] == ISO646_APOS || pi_[i] == ISO646_QUOT)	lit = pi_[i];      else if (pi_[i] == ISO646_EQUAL) {	size_t j = i;	for (; j > 0; j--) {	  if (!isWS(pi_[j - 1]))	    break;	}	size_t nameEnd = j;	for (; j > 0; j--) {	  if (isWS(pi_[j - 1]) || pi_[j - 1] == ISO646_QUOT || pi_[j - 1] == ISO646_APOS)	    break;	}	static const Char encodingName[] = {	  ISO646_LETTER_e, ISO646_LETTER_n, ISO646_LETTER_c, ISO646_LETTER_o,	  ISO646_LETTER_d, ISO646_LETTER_i, ISO646_LETTER_n, ISO646_LETTER_g,	  0	};	const Char *s = encodingName;	for (; *s && j < nameEnd; j++, s++)	  if (pi_[j] != *s)	    break;	if (j == nameEnd && *s == 0) {	  size_t j = i + 1;	  for (; j < pi_.size(); j++) {	    if (!isWS(pi_[j]))	      break;	  }	  if (pi_[j] == ISO646_QUOT || pi_[j] == ISO646_APOS) {	    Char lit = pi_[j];	    size_t nameStart = j + 1;	    for (++j; j < pi_.size(); j++) {	      if (pi_[j] == lit) {		if (j > nameStart) {		  name.assign(&pi_[nameStart], j - nameStart);		  return 1;		}		break;	      }	    }	  }	  return 0;	}      }    }    else if (pi_[i] == lit)      lit = 0;  }  return 0;}XMLDecoder::UCS2::UCS2(Boolean swapBytes): swapBytes_(swapBytes){}size_t XMLDecoder::UCS2::decode(Char *to, const char *from, size_t fromLen,				const char **rest){  union U {    unsigned short word;    char bytes[2];  };  fromLen &= ~1;  *rest = from + fromLen;  if (sizeof(Char) == 2) {    if (!swapBytes_) {      if (from != (char *)to)	memmove(to, from, fromLen);      return fromLen/2;    }  }  if (swapBytes_) {    for (size_t n = fromLen; n > 0; n -= 2) {      U u;      u.bytes[1] = *from++;      u.bytes[0] = *from++;      *to++ = u.word;    }  }  else  {    for (size_t n = fromLen; n > 0; n -= 2) {      U u;      u.bytes[0] = *from++;      u.bytes[1] = *from++;      *to++ = u.word;    }  }  return fromLen/2;}Boolean XMLDecoder::UCS2::convertOffset(unsigned long &n) const{  n *= 2;  return 1;}#ifdef SP_NAMESPACE}#endif#else /* not SP_MULTI_BYTE */#ifndef __GNUG__static char non_empty_translation_unit;	// sigh#endif#endif /* not SP_MULTI_BYTE */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -