📄 utf8codingsystem.cxx

📁 SP是一个基于GNU C++编译器
💻 CXX
字号:
// Copyright (c) 1994 James Clark
// See the file COPYING for copying permission.

// UTF-8 coding system: a Decoder that turns UTF-8 byte sequences of one to
// six bytes into Char values, and an Encoder that does the reverse.
// The whole implementation is compiled only when SP_MULTI_BYTE is defined.

#include "splib.h"

#ifdef SP_MULTI_BYTE

#include "UTF8CodingSystem.h"
#include "constant.h"

#ifdef SP_NAMESPACE
namespace SP_NAMESPACE {
#endif

// Bit patterns describing the lead byte of each sequence length, and the
// value range each length may legally produce.
enum {
  // cmaskN is mask for first byte to test for N byte sequence
  cmask1 = 0x80,
  cmask2 = 0xe0,
  cmask3 = 0xf0,
  cmask4 = 0xf8,
  cmask5 = 0xfc,
  cmask6 = 0xfe,
  // cvalN is value of masked first byte of N byte sequence
  cval1 = 0x00,
  cval2 = 0xc0,
  cval3 = 0xe0,
  cval4 = 0xf0,
  cval5 = 0xf8,
  cval6 = 0xfc,
  // vmaskN is mask to get value from first byte in N byte sequence
  vmask2 = 0x1f,
  vmask3 = 0xf,
  vmask4 = 0x7,
  vmask5 = 0x3,
  vmask6 = 0x1,
  // minN is minimum legal resulting value for N byte sequence
  // (a decoded value below minN means the sequence was longer than needed;
  // max6 is the upper bound checked for 6 byte sequences)
  min2 = 0x80,
  min3 = 0x800,
  min4 = 0x10000,
  min5 = 0x200000,
  min6 = 0x4000000,
  max6 = 0x7fffffff
};

// Decoder from UTF-8 bytes to Char values.
// NOTE(review): decode() presumably overrides a virtual declared in Decoder;
// the base class is not visible in this file.
class UTF8Decoder : public Decoder {
public:
  UTF8Decoder();
  size_t decode(Char *, const char *, size_t, const char **);
private:
  // value for encoding error
  // (0xfffd is the code point of the Unicode REPLACEMENT CHARACTER)
  enum { invalid = 0xfffd };
  // Set when a call to decode() ran out of input while skipping the
  // continuation bytes that follow a bad byte; the next call resumes the
  // skipping before decoding anything.
  Boolean recovering_;
};

// Encoder from Char values to UTF-8 bytes.
// NOTE(review): output() presumably overrides a virtual declared in Encoder;
// the base class is not visible in this file.
class UTF8Encoder : public Encoder {
public:
  UTF8Encoder();
  void output(const Char *, size_t, OutputByteStream *);
};

// Returns a newly allocated UTF8Decoder.
// NOTE(review): the caller presumably takes ownership -- confirm at call sites.
Decoder *UTF8CodingSystem::makeDecoder() const
{
  return new UTF8Decoder;
}

// Returns a newly allocated UTF8Encoder.
// NOTE(review): the caller presumably takes ownership -- confirm at call sites.
Encoder *UTF8CodingSystem::makeEncoder() const
{
  return new UTF8Encoder;
}

// A new decoder starts outside error recovery.
UTF8Decoder::UTF8Decoder()
: recovering_(0)
{
}

// Decodes the slen bytes starting at s, storing the resulting Chars at to.
//
// Returns the number of Chars stored.  *result is set to the first byte that
// was not consumed: decoding stops early, leaving the bytes unconsumed, when
// the input ends in the middle of a multi-byte sequence.
//
// Every stored Char consumes at least one input byte, so at most slen Chars
// are written; no bound on the output buffer is checked here.
//
// A byte that cannot start a sequence, or a lead byte followed by something
// that is not a continuation byte, yields a single `invalid` Char; the bad
// byte and any continuation bytes directly after it are skipped.  A
// well-formed sequence whose value is below the minimum for its length, or
// whose length can encode values above charMax, also yields `invalid`.
size_t UTF8Decoder::decode(Char *to, const char *s,
                           size_t slen, const char **result)
{
  Char *start = to;
  const unsigned char *us = (const unsigned char *)s;
  if (recovering_) {
    // The previous call ended while skipping continuation bytes after an
    // error: carry on skipping at the start of this buffer.
    recovering_ = 0;
    goto recover;
  }
  while (slen > 0) {
    unsigned c0;
    c0 = us[0];
    if ((c0 & cmask1) == cval1) {
      // Single byte (top bit clear): the byte is the character.
      *to++ = c0;
      us++;
      slen--;
    }
    else if ((c0 & cmask2) == cval2) {
      // Sequence not yet complete in this buffer: leave it unconsumed.
      if (slen < 2)
        goto done;
      // XOR with 0x80 maps a continuation byte 10xxxxxx to 00xxxxxx, so any
      // of the top two bits left set means it was not a continuation byte.
      unsigned c1 = us[1] ^ 0x80;
      if (c1 & 0xc0)
        goto error;
      unsigned c = ((c0 & vmask2) << 6) | c1;
      // Value would have fitted in a shorter sequence.
      if (c < min2)
        c = invalid;
      *to++ = c;
      slen -= 2;
      us += 2;
    }
    else if ((c0 & cmask3) == cval3) {
      if (slen < 3)
        goto done;
      unsigned c1 = us[1] ^ 0x80;
      unsigned c2 = us[2] ^ 0x80;
      // One test covers both continuation bytes.
      if ((c1 | c2) & 0xc0)
        goto error;
      unsigned c = ((((c0 & vmask3) << 6) | c1) << 6) | c2;
      if (c < min3)
        c = invalid;
      *to++ = c;
      slen -= 3;
      us += 3;
    }
    else if ((c0 & cmask4) == cval4) {
      if (slen < 4)
        goto done;
      unsigned c1 = us[1] ^ 0x80;
      unsigned c2 = us[2] ^ 0x80;
      unsigned c3 = us[3] ^ 0x80;
      if ((c1 | c2 | c3) & 0xc0)
        goto error;
      // min5 - 1 is the largest value a 4 byte sequence can carry.  When
      // charMax is smaller than that, the sequence is consumed but replaced
      // by `invalid` without being decoded.
      // NOTE(review): charMax is defined outside this file (presumably in
      // constant.h as the largest Char value) -- confirm.
      if (charMax < min5 - 1)
        *to++ = invalid;
      else {
        unsigned long c = ((((c0 & vmask4) << 6) | c1) << 6) | c2;
        c = (c << 6) | c3;
        if (c < min4)
          c = invalid;
        *to++ = c;
      }
      slen -= 4;
      us += 4;
    }
    else if ((c0 & cmask5) == cval5) {
      if (slen < 5)
        goto done;
      unsigned c1 = us[1] ^ 0x80;
      unsigned c2 = us[2] ^ 0x80;
      unsigned c3 = us[3] ^ 0x80;
      unsigned c4 = us[4] ^ 0x80;
      if ((c1 | c2 | c3 | c4) & 0xc0)
        goto error;
      // min6 - 1 is the largest value a 5 byte sequence can carry.
      if (charMax < min6 - 1)
        *to++ = invalid;
      else {
        unsigned long c = ((((c0 & vmask5) << 6) | c1) << 6) | c2;
        c = (((c << 6) | c3) << 6) | c4;
        if (c < min5)
          c = invalid;
        *to++ = c;
      }
      slen -= 5;
      us += 5;
    }
    else if ((c0 & cmask6) == cval6) {
      if (slen < 6)
        goto done;
      unsigned c1 = us[1] ^ 0x80;
      unsigned c2 = us[2] ^ 0x80;
      unsigned c3 = us[3] ^ 0x80;
      unsigned c4 = us[4] ^ 0x80;
      unsigned c5 = us[5] ^ 0x80;
      if ((c1 | c2 | c3 | c4 | c5) & 0xc0)
        goto error;
      if (charMax < max6)
        *to++ = invalid;
      else {
        unsigned long c = ((((c0 & vmask6) << 6) | c1) << 6) | c2;
        c = (((((c << 6) | c3) << 6) | c4) << 6) | c5;
        if (c < min6)
          c = invalid;
        *to++ = c;
      }
      slen -= 6;
      us += 6;
    }
    else {
      // Reached directly when c0 matches none of the lead byte patterns, and
      // by goto from the branches above when a continuation byte is missing.
    error:
      // Drop the offending byte and report one encoding error for it.
      us++;
      slen--;
      *to++ = invalid;
      // Also entered from the top of the function when a previous call ended
      // in the middle of this loop.
    recover:
      // Skip continuation bytes (10xxxxxx) up to the next byte that is not
      // one; decoding resumes there.
      for (;;) {
        if (slen == 0) {
          // Out of input while still skipping: remember it for the next call.
          recovering_ = 1;
          goto done;
        }
        if ((*us & 0xc0) != 0x80)
          break;
        us++;
        slen--;
      }
    }
  }
 done:
  // Report how far the input was consumed and how many Chars were produced.
  *result = (char *)us;
  return to - start;
}

UTF8Encoder::UTF8Encoder()
{
}

// Writes the n Chars starting at s to sb as UTF-8, one sputc() call per
// output byte.  Each Char uses the shortest sequence whose range contains
// it: values below min2 take one byte, and so on up to six bytes for values
// up to max6.
// NOTE(review): a Char greater than max6 matches no branch and produces no
// output at all -- it is dropped silently.
void UTF8Encoder::output(const Char *s, size_t n, OutputByteStream *sb)
{
  for (; n > 0; s++, n--) {
    Char c = *s;
    if (c < min2)
      sb->sputc((unsigned char)c);
    else if (c < min3) {
      // Lead byte: cvalN marker plus the high bits of the value; each
      // following byte: 0x80 marker plus the next six bits.
      sb->sputc((c >> 6) | cval2);
      sb->sputc((c & 0x3f) | 0x80);
    }
    else if (c < min4) {
      sb->sputc((c >> 12) | cval3);
      sb->sputc(((c >> 6) & 0x3f) | 0x80);
      sb->sputc((c & 0x3f) | 0x80);
    }
    else if (c < min5) {
      sb->sputc((c >> 18) | cval4);
      sb->sputc(((c >> 12) & 0x3f) | 0x80);
      sb->sputc(((c >> 6) & 0x3f) | 0x80);
      sb->sputc((c & 0x3f) | 0x80);
    }
    else if (c < min6) {
      sb->sputc((c >> 24) | cval5);
      sb->sputc(((c >> 18) & 0x3f) | 0x80);
      sb->sputc(((c >> 12) & 0x3f) | 0x80);
      sb->sputc(((c >> 6) & 0x3f) | 0x80);
      sb->sputc((c & 0x3f) | 0x80);
    }
    else if (c <= max6) {
      sb->sputc((c >> 30) | cval6);
      sb->sputc(((c >> 24) & 0x3f) | 0x80);
      sb->sputc(((c >> 18) & 0x3f) | 0x80);
      sb->sputc(((c >> 12) & 0x3f) | 0x80);
      sb->sputc(((c >> 6) & 0x3f) | 0x80);
      sb->sputc((c & 0x3f) | 0x80);
    }
  }
}

#ifdef SP_NAMESPACE
}
#endif

#else /* not SP_MULTI_BYTE */

// Without SP_MULTI_BYTE nothing above is compiled; on compilers other than
// GNU C++ a dummy declaration is emitted so the file is not empty.
#ifndef __GNUG__
static char non_empty_translation_unit;	// sigh
#endif

#endif /* not SP_MULTI_BYTE */
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -