⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 charsetregistry.cxx

📁 SP是一个基于GNU C++编译器
💻 CXX
字号:
// Copyright (c) 1994, 1997 James Clark// See the file COPYING for copying permission.#ifdef __GNUG__#pragma implementation#endif#include "splib.h"#include "CharsetRegistry.h"#include "CharsetInfo.h"#include "StringC.h"#include "types.h"#include "macros.h"#ifdef SP_NAMESPACEnamespace SP_NAMESPACE {#endifCharsetRegistry::Iter::~Iter(){}class CharsetRegistryRangeIter : public CharsetRegistry::Iter {public:  CharsetRegistryRangeIter(const UnivCharsetDesc::Range *p, size_t n)    : p_(p), n_(n) { }  Boolean next(WideChar &min, WideChar &max, UnivChar &univ) {    if (n_) {      min = p_->descMin;      max = p_->descMin + (p_->count - 1);      univ = p_->univMin;      p_++;      n_--;      return 1;    }    else      return 0;  }private:  const UnivCharsetDesc::Range *p_;  size_t n_;};class CharsetRegistryDescIter : public CharsetRegistry::Iter {public:  CharsetRegistryDescIter(const unsigned short *p)    : p_(p + 2), n_(p[0]), c_(p[1]) { }  Boolean next(WideChar &min, WideChar &max, UnivChar &univ) {    if (n_ == 0) {      n_ = *p_;      if (n_ == 0)	return 0;      p_++;      c_ = *p_++;    }    int i = 1;    for (; i < n_; i++)      if (p_[i] != p_[i - 1] + 1)	break;    min = c_;    max = min + (i - 1);    univ = p_[0];    p_ += i;    c_ += i;    n_ -= i;    return 1;  }private:  const unsigned short *p_;  size_t n_;  WideChar c_;};static struct {  const char *esc;  CharsetRegistry::ISORegistrationNumber number;} escTable[] = {  { "\x1B\x25\x40", CharsetRegistry::ISO646_ASCII_G0 },  { "\x1B\x28\x40", CharsetRegistry::ISO646_ASCII_G0 },  { "\x1B\x28\x42", CharsetRegistry::ISO646_ASCII_G0 }, // ASCII  { "\x1B\x21\x40", CharsetRegistry::ISO646_C0 },  { "\x1B\x2D\x41", CharsetRegistry::ISO8859_1 },  { "\x1B\x2D\x42", CharsetRegistry::ISO8859_2 },  { "\x1B\x2D\x43", CharsetRegistry::ISO8859_3 },  { "\x1B\x2D\x44", CharsetRegistry::ISO8859_4 },  { "\x1B\x2D\x4C", CharsetRegistry::ISO8859_5 },  { "\x1B\x2D\x47", CharsetRegistry::ISO8859_6 },  { "\x1B\x2D\x46", CharsetRegistry::ISO8859_7 },  { "\x1B\x2D\x48", CharsetRegistry::ISO8859_8 },  { "\x1B\x2D\x4D", CharsetRegistry::ISO8859_9 },  { "\x1B\x28\x4A", CharsetRegistry::ISO646_JIS_G0 },  { "\x1B\x28\x49", CharsetRegistry::JIS0201 },  { "\x1B\x24\x42", CharsetRegistry::JIS0208 },  { "\x1B\x26\x40\x1B\x24\x42", CharsetRegistry::JIS0208 },  { "\x1B\x24\x28\x44", CharsetRegistry::JIS0212 },  { "\x1B\x24\x41", CharsetRegistry::GB2312 },  { "\x1B\x24\x28\x43", CharsetRegistry::KSC5601 },  { "\x1B\x25\x2F\x40", CharsetRegistry::ISO10646_UCS2 },  { "\x1B\x25\x2F\x41", CharsetRegistry::ISO10646_UCS4 },  { "\x1B\x25\x2F\x43", CharsetRegistry::ISO10646_UCS2 },  { "\x1B\x25\x2F\x44", CharsetRegistry::ISO10646_UCS4 },  { "\x1B\x25\x2F\x45", CharsetRegistry::ISO10646_UCS2 },  { "\x1B\x25\x2F\x46", CharsetRegistry::ISO10646_UCS4 },};static const UnivCharsetDesc::Range iso646_ascii[] = {  { 0, 128, 0 },};static const UnivCharsetDesc::Range iso646_C0[] = {  { 0, 32, 0 },  { 127, 1, 127 },};static const UnivCharsetDesc::Range iso6429[] = {  { 0, 32, 128 },};static const UnivCharsetDesc::Range iso8859_1[] = {  { 32, 96, 160 },};static const UnivCharsetDesc::Range iso10646_ucs2[] = {  { 0, 65536, 0 },};static const UnivCharsetDesc::Range iso10646_ucs4[] = {  { 0, 0x80000000, 0 },};static struct {  CharsetRegistry::ISORegistrationNumber number;  const UnivCharsetDesc::Range *ranges;  size_t nRanges;} rangeTable[] = {  { CharsetRegistry::ISO646_ASCII_G0, iso646_ascii, SIZEOF(iso646_ascii) },  { CharsetRegistry::ISO646_C0, iso646_C0, SIZEOF(iso646_C0) },  { CharsetRegistry::ISO6429, iso6429, SIZEOF(iso6429) },  { CharsetRegistry::ISO8859_1, iso8859_1, SIZEOF(iso8859_1) },  { CharsetRegistry::ISO10646_UCS2, iso10646_ucs2, SIZEOF(iso10646_ucs2) },  { CharsetRegistry::ISO10646_UCS4, iso10646_ucs4, SIZEOF(iso10646_ucs4) },};static const unsigned short iso8859_2[] = {#include "iso8859-2.h"};static const unsigned short iso8859_3[] = {#include "iso8859-3.h"};static const unsigned short iso8859_4[] = {#include "iso8859-4.h"};static const unsigned short iso8859_5[] = {#include "iso8859-5.h"};static const unsigned short iso8859_6[] = {#include "iso8859-6.h"};static const unsigned short iso8859_7[] = {#include "iso8859-7.h"};static const unsigned short iso8859_8[] = {#include "iso8859-8.h"};static const unsigned short iso8859_9[] = {#include "iso8859-9.h"};static const unsigned short iso646_jis_G0[] = {#include "iso646-jis.h"};static const unsigned short jis0201[] = {#include "jis0201.h"};#ifdef SP_MULTI_BYTEstatic const unsigned short jis0208[] = {#include "jis0208.h"};static const unsigned short jis0212[] = {#include "jis0212.h"};static const unsigned short gb2312[] = {#include "gb2312.h"};static const unsigned short ksc5601[] = {#include "ksc5601.h"};static const unsigned short big5[] = {#include "big5.h"};#endif /* SP_MULTI_BYTE */static const struct {  CharsetRegistry::ISORegistrationNumber number;  const unsigned short *desc;} descTable[] = {  { CharsetRegistry::ISO8859_2, iso8859_2 },  { CharsetRegistry::ISO8859_3, iso8859_3 },  { CharsetRegistry::ISO8859_4, iso8859_4 },  { CharsetRegistry::ISO8859_5, iso8859_5 },  { CharsetRegistry::ISO8859_6, iso8859_6 },  { CharsetRegistry::ISO8859_7, iso8859_7 },  { CharsetRegistry::ISO8859_8, iso8859_8 },  { CharsetRegistry::ISO8859_9, iso8859_9 },  { CharsetRegistry::ISO646_JIS_G0, iso646_jis_G0 },  { CharsetRegistry::JIS0201, jis0201 },#ifdef SP_MULTI_BYTE  { CharsetRegistry::JIS0208, jis0208 },  { CharsetRegistry::JIS0212, jis0212 },  { CharsetRegistry::GB2312, gb2312 },  { CharsetRegistry::KSC5601, ksc5601 },  { CharsetRegistry::BIG5, big5 },#endif};CharsetRegistry::ISORegistrationNumberCharsetRegistry::getRegistrationNumber(const StringC &sequence,				       const CharsetInfo &charset){  // Canonicalize the escape sequence by mapping esc -> ESC,  // removing leading zeros from escape sequences, and removing  // initial spaces.  StringC s;  for (size_t i = 0; i < sequence.size(); i++) {    Char c = sequence[i];    if (c == charset.execToDesc('e'))      s += charset.execToDesc('E');    else if (c == charset.execToDesc('s'))      s += charset.execToDesc('S');    else if (c == charset.execToDesc('c'))      s += charset.execToDesc('C');    else if (charset.digitWeight(c) >= 0	     && s.size() > 0	     && s[s.size() - 1] == charset.execToDesc('0')	     && (s.size() == 1		 || charset.digitWeight(s[s.size() - 2]) >= 0))      s[s.size() - 1] = c;    else if (c != charset.execToDesc(' ') || s.size() > 0)      s += c;  }  for (size_t i = 0; i < SIZEOF(escTable); i++) {    StringC esc;    for (const char *p = escTable[i].esc; *p; p++) {      if (*p == 0x1B)	esc += charset.execToDesc("ESC");      else {	static const char digits[] = "0123456789";	int c = (unsigned char)*p >> 4;	if (c >= 10)	  esc += charset.execToDesc('1');	esc += charset.execToDesc(digits[c % 10]);	esc += charset.execToDesc('/');	c = (*p & 0xf);	if (c >= 10)	  esc += charset.execToDesc('1');	esc += charset.execToDesc(digits[c % 10]);      }      if (p[1])	esc += charset.execToDesc(' ');    }    if (s == esc)      return escTable[i].number;  }  return UNREGISTERED;}CharsetRegistry::Iter *CharsetRegistry::makeIter(ISORegistrationNumber number){  for (size_t i = 0; i < SIZEOF(rangeTable); i++) {    if (number == rangeTable[i].number)      return new CharsetRegistryRangeIter(rangeTable[i].ranges, rangeTable[i].nRanges);  }  for (size_t i = 0; i < SIZEOF(descTable); i++) {    if (number == descTable[i].number)      return new CharsetRegistryDescIter(descTable[i].desc);  }  return 0;}#ifdef SP_NAMESPACE}#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -