xmltok.c

来自「很牛的GUI源码wxWidgets-2.8.0.zip 可在多种平台下运行.」· C语言代码 · 共 1,637 行 · 第 1/3 页
1,637 行
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd   See the file COPYING for copying permission.*/#ifdef COMPILED_FROM_DSP#include "winconfig.h"#elif defined(OS2_32)#include "os2config.h"#elif defined(__MSDOS__)#include "dosconfig.h"#elif defined(MACOS_CLASSIC)#include "macconfig.h"#else#include "expat_config.h"#endif /* ndef COMPILED_FROM_DSP */#include "internal.h"#include "xmltok.h"#include "nametab.h"#ifdef XML_DTD#define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)#else#define IGNORE_SECTION_TOK_VTABLE /* as nothing */#endif#define VTABLE1 \  { PREFIX(prologTok), PREFIX(contentTok), \    PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \  { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \  PREFIX(sameName), \  PREFIX(nameMatchesAscii), \  PREFIX(nameLength), \  PREFIX(skipS), \  PREFIX(getAtts), \  PREFIX(charRefNumber), \  PREFIX(predefinedEntityName), \  PREFIX(updatePosition), \  PREFIX(isPublicId)#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)#define UCS2_GET_NAMING(pages, hi, lo) \   (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))/* A 2 byte UTF-8 representation splits the characters 11 bits between   the bottom 5 and 6 bits of the bytes.  We need 8 bits to index into   pages, 3 bits to add to that index and 5 bits to generate the mask.*/#define UTF8_GET_NAMING2(pages, byte) \    (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \                      + ((((byte)[0]) & 3) << 1) \                      + ((((byte)[1]) >> 5) & 1)] \         & (1 << (((byte)[1]) & 0x1F)))/* A 3 byte UTF-8 representation splits the characters 16 bits between   the bottom 4, 6 and 6 bits of the bytes.  We need 8 bits to index   into pages, 3 bits to add to that index and 5 bits to generate the   mask.*/#define UTF8_GET_NAMING3(pages, byte) \  (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \                             + ((((byte)[1]) >> 2) & 0xF)] \                       << 3) \                      + ((((byte)[1]) & 3) << 1) \                      + ((((byte)[2]) >> 5) & 1)] \         & (1 << (((byte)[2]) & 0x1F)))#define UTF8_GET_NAMING(pages, p, n) \  ((n) == 2 \  ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \  : ((n) == 3 \     ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \     : 0))/* Detection of invalid UTF-8 sequences is based on Table 3.1B   of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/   with the additional restriction of not allowing the Unicode   code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE).   Implementation details:     (A & 0x80) == 0     means A < 0x80   and     (A & 0xC0) == 0xC0  means A > 0xBF*/#define UTF8_INVALID2(p) \  ((*p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)#define UTF8_INVALID3(p) \  (((p)[2] & 0x80) == 0 \  || \  ((*p) == 0xEF && (p)[1] == 0xBF \    ? \    (p)[2] > 0xBD \    : \    ((p)[2] & 0xC0) == 0xC0) \  || \  ((*p) == 0xE0 \    ? \    (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \    : \    ((p)[1] & 0x80) == 0 \    || \    ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0)))#define UTF8_INVALID4(p) \  (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 \  || \  ((p)[2] & 0x80) == 0 || ((p)[2] & 0xC0) == 0xC0 \  || \  ((*p) == 0xF0 \    ? \    (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \    : \    ((p)[1] & 0x80) == 0 \    || \    ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0)))static int PTRFASTCALLisNever(const ENCODING *enc, const char *p){  return 0;}static int PTRFASTCALLutf8_isName2(const ENCODING *enc, const char *p){  return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);}static int PTRFASTCALLutf8_isName3(const ENCODING *enc, const char *p){  return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);}#define utf8_isName4 isNeverstatic int PTRFASTCALLutf8_isNmstrt2(const ENCODING *enc, const char *p){  return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);}static int PTRFASTCALLutf8_isNmstrt3(const ENCODING *enc, const char *p){  return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);}#define utf8_isNmstrt4 isNeverstatic int PTRFASTCALLutf8_isInvalid2(const ENCODING *enc, const char *p){  return UTF8_INVALID2((const unsigned char *)p);}static int PTRFASTCALLutf8_isInvalid3(const ENCODING *enc, const char *p){  return UTF8_INVALID3((const unsigned char *)p);}static int PTRFASTCALLutf8_isInvalid4(const ENCODING *enc, const char *p){  return UTF8_INVALID4((const unsigned char *)p);}struct normal_encoding {  ENCODING enc;  unsigned char type[256];#ifdef XML_MIN_SIZE  int (PTRFASTCALL *byteType)(const ENCODING *, const char *);  int (PTRFASTCALL *isNameMin)(const ENCODING *, const char *);  int (PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *);  int (PTRFASTCALL *byteToAscii)(const ENCODING *, const char *);  int (PTRCALL *charMatches)(const ENCODING *, const char *, int);#endif /* XML_MIN_SIZE */  int (PTRFASTCALL *isName2)(const ENCODING *, const char *);  int (PTRFASTCALL *isName3)(const ENCODING *, const char *);  int (PTRFASTCALL *isName4)(const ENCODING *, const char *);  int (PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *);  int (PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *);  int (PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *);  int (PTRFASTCALL *isInvalid2)(const ENCODING *, const char *);  int (PTRFASTCALL *isInvalid3)(const ENCODING *, const char *);  int (PTRFASTCALL *isInvalid4)(const ENCODING *, const char *);};#define AS_NORMAL_ENCODING(enc)   ((const struct normal_encoding *) (enc))#ifdef XML_MIN_SIZE#define STANDARD_VTABLE(E) \ E ## byteType, \ E ## isNameMin, \ E ## isNmstrtMin, \ E ## byteToAscii, \ E ## charMatches,#else#define STANDARD_VTABLE(E) /* as nothing */#endif#define NORMAL_VTABLE(E) \ E ## isName2, \ E ## isName3, \ E ## isName4, \ E ## isNmstrt2, \ E ## isNmstrt3, \ E ## isNmstrt4, \ E ## isInvalid2, \ E ## isInvalid3, \ E ## isInvalid4static int FASTCALL checkCharRefNumber(int);#include "xmltok_impl.h"#include "ascii.h"#ifdef XML_MIN_SIZE#define sb_isNameMin isNever#define sb_isNmstrtMin isNever#endif#ifdef XML_MIN_SIZE#define MINBPC(enc) ((enc)->minBytesPerChar)#else/* minimum bytes per character */#define MINBPC(enc) 1#endif#define SB_BYTE_TYPE(enc, p) \  (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])#ifdef XML_MIN_SIZEstatic int PTRFASTCALLsb_byteType(const ENCODING *enc, const char *p){  return SB_BYTE_TYPE(enc, p);}#define BYTE_TYPE(enc, p) \ (AS_NORMAL_ENCODING(enc)->byteType(enc, p))#else#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)#endif#ifdef XML_MIN_SIZE#define BYTE_TO_ASCII(enc, p) \ (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))static int PTRFASTCALLsb_byteToAscii(const ENCODING *enc, const char *p){  return *p;}#else#define BYTE_TO_ASCII(enc, p) (*(p))#endif#define IS_NAME_CHAR(enc, p, n) \ (AS_NORMAL_ENCODING(enc)->isName ## n(enc, p))#define IS_NMSTRT_CHAR(enc, p, n) \ (AS_NORMAL_ENCODING(enc)->isNmstrt ## n(enc, p))#define IS_INVALID_CHAR(enc, p, n) \ (AS_NORMAL_ENCODING(enc)->isInvalid ## n(enc, p))#ifdef XML_MIN_SIZE#define IS_NAME_CHAR_MINBPC(enc, p) \ (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))#define IS_NMSTRT_CHAR_MINBPC(enc, p) \ (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))#else#define IS_NAME_CHAR_MINBPC(enc, p) (0)#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)#endif#ifdef XML_MIN_SIZE#define CHAR_MATCHES(enc, p, c) \ (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))static int PTRCALLsb_charMatches(const ENCODING *enc, const char *p, int c){  return *p == c;}#else/* c is an ASCII character */#define CHAR_MATCHES(enc, p, c) (*(p) == c)#endif#define PREFIX(ident) normal_ ## ident#include "xmltok_impl.c"#undef MINBPC#undef BYTE_TYPE#undef BYTE_TO_ASCII#undef CHAR_MATCHES#undef IS_NAME_CHAR#undef IS_NAME_CHAR_MINBPC#undef IS_NMSTRT_CHAR#undef IS_NMSTRT_CHAR_MINBPC#undef IS_INVALID_CHARenum {  /* UTF8_cvalN is value of masked first byte of N byte sequence */  UTF8_cval1 = 0x00,  UTF8_cval2 = 0xc0,  UTF8_cval3 = 0xe0,  UTF8_cval4 = 0xf0};static void PTRCALLutf8_toUtf8(const ENCODING *enc,            const char **fromP, const char *fromLim,            char **toP, const char *toLim){  char *to;  const char *from;  if (fromLim - *fromP > toLim - *toP) {    /* Avoid copying partial characters. */    for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)      if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)        break;  }  for (to = *toP, from = *fromP; from != fromLim; from++, to++)    *to = *from;  *fromP = from;  *toP = to;}static void PTRCALLutf8_toUtf16(const ENCODING *enc,             const char **fromP, const char *fromLim,             unsigned short **toP, const unsigned short *toLim){  unsigned short *to = *toP;  const char *from = *fromP;  while (from != fromLim && to != toLim) {    switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {    case BT_LEAD2:      *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));      from += 2;      break;    case BT_LEAD3:      *to++ = (unsigned short)(((from[0] & 0xf) << 12)                               | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f));      from += 3;      break;    case BT_LEAD4:      {        unsigned long n;        if (to + 1 == toLim)          goto after;        n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)            | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);        n -= 0x10000;        to[0] = (unsigned short)((n >> 10) | 0xD800);        to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);        to += 2;        from += 4;      }      break;    default:      *to++ = *from++;      break;    }  }after:  *fromP = from;  *toP = to;}#ifdef XML_NSstatic const struct normal_encoding utf8_encoding_ns = {  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },  {#include "asciitab.h"#include "utf8tab.h"  },  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};#endifstatic const struct normal_encoding utf8_encoding = {  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },  {#define BT_COLON BT_NMSTRT#include "asciitab.h"#undef BT_COLON#include "utf8tab.h"  },  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};#ifdef XML_NSstatic const struct normal_encoding internal_utf8_encoding_ns = {  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },  {#include "iasciitab.h"#include "utf8tab.h"  },  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};#endifstatic const struct normal_encoding internal_utf8_encoding = {  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },  {#define BT_COLON BT_NMSTRT#include "iasciitab.h"#undef BT_COLON#include "utf8tab.h"  },  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};static void PTRCALLlatin1_toUtf8(const ENCODING *enc,              const char **fromP, const char *fromLim,              char **toP, const char *toLim){  for (;;) {    unsigned char c;    if (*fromP == fromLim)      break;    c = (unsigned char)**fromP;    if (c & 0x80) {      if (toLim - *toP < 2)        break;      *(*toP)++ = (char)((c >> 6) | UTF8_cval2);      *(*toP)++ = (char)((c & 0x3f) | 0x80);      (*fromP)++;    }    else {      if (*toP == toLim)        break;      *(*toP)++ = *(*fromP)++;    }  }}static void PTRCALLlatin1_toUtf16(const ENCODING *enc,               const char **fromP, const char *fromLim,               unsigned short **toP, const unsigned short *toLim){  while (*fromP != fromLim && *toP != toLim)    *(*toP)++ = (unsigned char)*(*fromP)++;}#ifdef XML_NSstatic const struct normal_encoding latin1_encoding_ns = {  { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },  {#include "asciitab.h"#include "latin1tab.h"  },  STANDARD_VTABLE(sb_)};#endifstatic const struct normal_encoding latin1_encoding = {  { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },  {#define BT_COLON BT_NMSTRT#include "asciitab.h"#undef BT_COLON#include "latin1tab.h"  },  STANDARD_VTABLE(sb_)};static void PTRCALLascii_toUtf8(const ENCODING *enc,             const char **fromP, const char *fromLim,             char **toP, const char *toLim){  while (*fromP != fromLim && *toP != toLim)    *(*toP)++ = *(*fromP)++;}#ifdef XML_NSstatic const struct normal_encoding ascii_encoding_ns = {  { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },  {#include "asciitab.h"/* BT_NONXML == 0 */  },  STANDARD_VTABLE(sb_)};#endifstatic const struct normal_encoding ascii_encoding = {  { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },  {#define BT_COLON BT_NMSTRT#include "asciitab.h"#undef BT_COLON/* BT_NONXML == 0 */  },  STANDARD_VTABLE(sb_)};static int PTRFASTCALLunicode_byte_type(char hi, char lo){  switch ((unsigned char)hi) {  case 0xD8: case 0xD9: case 0xDA: case 0xDB:    return BT_LEAD4;  case 0xDC: case 0xDD: case 0xDE: case 0xDF:    return BT_TRAIL;  case 0xFF:    switch ((unsigned char)lo) {    case 0xFF:    case 0xFE:      return BT_NONXML;    }    break;  }  return BT_NONASCII;}#define DEFINE_UTF16_TO_UTF8(E) \static void  PTRCALL \E ## toUtf8(const ENCODING *enc, \            const char **fromP, const char *fromLim, \            char **toP, const char *toLim) \{ \  const char *from; \  for (from = *fromP; from != fromLim; from += 2) { \    int plane; \    unsigned char lo2; \    unsigned char lo = GET_LO(from); \    unsigned char hi = GET_HI(from); \    switch (hi) { \    case 0: \      if (lo < 0x80) { \        if (*toP == toLim) { \
xmltok.c - 源码说明

本页面展示了「很牛的GUI源码wxWidgets-2.8.0.zip 可在多种平台下运行.」中的 xmltok.c 源码文件，采用 C语言编程语言编写，共 1,637 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与wxWidgets相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?