⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xmltok.c

📁 一个用visual studio C++开发的XML文件解析和生成程序.
💻 C
📖 第 1 页 / 共 3 页
字号:
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file copying.txt for copying permission.
*/

#include "xmldef.h"
#include "xmltok.h"
#include "nametab.h"

/* Optional extra entry appended to the first brace group of VTABLE1 when
   DTD support is compiled in. */
#ifdef XML_DTD
#define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
#else
#define IGNORE_SECTION_TOK_VTABLE /* as nothing */
#endif

/* Leading initializers for the ENCODING member of an encoding object, built
   from the routines named by PREFIX().  The two conversion routines are
   supplied separately so encodings can share the scanners but differ in how
   they convert (see VTABLE and the *_encoding objects below). */
#define VTABLE1 \
  { PREFIX(prologTok), PREFIX(contentTok), \
    PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \
  { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
  PREFIX(sameName), \
  PREFIX(nameMatchesAscii), \
  PREFIX(nameLength), \
  PREFIX(skipS), \
  PREFIX(getAtts), \
  PREFIX(charRefNumber), \
  PREFIX(predefinedEntityName), \
  PREFIX(updatePosition), \
  PREFIX(isPublicId)

/* VTABLE1 followed by the PREFIX()-named conversion routines. */
#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)

/* Test the naming bit of the 16-bit character (hi, lo): pages[hi] selects a
   group of 8 bitmap words, the top 3 bits of lo select the word and the low
   5 bits select the bit.  The mask uses an unsigned 1 so that bit 31 can be
   selected without shifting into the sign bit of an int. */
#define UCS2_GET_NAMING(pages, hi, lo) \
   (namingBitmap[((pages)[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))

/* A 2 byte UTF-8 representation splits the characters 11 bits
between the bottom 5 and 6 bits of the bytes.
We need 8 bits to index into pages, 3 bits to add to that index and
5 bits to generate the mask. */
#define UTF8_GET_NAMING2(pages, byte) \
    (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
                      + ((((byte)[0]) & 3) << 1) \
                      + ((((byte)[1]) >> 5) & 1)] \
         & (1u << (((byte)[1]) & 0x1F)))

/* A 3 byte UTF-8 representation splits the characters 16 bits
between the bottom 4, 6 and 6 bits of the bytes.
We need 8 bits to index into pages, 3 bits to add to that index and
5 bits to generate the mask. */
#define UTF8_GET_NAMING3(pages, byte) \
  (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
                             + ((((byte)[1]) >> 2) & 0xF)] \
                       << 3) \
                      + ((((byte)[1]) & 3) << 1) \
                      + ((((byte)[2]) >> 5) & 1)] \
         & (1u << (((byte)[2]) & 0x1F)))

/* Dispatch on the sequence length n; lengths other than 2 and 3 yield 0. */
#define UTF8_GET_NAMING(pages, p, n) \
  ((n) == 2 \
  ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
  : ((n) == 3 \
     ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
     : 0))

/* Non-zero for a 3 byte sequence that starts with 0xED and has bit 0x20 set
   in its second byte (the encodings of U+D800..U+DFFF), or that is exactly
   EF BF BE / EF BF BF (U+FFFE / U+FFFF).  p must point to unsigned char. */
#define UTF8_INVALID3(p) \
  ((*(p)) == 0xED \
  ? (((p)[1] & 0x20) != 0) \
  : ((*(p)) == 0xEF \
     ? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \
     : 0))

/* Non-zero for a 4 byte sequence that starts with 0xF4 and whose second byte
   has any of the bits 0x30 set (code points above U+10FFFF).  p must point
   to unsigned char. */
#define UTF8_INVALID4(p) ((*(p)) == 0xF4 && ((p)[1] & 0x30) != 0)

/* Predicate that always answers "no"; used to fill vtable slots for which
   no character can qualify. */
static int isNever(const ENCODING *enc, const char *p)
{
  return 0;
}

/* Name-character test for the 2 byte UTF-8 sequence at p. */
static int utf8_isName2(const ENCODING *enc, const char *p)
{
  return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
}

/* Name-character test for the 3 byte UTF-8 sequence at p. */
static int utf8_isName3(const ENCODING *enc, const char *p)
{
  return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
}

#define utf8_isName4 isNever

/* Name-start-character test for the 2 byte UTF-8 sequence at p. */
static int utf8_isNmstrt2(const ENCODING *enc, const char *p)
{
  return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
}

/* Name-start-character test for the 3 byte UTF-8 sequence at p. */
static int utf8_isNmstrt3(const ENCODING *enc, const char *p)
{
  return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
}

#define utf8_isNmstrt4 isNever

#define utf8_isInvalid2 isNever

/* Validity test (see UTF8_INVALID3) for the 3 byte sequence at p. */
static int utf8_isInvalid3(const ENCODING *enc, const char *p)
{
  return UTF8_INVALID3((const unsigned char *)p);
}

/* Validity test (see UTF8_INVALID4) for the 4 byte sequence at p. */
static int utf8_isInvalid4(const ENCODING *enc, const char *p)
{
  return UTF8_INVALID4((const unsigned char *)p);
}

/* An ENCODING extended with a per-byte type table and the per-encoding
   predicates that the IS_*_CHAR macros below dispatch through.  Code in this
   file reaches it by casting an ENCODING pointer, which relies on enc being
   the first member. */
struct normal_encoding {
  ENCODING enc;
  unsigned char type[256];   /* byte type, indexed by byte value */
#ifdef XML_MIN_SIZE
  int (*byteType)(const ENCODING *, const char *);
  int (*isNameMin)(const ENCODING *, const char *);
  int (*isNmstrtMin)(const ENCODING *, const char *);
  int (*byteToAscii)(const ENCODING *, const char *);
  int (*charMatches)(const ENCODING *, const char *, int);
#endif /* XML_MIN_SIZE */
  int (*isName2)(const ENCODING *, const char *);
  int (*isName3)(const ENCODING *, const char *);
  int (*isName4)(const ENCODING *, const char *);
  int (*isNmstrt2)(const ENCODING *, const char *);
  int (*isNmstrt3)(const ENCODING *, const char *);
  int (*isNmstrt4)(const ENCODING *, const char *);
  int (*isInvalid2)(const ENCODING *, const char *);
  int (*isInvalid3)(const ENCODING *, const char *);
  int (*isInvalid4)(const ENCODING *, const char *);
};

/* Initializers for the XML_MIN_SIZE-only members of struct normal_encoding,
   taken from the functions whose names start with E.  Note the trailing
   comma: this is meant to be followed directly by further initializers or
   by the closing brace. */
#ifdef XML_MIN_SIZE

#define STANDARD_VTABLE(E) \
 E ## byteType, \
 E ## isNameMin, \
 E ## isNmstrtMin, \
 E ## byteToAscii, \
 E ## charMatches,

#else

#define STANDARD_VTABLE(E) /* as nothing */

#endif

/* Initializers for the multi-byte predicate members of
   struct normal_encoding, taken from the functions whose names start
   with E. */
#define NORMAL_VTABLE(E) \
 E ## isName2, \
 E ## isName3, \
 E ## isName4, \
 E ## isNmstrt2, \
 E ## isNmstrt3, \
 E ## isNmstrt4, \
 E ## isInvalid2, \
 E ## isInvalid3, \
 E ## isInvalid4

static int checkCharRefNumber(int);

#include "xmltok_impl.h"
#include "ascii.h"

#ifdef XML_MIN_SIZE
#define sb_isNameMin isNever
#define sb_isNmstrtMin isNever
#endif

#ifdef XML_MIN_SIZE
#define MINBPC(enc) ((enc)->minBytesPerChar)
#else
/* minimum bytes per character */
#define MINBPC(enc) 1
#endif

/* Look up the type of the single byte at p in the encoding's type table. */
#define SB_BYTE_TYPE(enc, p) \
  (((const struct normal_encoding *)(enc))->type[(unsigned char)*(p)])

#ifdef XML_MIN_SIZE
static int sb_byteType(const ENCODING *enc, const char *p)
{
  return SB_BYTE_TYPE(enc, p);
}
#define BYTE_TYPE(enc, p) \
 (((const struct normal_encoding *)(enc))->byteType(enc, p))
#else
#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
#endif

#ifdef XML_MIN_SIZE
#define BYTE_TO_ASCII(enc, p) \
 (((const struct normal_encoding *)(enc))->byteToAscii(enc, p))
static int sb_byteToAscii(const ENCODING *enc, const char *p)
{
  return *p;
}
#else
#define BYTE_TO_ASCII(enc, p) (*(p))
#endif

/* Dispatch to the encoding's predicate for an n byte character at p. */
#define IS_NAME_CHAR(enc, p, n) \
 (((const struct normal_encoding *)(enc))->isName ## n(enc, p))
#define IS_NMSTRT_CHAR(enc, p, n) \
 (((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p))
#define IS_INVALID_CHAR(enc, p, n) \
 (((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p))

#ifdef XML_MIN_SIZE
#define IS_NAME_CHAR_MINBPC(enc, p) \
 (((const struct normal_encoding *)(enc))->isNameMin(enc, p))
#define IS_NMSTRT_CHAR_MINBPC(enc, p) \
 (((const struct normal_encoding *)(enc))->isNmstrtMin(enc, p))
#else
#define IS_NAME_CHAR_MINBPC(enc, p) (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
#endif

#ifdef XML_MIN_SIZE
#define CHAR_MATCHES(enc, p, c) \
 (((const struct normal_encoding *)(enc))->charMatches(enc, p, c))
static int sb_charMatches(const ENCODING *enc, const char *p, int c)
{
  return *p == c;
}
#else
/* c is an ASCII character */
#define CHAR_MATCHES(enc, p, c) (*(p) == (c))
#endif

/* Instantiate the scanner implementation with the macros defined above;
   every routine it defines through PREFIX() gets a normal_ name. */
#define PREFIX(ident) normal_ ## ident
#include "xmltok_impl.c"

#undef MINBPC
#undef BYTE_TYPE
#undef BYTE_TO_ASCII
#undef CHAR_MATCHES
#undef IS_NAME_CHAR
#undef IS_NAME_CHAR_MINBPC
#undef IS_NMSTRT_CHAR
#undef IS_NMSTRT_CHAR_MINBPC
#undef IS_INVALID_CHAR

enum {  /* UTF8_cvalN is value of masked first byte of N byte sequence */
  UTF8_cval1 = 0x00,
  UTF8_cval2 = 0xc0,
  UTF8_cval3 = 0xe0,
  UTF8_cval4 = 0xf0
};

/* Copy UTF-8 bytes from [*fromP, fromLim) to [*toP, toLim).
   On return *fromP and *toP have been advanced past the bytes copied.
   When the output has less room than the input, the copy is shortened so
   that it ends on a character boundary; the remaining input is left for a
   later call.  If not even the first character fits, nothing is copied. */
static void utf8_toUtf8(const ENCODING *enc,
                        const char **fromP, const char *fromLim,
                        char **toP, const char *toLim)
{
  char *to;
  const char *from;
  if (fromLim - *fromP > toLim - *toP) {
    /* Avoid copying partial characters.  The cut point lies strictly
       before the real end of input, so the byte at fromLim is readable;
       while it is a continuation byte (10xxxxxx) the cut would split a
       character, so move the cut back.  Testing fromLim[-1] instead would
       stop right after a lead byte and copy a truncated character. */
    fromLim = *fromP + (toLim - *toP);
    while (fromLim > *fromP && ((unsigned char)fromLim[0] & 0xc0) == 0x80)
      fromLim--;
  }
  for (to = *toP, from = *fromP; from != fromLim; from++, to++)
    *to = *from;
  *fromP = from;
  *toP = to;
}

/* Convert UTF-8 from [*fromP, fromLim) to UTF-16 code units in
   [*toP, toLim).  Conversion stops when the input is used up or the next
   character does not fit in the output.  On return *fromP and *toP have
   been advanced past what was converted.
   NOTE(review): the input is not checked for truncated sequences here;
   presumably the scanner only passes complete characters - confirm against
   the callers. */
static void utf8_toUtf16(const ENCODING *enc,
                         const char **fromP, const char *fromLim,
                         unsigned short **toP, const unsigned short *toLim)
{
  unsigned short *to = *toP;
  const char *from = *fromP;
  while (from != fromLim && to != toLim) {
    switch (((const struct normal_encoding *)enc)->type[(unsigned char)*from]) {
    case BT_LEAD2:
      *to++ = ((from[0] & 0x1f) << 6) | (from[1] & 0x3f);
      from += 2;
      break;
    case BT_LEAD3:
      *to++ = ((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f);
      from += 3;
      break;
    case BT_LEAD4:
      {
        unsigned long n;
        /* A surrogate pair needs two output units.  Leave the loop when
           only one is free: a plain break would only exit the switch and
           the loop would then spin forever without advancing. */
        if (to + 1 == toLim)
          goto after;
        n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
        n -= 0x10000;
        to[0] = (unsigned short)((n >> 10) | 0xD800);
        to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
        to += 2;
        from += 4;
      }
      break;
    default:
      *to++ = *from++;
      break;
    }
  }
after:
  *fromP = from;
  *toP = to;
}

/* UTF-8 encoding objects.  All four share the same routines and differ only
   in which ASCII type table fills the first part of type[].
   In the variants without the _ns suffix, BT_COLON is temporarily defined as
   BT_NMSTRT while the table is included, so any BT_COLON entry in the table
   becomes BT_NMSTRT.
   NOTE(review): the trailing "1, 1, 0" initializers presumably fill
   minBytesPerChar and two flags of ENCODING - confirm against xmltok.h. */

#ifdef XML_NS

static const struct normal_encoding utf8_encoding_ns = {
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
  {
#include "asciitab.h"
#include "utf8tab.h"
  },
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
};

#endif

static const struct normal_encoding utf8_encoding = {
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
  {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "utf8tab.h"
  },
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
};

#ifdef XML_NS

static const struct normal_encoding internal_utf8_encoding_ns = {
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
  {
#include "iasciitab.h"
#include "utf8tab.h"
  },
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
};

#endif

static const struct normal_encoding internal_utf8_encoding = {
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
  {
#define BT_COLON BT_NMSTRT
#include "iasciitab.h"
#undef BT_COLON
#include "utf8tab.h"
  },
  STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
};

/* Convert Latin-1 bytes from [*fromP, fromLim) to UTF-8 in [*toP, toLim).
   Bytes with the top bit set become a 2 byte sequence, all others are
   copied as they are.  Stops, leaving *fromP at the first unconverted byte,
   when the next character does not fit in the output. */
static void latin1_toUtf8(const ENCODING *enc,
                          const char **fromP, const char *fromLim,
                          char **toP, const char *toLim)
{
  for (;;) {
    unsigned char c;
    if (*fromP == fromLim)
      break;
    c = (unsigned char)**fromP;
    if (c & 0x80) {
      if (toLim - *toP < 2)
        break;
      *(*toP)++ = ((c >> 6) | UTF8_cval2);
      *(*toP)++ = ((c & 0x3f) | 0x80);
      (*fromP)++;
    }
    else {
      if (*toP == toLim)
        break;
      *(*toP)++ = *(*fromP)++;
    }
  }
}

/* Convert Latin-1 bytes to UTF-16 by widening each byte (taken as unsigned)
   to one code unit, until the input or the output runs out. */
static void latin1_toUtf16(const ENCODING *enc,
                           const char **fromP, const char *fromLim,
                           unsigned short **toP, const unsigned short *toLim)
{
  while (*fromP != fromLim && *toP != toLim)
    *(*toP)++ = (unsigned char)*(*fromP)++;
}

/* Latin-1 encoding objects.  Only STANDARD_VTABLE is supplied, so the
   multi-byte predicate members are left zero-initialized. */

#ifdef XML_NS

static const struct normal_encoding latin1_encoding_ns = {
  { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
  {
#include "asciitab.h"
#include "latin1tab.h"
  },
  STANDARD_VTABLE(sb_)
};

#endif

static const struct normal_encoding latin1_encoding = {
  { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
  {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
  },
  STANDARD_VTABLE(sb_)
};

/* Copy bytes unchanged from input to output until either runs out. */
static void ascii_toUtf8(const ENCODING *enc,
                         const char **fromP, const char *fromLim,
                         char **toP, const char *toLim)
{
  while (*fromP != fromLim && *toP != toLim)
    *(*toP)++ = *(*fromP)++;
}

/* ASCII encoding objects.  Only the ASCII table is included, so the rest of
   type[] is zero-initialized; the original comment notes that zero is
   BT_NONXML.  UTF-16 conversion reuses latin1_toUtf16. */

#ifdef XML_NS

static const struct normal_encoding ascii_encoding_ns = {
  { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
  {
#include "asciitab.h"
/* BT_NONXML == 0 */
  },
  STANDARD_VTABLE(sb_)
};

#endif

static const struct normal_encoding ascii_encoding = {
  { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
  {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
/* BT_NONXML == 0 */
  },
  STANDARD_VTABLE(sb_)
};

/* Classify a 16-bit code unit given as its high and low bytes:
   0xD8..0xDB -> BT_LEAD4, 0xDC..0xDF -> BT_TRAIL,
   0xFFFE and 0xFFFF -> BT_NONXML, anything else -> BT_NONASCII. */
static int unicode_byte_type(char hi, char lo)
{
  switch ((unsigned char)hi) {
  case 0xD8: case 0xD9: case 0xDA: case 0xDB:
    return BT_LEAD4;
  case 0xDC: case 0xDD: case 0xDE: case 0xDF:
    return BT_TRAIL;
  case 0xFF:
    switch ((unsigned char)lo) {
    case 0xFF:
    case 0xFE:
      return BT_NONXML;
    }
    break;
  }
  return BT_NONASCII;
}

/* Generates E##toUtf8, a UTF-16 to UTF-8 converter that reads each code unit
   through GET_LO / GET_HI.
   NOTE(review): the rest of this macro lies beyond the visible portion of
   the file; the text below is reproduced unchanged up to that point. */
#define DEFINE_UTF16_TO_UTF8(E) \
static \
void E ## toUtf8(const ENCODING *enc, \
                 const char **fromP, const char *fromLim, \
                 char **toP, const char *toLim) \
{ \
  const char *from; \
  for (from = *fromP; from != fromLim; from += 2) { \
    int plane; \
    unsigned char lo2; \
    unsigned char lo = GET_LO(from); \
    unsigned char hi = GET_HI(from); \
    switch (hi) { \
    case 0: \
      if (lo < 0x80) { \
        if (*toP == toLim) { \
          *fromP = from; \
          return; \
        } \
        *(*toP)++ = lo; \
        break; \
      } \
      /* fall through */ \
    case 0x1: case 0x2: case 0x3: \
    case 0x4: case 0x5: case 0x6: case 0x7: \
      if (toLim -  *toP < 2) { \
        *fromP = from; \
        return; \
      } \
      *(*toP)++ = ((lo >> 6) | (hi << 2) |  UTF8_cval2); \
      *(*toP)++ = ((lo & 0x3f) | 0x80); \
      break; \
    default: \
      if (toLim -  *toP < 3)  { \
        *fromP = from; \
        return; \
      } \
      /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
      *(*toP)++ = ((hi >> 4) | UTF8_cval3); \
      *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
      *(*toP)++ = ((lo & 0x3f) | 0x80); \
      break; \
    case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
      if (toLim -  *toP < 4) { \
        *fromP = from; \
        return; \

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -