📄 xmltok.c
字号:
/*Copyright (c) 1998, 1999 Thai Open Source Software Center LtdSee the file copying.txt for copying permission.*/#include "xmldef.h"#include "xmltok.h"#include "nametab.h"#ifdef XML_DTD#define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)#else#define IGNORE_SECTION_TOK_VTABLE /* as nothing */#endif#define VTABLE1 \ { PREFIX(prologTok), PREFIX(contentTok), \ PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \ { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \ PREFIX(sameName), \ PREFIX(nameMatchesAscii), \ PREFIX(nameLength), \ PREFIX(skipS), \ PREFIX(getAtts), \ PREFIX(charRefNumber), \ PREFIX(predefinedEntityName), \ PREFIX(updatePosition), \ PREFIX(isPublicId)#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)#define UCS2_GET_NAMING(pages, hi, lo) \ (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))/* A 2 byte UTF-8 representation splits the characters 11 bitsbetween the bottom 5 and 6 bits of the bytes.We need 8 bits to index into pages, 3 bits to add to that index and5 bits to generate the mask. */#define UTF8_GET_NAMING2(pages, byte) \ (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ + ((((byte)[0]) & 3) << 1) \ + ((((byte)[1]) >> 5) & 1)] \ & (1 << (((byte)[1]) & 0x1F)))/* A 3 byte UTF-8 representation splits the characters 16 bitsbetween the bottom 4, 6 and 6 bits of the bytes.We need 8 bits to index into pages, 3 bits to add to that index and5 bits to generate the mask. */#define UTF8_GET_NAMING3(pages, byte) \ (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \ + ((((byte)[1]) >> 2) & 0xF)] \ << 3) \ + ((((byte)[1]) & 3) << 1) \ + ((((byte)[2]) >> 5) & 1)] \ & (1 << (((byte)[2]) & 0x1F)))#define UTF8_GET_NAMING(pages, p, n) \ ((n) == 2 \ ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \ : ((n) == 3 \ ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \ : 0))#define UTF8_INVALID3(p) \ ((*p) == 0xED \ ? (((p)[1] & 0x20) != 0) \ : ((*p) == 0xEF \ ? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \ : 0))#define UTF8_INVALID4(p) ((*p) == 0xF4 && ((p)[1] & 0x30) != 0)staticint isNever(const ENCODING *enc, const char *p){ return 0;}staticint utf8_isName2(const ENCODING *enc, const char *p){ return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);}staticint utf8_isName3(const ENCODING *enc, const char *p){ return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);}#define utf8_isName4 isNeverstaticint utf8_isNmstrt2(const ENCODING *enc, const char *p){ return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);}staticint utf8_isNmstrt3(const ENCODING *enc, const char *p){ return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);}#define utf8_isNmstrt4 isNever#define utf8_isInvalid2 isNeverstaticint utf8_isInvalid3(const ENCODING *enc, const char *p){ return UTF8_INVALID3((const unsigned char *)p);}staticint utf8_isInvalid4(const ENCODING *enc, const char *p){ return UTF8_INVALID4((const unsigned char *)p);}struct normal_encoding { ENCODING enc; unsigned char type[256];#ifdef XML_MIN_SIZE int (*byteType)(const ENCODING *, const char *); int (*isNameMin)(const ENCODING *, const char *); int (*isNmstrtMin)(const ENCODING *, const char *); int (*byteToAscii)(const ENCODING *, const char *); int (*charMatches)(const ENCODING *, const char *, int);#endif /* XML_MIN_SIZE */ int (*isName2)(const ENCODING *, const char *); int (*isName3)(const ENCODING *, const char *); int (*isName4)(const ENCODING *, const char *); int (*isNmstrt2)(const ENCODING *, const char *); int (*isNmstrt3)(const ENCODING *, const char *); int (*isNmstrt4)(const ENCODING *, const char *); int (*isInvalid2)(const ENCODING *, const char *); int (*isInvalid3)(const ENCODING *, const char *); int (*isInvalid4)(const ENCODING *, const char *);};#ifdef XML_MIN_SIZE#define STANDARD_VTABLE(E) \ E ## byteType, \ E ## isNameMin, \ E ## isNmstrtMin, \ E ## byteToAscii, \ E ## charMatches,#else#define STANDARD_VTABLE(E) /* as nothing */#endif#define NORMAL_VTABLE(E) \ E ## isName2, \ E ## isName3, \ E ## isName4, \ E ## isNmstrt2, \ E ## isNmstrt3, \ E ## isNmstrt4, \ E ## isInvalid2, \ E ## isInvalid3, \ E ## isInvalid4static int checkCharRefNumber(int);#include "xmltok_impl.h"#include "ascii.h"#ifdef XML_MIN_SIZE#define sb_isNameMin isNever#define sb_isNmstrtMin isNever#endif#ifdef XML_MIN_SIZE#define MINBPC(enc) ((enc)->minBytesPerChar)#else/* minimum bytes per character */#define MINBPC(enc) 1#endif#define SB_BYTE_TYPE(enc, p) \ (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])#ifdef XML_MIN_SIZEstaticint sb_byteType(const ENCODING *enc, const char *p){ return SB_BYTE_TYPE(enc, p);}#define BYTE_TYPE(enc, p) \ (((const struct normal_encoding *)(enc))->byteType(enc, p))#else#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)#endif#ifdef XML_MIN_SIZE#define BYTE_TO_ASCII(enc, p) \ (((const struct normal_encoding *)(enc))->byteToAscii(enc, p))staticint sb_byteToAscii(const ENCODING *enc, const char *p){ return *p;}#else#define BYTE_TO_ASCII(enc, p) (*(p))#endif#define IS_NAME_CHAR(enc, p, n) \ (((const struct normal_encoding *)(enc))->isName ## n(enc, p))#define IS_NMSTRT_CHAR(enc, p, n) \ (((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p))#define IS_INVALID_CHAR(enc, p, n) \ (((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p))#ifdef XML_MIN_SIZE#define IS_NAME_CHAR_MINBPC(enc, p) \ (((const struct normal_encoding *)(enc))->isNameMin(enc, p))#define IS_NMSTRT_CHAR_MINBPC(enc, p) \ (((const struct normal_encoding *)(enc))->isNmstrtMin(enc, p))#else#define IS_NAME_CHAR_MINBPC(enc, p) (0)#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)#endif#ifdef XML_MIN_SIZE#define CHAR_MATCHES(enc, p, c) \ (((const struct normal_encoding *)(enc))->charMatches(enc, p, c))staticint sb_charMatches(const ENCODING *enc, const char *p, int c){ return *p == c;}#else/* c is an ASCII character */#define CHAR_MATCHES(enc, p, c) (*(p) == c)#endif#define PREFIX(ident) normal_ ## ident#include "xmltok_impl.c"#undef MINBPC#undef BYTE_TYPE#undef BYTE_TO_ASCII#undef CHAR_MATCHES#undef IS_NAME_CHAR#undef IS_NAME_CHAR_MINBPC#undef IS_NMSTRT_CHAR#undef IS_NMSTRT_CHAR_MINBPC#undef IS_INVALID_CHARenum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ UTF8_cval1 = 0x00, UTF8_cval2 = 0xc0, UTF8_cval3 = 0xe0, UTF8_cval4 = 0xf0};staticvoid utf8_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim){ char *to; const char *from; if (fromLim - *fromP > toLim - *toP) { /* Avoid copying partial characters. */ for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) break; } for (to = *toP, from = *fromP; from != fromLim; from++, to++) *to = *from; *fromP = from; *toP = to;}staticvoid utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim){ unsigned short *to = *toP; const char *from = *fromP; while (from != fromLim && to != toLim) { switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { case BT_LEAD2: *to++ = ((from[0] & 0x1f) << 6) | (from[1] & 0x3f); from += 2; break; case BT_LEAD3: *to++ = ((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f); from += 3; break; case BT_LEAD4: { unsigned long n; if (to + 1 == toLim) break; n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); n -= 0x10000; to[0] = (unsigned short)((n >> 10) | 0xD800); to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); to += 2; from += 4; } break; default: *to++ = *from++; break; } } *fromP = from; *toP = to;}#ifdef XML_NSstatic const struct normal_encoding utf8_encoding_ns = { { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, {#include "asciitab.h"#include "utf8tab.h" }, STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};#endifstatic const struct normal_encoding utf8_encoding = { { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, {#define BT_COLON BT_NMSTRT#include "asciitab.h"#undef BT_COLON#include "utf8tab.h" }, STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};#ifdef XML_NSstatic const struct normal_encoding internal_utf8_encoding_ns = { { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, {#include "iasciitab.h"#include "utf8tab.h" }, STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};#endifstatic const struct normal_encoding internal_utf8_encoding = { { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, {#define BT_COLON BT_NMSTRT#include "iasciitab.h"#undef BT_COLON#include "utf8tab.h" }, STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};staticvoid latin1_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim){ for (;;) { unsigned char c; if (*fromP == fromLim) break; c = (unsigned char)**fromP; if (c & 0x80) { if (toLim - *toP < 2) break; *(*toP)++ = ((c >> 6) | UTF8_cval2); *(*toP)++ = ((c & 0x3f) | 0x80); (*fromP)++; } else { if (*toP == toLim) break; *(*toP)++ = *(*fromP)++; } }}staticvoid latin1_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim){ while (*fromP != fromLim && *toP != toLim) *(*toP)++ = (unsigned char)*(*fromP)++;}#ifdef XML_NSstatic const struct normal_encoding latin1_encoding_ns = { { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, {#include "asciitab.h"#include "latin1tab.h" }, STANDARD_VTABLE(sb_)};#endifstatic const struct normal_encoding latin1_encoding = { { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, {#define BT_COLON BT_NMSTRT#include "asciitab.h"#undef BT_COLON#include "latin1tab.h" }, STANDARD_VTABLE(sb_)};staticvoid ascii_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim){ while (*fromP != fromLim && *toP != toLim) *(*toP)++ = *(*fromP)++;}#ifdef XML_NSstatic const struct normal_encoding ascii_encoding_ns = { { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, {#include "asciitab.h"/* BT_NONXML == 0 */ }, STANDARD_VTABLE(sb_)};#endifstatic const struct normal_encoding ascii_encoding = { { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 }, {#define BT_COLON BT_NMSTRT#include "asciitab.h"#undef BT_COLON/* BT_NONXML == 0 */ }, STANDARD_VTABLE(sb_)};static int unicode_byte_type(char hi, char lo){ switch ((unsigned char)hi) { case 0xD8: case 0xD9: case 0xDA: case 0xDB: return BT_LEAD4; case 0xDC: case 0xDD: case 0xDE: case 0xDF: return BT_TRAIL; case 0xFF: switch ((unsigned char)lo) { case 0xFF: case 0xFE: return BT_NONXML; } break; } return BT_NONASCII;}#define DEFINE_UTF16_TO_UTF8(E) \static \void E ## toUtf8(const ENCODING *enc, \ const char **fromP, const char *fromLim, \ char **toP, const char *toLim) \{ \ const char *from; \ for (from = *fromP; from != fromLim; from += 2) { \ int plane; \ unsigned char lo2; \ unsigned char lo = GET_LO(from); \ unsigned char hi = GET_HI(from); \ switch (hi) { \ case 0: \ if (lo < 0x80) { \ if (*toP == toLim) { \ *fromP = from; \ return; \ } \ *(*toP)++ = lo; \ break; \ } \ /* fall through */ \ case 0x1: case 0x2: case 0x3: \ case 0x4: case 0x5: case 0x6: case 0x7: \ if (toLim - *toP < 2) { \ *fromP = from; \ return; \ } \ *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ *(*toP)++ = ((lo & 0x3f) | 0x80); \ break; \ default: \ if (toLim - *toP < 3) { \ *fromP = from; \ return; \ } \ /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \ *(*toP)++ = ((lo & 0x3f) | 0x80); \ break; \ case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ if (toLim - *toP < 4) { \ *fromP = from; \ return; \
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -