📄 xmltok.c
字号:
*(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ *(*toP)++ = ((lo & 0x3f) | 0x80); \ break; \ default: \ if (toLim - *toP < 3) { \ *fromP = from; \ return; \ } \ /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \ *(*toP)++ = ((lo & 0x3f) | 0x80); \ break; \ case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ if (toLim - *toP < 4) { \ *fromP = from; \ return; \ } \ plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ *(*toP)++ = ((plane >> 2) | UTF8_cval4); \ *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \ from += 2; \ lo2 = GET_LO(from); \ *(*toP)++ = (((lo & 0x3) << 4) \ | ((GET_HI(from) & 0x3) << 2) \ | (lo2 >> 6) \ | 0x80); \ *(*toP)++ = ((lo2 & 0x3f) | 0x80); \ break; \ } \ } \ *fromP = from; \}#define DEFINE_UTF16_TO_UTF16(E) \static \void E ## toUtf16(const ENCODING *enc, \ const char **fromP, const char *fromLim, \ unsigned short **toP, const unsigned short *toLim) \{ \ /* Avoid copying first half only of surrogate */ \ if (fromLim - *fromP > ((toLim - *toP) << 1) \ && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \ fromLim -= 2; \ for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \ *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \}#define SET2(ptr, ch) \ (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))#define GET_LO(ptr) ((unsigned char)(ptr)[0])#define GET_HI(ptr) ((unsigned char)(ptr)[1])DEFINE_UTF16_TO_UTF8(little2_)DEFINE_UTF16_TO_UTF16(little2_)#undef SET2#undef GET_LO#undef GET_HI#define SET2(ptr, ch) \ (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))#define GET_LO(ptr) ((unsigned char)(ptr)[1])#define GET_HI(ptr) ((unsigned char)(ptr)[0])DEFINE_UTF16_TO_UTF8(big2_)DEFINE_UTF16_TO_UTF16(big2_)#undef SET2#undef GET_LO#undef GET_HI#define LITTLE2_BYTE_TYPE(enc, p) \ ((p)[1] == 0 \ ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \ : unicode_byte_type((p)[1], (p)[0]))#define LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1)#define LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == c)#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) \ UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0])#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) \ UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0])#ifdef XML_MIN_SIZEstaticint little2_byteType(const ENCODING *enc, const char *p){ return LITTLE2_BYTE_TYPE(enc, p);}staticint little2_byteToAscii(const ENCODING *enc, const char *p){ return LITTLE2_BYTE_TO_ASCII(enc, p);}staticint little2_charMatches(const ENCODING *enc, const char *p, int c){ return LITTLE2_CHAR_MATCHES(enc, p, c);}staticint little2_isNameMin(const ENCODING *enc, const char *p){ return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p);}staticint little2_isNmstrtMin(const ENCODING *enc, const char *p){ return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p);}#undef VTABLE#define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16#else /* not XML_MIN_SIZE */#undef PREFIX#define PREFIX(ident) little2_ ## ident#define MINBPC(enc) 2/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */#define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p) #define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c)#define IS_NAME_CHAR(enc, p, n) 0#define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)#define IS_NMSTRT_CHAR(enc, p, n) (0)#define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)#include "xmltok_impl.c"#undef MINBPC#undef BYTE_TYPE#undef BYTE_TO_ASCII#undef CHAR_MATCHES#undef IS_NAME_CHAR#undef IS_NAME_CHAR_MINBPC#undef IS_NMSTRT_CHAR#undef IS_NMSTRT_CHAR_MINBPC#undef IS_INVALID_CHAR#endif /* not XML_MIN_SIZE */#ifdef XML_NSstatic const struct normal_encoding little2_encoding_ns = { { VTABLE, 2, 0,#if XML_BYTE_ORDER == 12 1#else 0#endif }, {#include "asciitab.h"#include "latin1tab.h" }, STANDARD_VTABLE(little2_)};#endifstatic const struct normal_encoding little2_encoding = { { VTABLE, 2, 0,#if XML_BYTE_ORDER == 12 1#else 0#endif }, {#define BT_COLON BT_NMSTRT#include "asciitab.h"#undef BT_COLON#include "latin1tab.h" }, STANDARD_VTABLE(little2_)};#if XML_BYTE_ORDER != 21#ifdef XML_NSstatic const struct normal_encoding internal_little2_encoding_ns = { { VTABLE, 2, 0, 1 }, {#include "iasciitab.h"#include "latin1tab.h" }, STANDARD_VTABLE(little2_)};#endifstatic const struct normal_encoding internal_little2_encoding = { { VTABLE, 2, 0, 1 }, {#define BT_COLON BT_NMSTRT#include "iasciitab.h"#undef BT_COLON#include "latin1tab.h" }, STANDARD_VTABLE(little2_)};#endif#define BIG2_BYTE_TYPE(enc, p) \ ((p)[0] == 0 \ ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \ : unicode_byte_type((p)[0], (p)[1]))#define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)#define BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == c)#define BIG2_IS_NAME_CHAR_MINBPC(enc, p) \ UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1])#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) \ UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1])#ifdef XML_MIN_SIZEstaticint big2_byteType(const ENCODING *enc, const char *p){ return BIG2_BYTE_TYPE(enc, p);}staticint big2_byteToAscii(const ENCODING *enc, const char *p){ return BIG2_BYTE_TO_ASCII(enc, p);}staticint big2_charMatches(const ENCODING *enc, const char *p, int c){ return BIG2_CHAR_MATCHES(enc, p, c);}staticint big2_isNameMin(const ENCODING *enc, const char *p){ return BIG2_IS_NAME_CHAR_MINBPC(enc, p);}staticint big2_isNmstrtMin(const ENCODING *enc, const char *p){ return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p);}#undef VTABLE#define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16#else /* not XML_MIN_SIZE */#undef PREFIX#define PREFIX(ident) big2_ ## ident#define MINBPC(enc) 2/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */#define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p) #define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c)#define IS_NAME_CHAR(enc, p, n) 0#define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p)#define IS_NMSTRT_CHAR(enc, p, n) (0)#define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)#include "xmltok_impl.c"#undef MINBPC#undef BYTE_TYPE#undef BYTE_TO_ASCII#undef CHAR_MATCHES#undef IS_NAME_CHAR#undef IS_NAME_CHAR_MINBPC#undef IS_NMSTRT_CHAR#undef IS_NMSTRT_CHAR_MINBPC#undef IS_INVALID_CHAR#endif /* not XML_MIN_SIZE */#ifdef XML_NSstatic const struct normal_encoding big2_encoding_ns = { { VTABLE, 2, 0,#if XML_BYTE_ORDER == 21 1#else 0#endif }, {#include "asciitab.h"#include "latin1tab.h" }, STANDARD_VTABLE(big2_)};#endifstatic const struct normal_encoding big2_encoding = { { VTABLE, 2, 0,#if XML_BYTE_ORDER == 21 1#else 0#endif }, {#define BT_COLON BT_NMSTRT#include "asciitab.h"#undef BT_COLON#include "latin1tab.h" }, STANDARD_VTABLE(big2_)};#if XML_BYTE_ORDER != 12#ifdef XML_NSstatic const struct normal_encoding internal_big2_encoding_ns = { { VTABLE, 2, 0, 1 }, {#include "iasciitab.h"#include "latin1tab.h" }, STANDARD_VTABLE(big2_)};#endifstatic const struct normal_encoding internal_big2_encoding = { { VTABLE, 2, 0, 1 }, {#define BT_COLON BT_NMSTRT#include "iasciitab.h"#undef BT_COLON#include "latin1tab.h" }, STANDARD_VTABLE(big2_)};#endif#undef PREFIXstaticint streqci(const char *s1, const char *s2){ for (;;) { char c1 = *s1++; char c2 = *s2++; if (ASCII_a <= c1 && c1 <= ASCII_z) c1 += ASCII_A - ASCII_a; if (ASCII_a <= c2 && c2 <= ASCII_z) c2 += ASCII_A - ASCII_a; if (c1 != c2) return 0; if (!c1) break; } return 1;}staticvoid initUpdatePosition(const ENCODING *enc, const char *ptr, const char *end, POSITION *pos){ normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);}staticint toAscii(const ENCODING *enc, const char *ptr, const char *end){ char buf[1]; char *p = buf; XmlUtf8Convert(enc, &ptr, end, &p, p + 1); if (p == buf) return -1; else return buf[0];}staticint isSpace(int c){ switch (c) { case 0x20: case 0xD: case 0xA: case 0x9: return 1; } return 0;}/* Return 1 if there's just optional white spaceor there's an S followed by name=val. */staticint parsePseudoAttribute(const ENCODING *enc, const char *ptr, const char *end, const char **namePtr, const char **nameEndPtr, const char **valPtr, const char **nextTokPtr){ int c; char open; if (ptr == end) { *namePtr = 0; return 1; } if (!isSpace(toAscii(enc, ptr, end))) { *nextTokPtr = ptr; return 0; } do { ptr += enc->minBytesPerChar; } while (isSpace(toAscii(enc, ptr, end))); if (ptr == end) { *namePtr = 0; return 1; } *namePtr = ptr; for (;;) { c = toAscii(enc, ptr, end); if (c == -1) { *nextTokPtr = ptr; return 0; } if (c == ASCII_EQUALS) { *nameEndPtr = ptr; break; } if (isSpace(c)) { *nameEndPtr = ptr; do { ptr += enc->minBytesPerChar; } while (isSpace(c = toAscii(enc, ptr, end))); if (c != ASCII_EQUALS) { *nextTokPtr = ptr; return 0; } break; } ptr += enc->minBytesPerChar; } if (ptr == *namePtr) { *nextTokPtr = ptr; return 0; } ptr += enc->minBytesPerChar; c = toAscii(enc, ptr, end); while (isSpace(c)) { ptr += enc->minBytesPerChar; c = toAscii(enc, ptr, end); } if (c != ASCII_QUOT && c != ASCII_APOS) { *nextTokPtr = ptr; return 0; } open = c; ptr += enc->minBytesPerChar; *valPtr = ptr; for (;; ptr += enc->minBytesPerChar) { c = toAscii(enc, ptr, end); if (c == open) break; if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z) && !(ASCII_0 <= c && c <= ASCII_9) && c != ASCII_PERIOD && c != ASCII_MINUS && c != ASCII_UNDERSCORE) { *nextTokPtr = ptr; return 0; } } *nextTokPtr = ptr + enc->minBytesPerChar; return 1;}static const char KW_version[] = { ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'};static const char KW_encoding[] = { ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, ASCII_i, ASCII_n, ASCII_g, '\0'};static const char KW_standalone[] = { ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0'};static const char KW_yes[] = { ASCII_y, ASCII_e, ASCII_s, '\0'};static const char KW_no[] = { ASCII_n, ASCII_o, '\0'};staticint doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, const char *, const char *), int isGeneralTextEntity, const ENCODING *enc, const char *ptr, const char *end, const char **badPtr, const char **versionPtr, const char **encodingName, const ENCODING **encoding, int *standalone){ const char *val = 0; const char *name = 0; const char *nameEnd = 0; ptr += 5 * enc->minBytesPerChar; end -= 2 * enc->minBytesPerChar; if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr) || !name) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -