📄 xmltok.c
字号:
staticint initScan(const ENCODING *enc, int state, const char *ptr, const char *end, const char **nextTokPtr){ const ENCODING **encPtr; if (ptr == end) return XML_TOK_NONE; encPtr = ((const INIT_ENCODING *)enc)->encPtr; if (ptr + 1 == end) { switch ((unsigned char)*ptr) { case 0xFE: case 0xFF: case 0x00: case 0x3C: return XML_TOK_PARTIAL; } } else { switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) { case 0x003C: *encPtr = &big2_encoding.enc; return XmlTok(*encPtr, state, ptr, end, nextTokPtr); case 0xFEFF: *nextTokPtr = ptr + 2; *encPtr = &big2_encoding.enc; return XML_TOK_BOM; case 0x3C00: *encPtr = &little2_encoding.enc; return XmlTok(*encPtr, state, ptr, end, nextTokPtr); case 0xFFFE: *nextTokPtr = ptr + 2; *encPtr = &little2_encoding.enc; return XML_TOK_BOM; } } *encPtr = &utf8_encoding.enc; return XmlTok(*encPtr, state, ptr, end, nextTokPtr);}staticint initScanProlog(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ return initScan(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr);}staticint initScanContent(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ return initScan(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr);}staticvoid initUpdatePosition(const ENCODING *enc, const char *ptr, const char *end, POSITION *pos){ normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);}const ENCODING *XmlGetUtf8InternalEncoding(){ return &internal_utf8_encoding.enc;}const ENCODING *XmlGetUtf16InternalEncoding(){#if BYTE_ORDER == 12 return &internal_little2_encoding.enc;#elif BYTE_ORDER == 21 return &internal_big2_encoding.enc;#else const short n = 1; return *(const char *)&n ? &internal_little2_encoding.enc : &internal_big2_encoding.enc;#endif}int XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr, const char *name){ if (name) { if (streqci(name, "ISO-8859-1")) { *encPtr = &latin1_encoding.enc; return 1; } if (streqci(name, "UTF-8")) { *encPtr = &utf8_encoding.enc; return 1; } if (streqci(name, "US-ASCII")) { *encPtr = &ascii_encoding.enc; return 1; } if (!streqci(name, "UTF-16")) return 0; } p->initEnc.scanners[XML_PROLOG_STATE] = initScanProlog; p->initEnc.scanners[XML_CONTENT_STATE] = initScanContent; p->initEnc.updatePosition = initUpdatePosition; p->initEnc.minBytesPerChar = 1; p->encPtr = encPtr; *encPtr = &(p->initEnc); return 1;}staticint toAscii(const ENCODING *enc, const char *ptr, const char *end){ char buf[1]; char *p = buf; XmlUtf8Convert(enc, &ptr, end, &p, p + 1); if (p == buf) return -1; else return buf[0];}staticint isSpace(int c){ switch (c) { case ' ': case '\r': case '\n': case '\t': return 1; } return 0;}/* Return 1 if there's just optional white spaceor there's an S followed by name=val. */staticint parsePseudoAttribute(const ENCODING *enc, const char *ptr, const char *end, const char **namePtr, const char **valPtr, const char **nextTokPtr){ int c; char open; if (ptr == end) { *namePtr = 0; return 1; } if (!isSpace(toAscii(enc, ptr, end))) { *nextTokPtr = ptr; return 0; } do { ptr += enc->minBytesPerChar; } while (isSpace(toAscii(enc, ptr, end))); if (ptr == end) { *namePtr = 0; return 1; } *namePtr = ptr; for (;;) { c = toAscii(enc, ptr, end); if (c == -1) { *nextTokPtr = ptr; return 0; } if (c == '=') break; if (isSpace(c)) { do { ptr += enc->minBytesPerChar; } while (isSpace(c = toAscii(enc, ptr, end))); if (c != '=') { *nextTokPtr = ptr; return 0; } break; } ptr += enc->minBytesPerChar; } if (ptr == *namePtr) { *nextTokPtr = ptr; return 0; } ptr += enc->minBytesPerChar; c = toAscii(enc, ptr, end); while (isSpace(c)) { ptr += enc->minBytesPerChar; c = toAscii(enc, ptr, end); } if (c != '"' && c != '\'') { *nextTokPtr = ptr; return 0; } open = c; ptr += enc->minBytesPerChar; *valPtr = ptr; for (;; ptr += enc->minBytesPerChar) { c = toAscii(enc, ptr, end); if (c == open) break; if (!('a' <= c && c <= 'z') && !('A' <= c && c <= 'Z') && !('0' <= c && c <= '9') && c != '.' && c != '-' && c != '_') { *nextTokPtr = ptr; return 0; } } *nextTokPtr = ptr + enc->minBytesPerChar; return 1;}staticconst ENCODING *findEncoding(const ENCODING *enc, const char *ptr, const char *end){#define ENCODING_MAX 128 char buf[ENCODING_MAX]; char *p = buf; int i; XmlUtf8Convert(enc, &ptr, end, &p, p + ENCODING_MAX - 1); if (ptr != end) return 0; *p = 0; for (i = 0; buf[i]; i++) { if ('a' <= buf[i] && buf[i] <= 'z') buf[i] += 'A' - 'a'; } if (streqci(buf, "UTF-8")) return &utf8_encoding.enc; if (streqci(buf, "ISO-8859-1")) return &latin1_encoding.enc; if (streqci(buf, "US-ASCII")) return &ascii_encoding.enc; if (streqci(buf, "UTF-16")) { static const unsigned short n = 1; if (enc->minBytesPerChar == 2) return enc; return &big2_encoding.enc; } return 0; }int XmlParseXmlDecl(int isGeneralTextEntity, const ENCODING *enc, const char *ptr, const char *end, const char **badPtr, const char **versionPtr, const char **encodingName, const ENCODING **encoding, int *standalone){ const char *val = 0; const char *name = 0; ptr += 5 * enc->minBytesPerChar; end -= 2 * enc->minBytesPerChar; if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr) || !name) { *badPtr = ptr; return 0; } if (!XmlNameMatchesAscii(enc, name, "version")) { if (!isGeneralTextEntity) { *badPtr = name; return 0; } } else { if (versionPtr) *versionPtr = val; if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) { *badPtr = ptr; return 0; } if (!name) { if (isGeneralTextEntity) { /* a TextDecl must have an EncodingDecl */ *badPtr = ptr; return 0; } return 1; } } if (XmlNameMatchesAscii(enc, name, "encoding")) { int c = toAscii(enc, val, end); if (!('a' <= c && c <= 'z') && !('A' <= c && c <= 'Z')) { *badPtr = val; return 0; } if (encodingName) *encodingName = val; if (encoding) *encoding = findEncoding(enc, val, ptr - enc->minBytesPerChar); if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) { *badPtr = ptr; return 0; } if (!name) return 1; } if (!XmlNameMatchesAscii(enc, name, "standalone") || isGeneralTextEntity) { *badPtr = name; return 0; } if (XmlNameMatchesAscii(enc, val, "yes")) { if (standalone) *standalone = 1; } else if (XmlNameMatchesAscii(enc, val, "no")) { if (standalone) *standalone = 0; } else { *badPtr = val; return 0; } while (isSpace(toAscii(enc, ptr, end))) ptr += enc->minBytesPerChar; if (ptr != end) { *badPtr = ptr; return 0; } return 1;}staticint checkCharRefNumber(int result){ switch (result >> 8) { case 0xD8: case 0xD9: case 0xDA: case 0xDB: case 0xDC: case 0xDD: case 0xDE: case 0xDF: return -1; case 0: if (latin1_encoding.type[result] == BT_NONXML) return -1; break; case 0xFF: if (result == 0xFFFE || result == 0xFFFF) return -1; break; } return result;}int XmlUtf8Encode(int c, char *buf){ enum { /* minN is minimum legal resulting value for N byte sequence */ min2 = 0x80, min3 = 0x800, min4 = 0x10000 }; if (c < 0) return 0; if (c < min2) { buf[0] = (c | UTF8_cval1); return 1; } if (c < min3) { buf[0] = ((c >> 6) | UTF8_cval2); buf[1] = ((c & 0x3f) | 0x80); return 2; } if (c < min4) { buf[0] = ((c >> 12) | UTF8_cval3); buf[1] = (((c >> 6) & 0x3f) | 0x80); buf[2] = ((c & 0x3f) | 0x80); return 3; } if (c < 0x110000) { buf[0] = ((c >> 18) | UTF8_cval4); buf[1] = (((c >> 12) & 0x3f) | 0x80); buf[2] = (((c >> 6) & 0x3f) | 0x80); buf[3] = ((c & 0x3f) | 0x80); return 4; } return 0;}int XmlUtf16Encode(int charNum, unsigned short *buf){ if (charNum < 0) return 0; if (charNum < 0x10000) { buf[0] = charNum; return 1; } if (charNum < 0x110000) { charNum -= 0x10000; buf[0] = (charNum >> 10) + 0xD800; buf[1] = (charNum & 0x3FF) + 0xDC00; return 2; } return 0;}struct unknown_encoding { struct normal_encoding normal; int (*convert)(void *userData, const char *p); void *userData; unsigned short utf16[256]; char utf8[256][4];};int XmlSizeOfUnknownEncoding(){ return sizeof(struct unknown_encoding);}staticint unknown_isName(const ENCODING *enc, const char *p){ int c = ((const struct unknown_encoding *)enc) ->convert(((const struct unknown_encoding *)enc)->userData, p); if (c & ~0xFFFF) return 0; return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);}staticint unknown_isNmstrt(const ENCODING *enc, const char *p){ int c = ((const struct unknown_encoding *)enc) ->convert(((const struct unknown_encoding *)enc)->userData, p); if (c & ~0xFFFF) return 0; return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);}staticint unknown_isInvalid(const ENCODING *enc, const char *p){ int c = ((const struct unknown_encoding *)enc) ->convert(((const struct unknown_encoding *)enc)->userData, p); return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;}staticvoid unknown_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim){ char buf[XML_UTF8_ENCODE_MAX]; for (;;) { const char *utf8; int n; if (*fromP == fromLim) break; utf8 = ((const struct unknown_encoding *)enc)->utf8[(unsigned char)**fromP]; n = *utf8++; if (n == 0) { int c = ((const struct unknown_encoding *)enc) ->convert(((const struct unknown_encoding *)enc)->userData, *fromP); n = XmlUtf8Encode(c, buf); if (n > toLim - *toP) break; utf8 = buf; *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP] - (BT_LEAD2 - 2); } else { if (n > toLim - *toP) break; (*fromP)++; } do { *(*toP)++ = *utf8++; } while (--n != 0); }}staticvoid unknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim){ while (*fromP != fromLim && *toP != toLim) { unsigned short c = ((const struct unknown_encoding *)enc)->utf16[(unsigned char)**fromP]; if (c == 0) { c = (unsigned short)((const struct unknown_encoding *)enc) ->convert(((const struct unknown_encoding *)enc)->userData, *fromP); *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP] - (BT_LEAD2 - 2); } else (*fromP)++; *(*toP)++ = c; }}ENCODING *XmlInitUnknownEncoding(void *mem, int *table, int (*convert)(void *userData, const char *p), void *userData){ int i; struct unknown_encoding *e = mem; for (i = 0; i < sizeof(struct normal_encoding); i++) ((char *)mem)[i] = ((char *)&latin1_encoding)[i]; for (i = 0; i < 128; i++) if (latin1_encoding.type[i] != BT_OTHER && latin1_encoding.type[i] != BT_NONXML && table[i] != i) return 0; for (i = 0; i < 256; i++) { int c = table[i]; if (c == -1) { e->normal.type[i] = BT_MALFORM; /* This shouldn't really get used. */ e->utf16[i] = 0xFFFF; e->utf8[i][0] = 1; e->utf8[i][1] = 0; } else if (c < 0) { if (c < -4) return 0; e->normal.type[i] = BT_LEAD2 - (c + 2); e->utf8[i][0] = 0; e->utf16[i] = 0; } else if (c < 0x80) { if (latin1_encoding.type[c] != BT_OTHER && latin1_encoding.type[c] != BT_NONXML && c != i) return 0; e->normal.type[i] = latin1_encoding.type[c]; e->utf8[i][0] = 1; e->utf8[i][1] = (char)c; e->utf16[i] = c == 0 ? 0xFFFF : c; } else if (checkCharRefNumber(c) < 0) { e->normal.type[i] = BT_NONXML; /* This shouldn't really get used. */ e->utf16[i] = 0xFFFF; e->utf8[i][0] = 1; e->utf8[i][1] = 0; } else { if (c > 0xFFFF) return 0; if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff)) e->normal.type[i] = BT_NMSTRT; else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff)) e->normal.type[i] = BT_NAME; else e->normal.type[i] = BT_OTHER; e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1); e->utf16[i] = c; } } e->userData = userData; e->convert = convert; if (convert) { e->normal.isName2 = unknown_isName; e->normal.isName3 = unknown_isName; e->normal.isName4 = unknown_isName; e->normal.isNmstrt2 = unknown_isNmstrt; e->normal.isNmstrt3 = unknown_isNmstrt; e->normal.isNmstrt4 = unknown_isNmstrt; e->normal.isInvalid2 = unknown_isInvalid; e->normal.isInvalid3 = unknown_isInvalid; e->normal.isInvalid4 = unknown_isInvalid; } e->normal.enc.utf8Convert = unknown_toUtf8; e->normal.enc.utf16Convert = unknown_toUtf16; return &(e->normal.enc);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -