📄 xmltok_impl.c
字号:
/*Copyright (c) 1998, 1999 Thai Open Source Software Center LtdSee the file COPYING for copying permission.*/#ifndef IS_INVALID_CHAR#define IS_INVALID_CHAR(enc, ptr, n) (0)#endif#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ case BT_LEAD ## n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ if (IS_INVALID_CHAR(enc, ptr, n)) { \ *(nextTokPtr) = (ptr); \ return XML_TOK_INVALID; \ } \ ptr += n; \ break;#define INVALID_CASES(ptr, nextTokPtr) \ INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ case BT_NONXML: \ case BT_MALFORM: \ case BT_TRAIL: \ *(nextTokPtr) = (ptr); \ return XML_TOK_INVALID;#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ case BT_LEAD ## n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ if (!IS_NAME_CHAR(enc, ptr, n)) { \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ ptr += n; \ break;#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ case BT_NONASCII: \ if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ case BT_NMSTRT: \ case BT_HEX: \ case BT_DIGIT: \ case BT_NAME: \ case BT_MINUS: \ ptr += MINBPC(enc); \ break; \ CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ case BT_LEAD ## n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ ptr += n; \ break;#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ case BT_NONASCII: \ if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ case BT_NMSTRT: \ case BT_HEX: \ ptr += MINBPC(enc); \ break; \ CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)#ifndef PREFIX#define PREFIX(ident) ident#endif/* ptr points to character following "<!-" */staticint PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ if (ptr != end) { if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } ptr += MINBPC(enc); while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_MINUS: if ((ptr += MINBPC(enc)) == end) return XML_TOK_PARTIAL; if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { if ((ptr += MINBPC(enc)) == end) return XML_TOK_PARTIAL; if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_COMMENT; } break; default: ptr += MINBPC(enc); break; } } } return XML_TOK_PARTIAL;}/* ptr points to character following "<!" */staticint PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { case BT_MINUS: return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_LSQB: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_COND_SECT_OPEN; case BT_NMSTRT: case BT_HEX: ptr += MINBPC(enc); break; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { case BT_PERCNT: if (ptr + MINBPC(enc) == end) return XML_TOK_PARTIAL; /* don't allow <!ENTITY% foo "whatever"> */ switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: *nextTokPtr = ptr; return XML_TOK_INVALID; } /* fall through */ case BT_S: case BT_CR: case BT_LF: *nextTokPtr = ptr; return XML_TOK_DECL_OPEN; case BT_NMSTRT: case BT_HEX: ptr += MINBPC(enc); break; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL;}staticint PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr){ int upper = 0; *tokPtr = XML_TOK_PI; if (end - ptr != MINBPC(enc)*3) return 1; switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_x: break; case ASCII_X: upper = 1; break; default: return 1; } ptr += MINBPC(enc); switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_m: break; case ASCII_M: upper = 1; break; default: return 1; } ptr += MINBPC(enc); switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_l: break; case ASCII_L: upper = 1; break; default: return 1; } if (upper) return 0; *tokPtr = XML_TOK_XML_DECL; return 1;}/* ptr points to character following "<?" */staticint PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ int tok; const char *target = ptr; if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } ptr += MINBPC(enc); while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_QUEST: ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr + MINBPC(enc); return tok; } break; default: ptr += MINBPC(enc); break; } } return XML_TOK_PARTIAL; case BT_QUEST: if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr + MINBPC(enc); return tok; } /* fall through */ default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL;}staticint PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB }; int i; /* CDATA[ */ if (end - ptr < 6 * MINBPC(enc)) return XML_TOK_PARTIAL; for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { *nextTokPtr = ptr; return XML_TOK_INVALID; } } *nextTokPtr = ptr; return XML_TOK_CDATA_SECT_OPEN;}staticint PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ if (ptr == end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; if (n & (MINBPC(enc) - 1)) { n &= ~(MINBPC(enc) - 1); if (n == 0) return XML_TOK_PARTIAL; end = ptr + n; } } switch (BYTE_TYPE(enc, ptr)) { case BT_RSQB: ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_CDATA_SECT_CLOSE; case BT_CR: ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); *nextTokPtr = ptr; return XML_TOK_DATA_NEWLINE; case BT_LF: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_DATA_NEWLINE; INVALID_CASES(ptr, nextTokPtr) default: ptr += MINBPC(enc); break; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) {#define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ *nextTokPtr = ptr; \ return XML_TOK_DATA_CHARS; \ } \ ptr += n; \ break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)#undef LEAD_CASE case BT_NONXML: case BT_MALFORM: case BT_TRAIL: case BT_CR: case BT_LF: case BT_RSQB: *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; default: ptr += MINBPC(enc); break; } } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS;}/* ptr points to character following "</" */staticint PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_CR: case BT_LF: break; case BT_GT: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_END_TAG; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL;#ifdef XML_NS case BT_COLON: /* no need to check qname syntax here, since end-tag must match exactly */ ptr += MINBPC(enc); break;#endif case BT_GT: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_END_TAG; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL;}/* ptr points to character following "&#X" */staticint PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ if (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: break; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: break; case BT_SEMI: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_CHAR_REF; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } } return XML_TOK_PARTIAL;}/* ptr points to character following "&#" */staticint PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ if (ptr != end) { if (CHAR_MATCHES(enc, ptr, ASCII_x)) return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: break; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: break; case BT_SEMI: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_CHAR_REF; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } } return XML_TOK_PARTIAL;}/* ptr points to character following "&" */staticint PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_NUM: return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_SEMI: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_ENTITY_REF; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL;}/* ptr points to character following first character of attribute name */staticint PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){#ifdef XML_NS int hadColon = 0;#endif while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)#ifdef XML_NS case BT_COLON: if (hadColon) { *nextTokPtr = ptr; return XML_TOK_INVALID; } hadColon = 1; ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } break;#endif case BT_S: case BT_CR: case BT_LF: for (;;) { int t; ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; t = BYTE_TYPE(enc, ptr); if (t == BT_EQUALS) break; switch (t) { case BT_S: case BT_LF: case BT_CR: break; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } /* fall through */ case BT_EQUALS: { int open;#ifdef XML_NS hadColon = 0;#endif for (;;) { ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; open = BYTE_TYPE(enc, ptr); if (open == BT_QUOT || open == BT_APOS) break; switch (open) { case BT_S: case BT_LF: case BT_CR: break; default: *nextTokPtr = ptr;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -