📄 xmltok_impl.c
字号:
/*The contents of this file are subject to the Mozilla Public LicenseVersion 1.0 (the "License"); you may not use this file except incompliance with the License. You may obtain a copy of the License athttp://www.mozilla.org/MPL/Software distributed under the License is distributed on an "AS IS"basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See theLicense for the specific language governing rights and limitationsunder the License.The Original Code is expat.The Initial Developer of the Original Code is James Clark.Portions created by James Clark are Copyright (C) 1998James Clark. All Rights Reserved.Contributor(s):*/#ifndef IS_INVALID_CHAR#define IS_INVALID_CHAR(enc, ptr, n) (0)#endif#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ case BT_LEAD ## n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ if (IS_INVALID_CHAR(enc, ptr, n)) { \ *(nextTokPtr) = (ptr); \ return XML_TOK_INVALID; \ } \ ptr += n; \ break;#define INVALID_CASES(ptr, nextTokPtr) \ INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ case BT_NONXML: \ case BT_MALFORM: \ case BT_TRAIL: \ *(nextTokPtr) = (ptr); \ return XML_TOK_INVALID;#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ case BT_LEAD ## n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ if (!IS_NAME_CHAR(enc, ptr, n)) { \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ ptr += n; \ break;#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ case BT_NONASCII: \ if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ case BT_NMSTRT: \ case BT_HEX: \ case BT_DIGIT: \ case BT_NAME: \ case BT_MINUS: \ ptr += MINBPC; \ break; \ CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ case BT_LEAD ## n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ ptr += n; \ break;#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ case BT_NONASCII: \ if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ case BT_NMSTRT: \ case BT_HEX: \ ptr += MINBPC; \ break; \ CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)#ifndef PREFIX#define PREFIX(ident) ident#endif/* ptr points to character following "<!-" */staticint PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ if (ptr != end) { if (!CHAR_MATCHES(enc, ptr, '-')) { *nextTokPtr = ptr; return XML_TOK_INVALID; } ptr += MINBPC; while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_MINUS: if ((ptr += MINBPC) == end) return XML_TOK_PARTIAL; if (CHAR_MATCHES(enc, ptr, '-')) { if ((ptr += MINBPC) == end) return XML_TOK_PARTIAL; if (!CHAR_MATCHES(enc, ptr, '>')) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC; return XML_TOK_COMMENT; } break; default: ptr += MINBPC; break; } } } return XML_TOK_PARTIAL;}/* ptr points to character following "<!" */staticint PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { case BT_MINUS: return PREFIX(scanComment)(enc, ptr + MINBPC, end, nextTokPtr); case BT_LSQB: *nextTokPtr = ptr + MINBPC; return XML_TOK_COND_SECT_OPEN; case BT_NMSTRT: case BT_HEX: ptr += MINBPC; break; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { case BT_PERCNT: if (ptr + MINBPC == end) return XML_TOK_PARTIAL; /* don't allow <!ENTITY% foo "whatever"> */ switch (BYTE_TYPE(enc, ptr + MINBPC)) { case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: *nextTokPtr = ptr; return XML_TOK_INVALID; } /* fall through */ case BT_S: case BT_CR: case BT_LF: *nextTokPtr = ptr; return XML_TOK_DECL_OPEN; case BT_NMSTRT: case BT_HEX: ptr += MINBPC; break; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL;}staticint PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr){ int upper = 0; *tokPtr = XML_TOK_PI; if (end - ptr != MINBPC*3) return 1; switch (BYTE_TO_ASCII(enc, ptr)) { case 'x': break; case 'X': upper = 1; break; default: return 1; } ptr += MINBPC; switch (BYTE_TO_ASCII(enc, ptr)) { case 'm': break; case 'M': upper = 1; break; default: return 1; } ptr += MINBPC; switch (BYTE_TO_ASCII(enc, ptr)) { case 'l': break; case 'L': upper = 1; break; default: return 1; } if (upper) return 0; *tokPtr = XML_TOK_XML_DECL; return 1;}/* ptr points to character following "<?" */staticint PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ int tok; const char *target = ptr; if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } ptr += MINBPC; while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_QUEST: ptr += MINBPC; if (ptr == end) return XML_TOK_PARTIAL; if (CHAR_MATCHES(enc, ptr, '>')) { *nextTokPtr = ptr + MINBPC; return tok; } break; default: ptr += MINBPC; break; } } return XML_TOK_PARTIAL; case BT_QUEST: if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } ptr += MINBPC; if (ptr == end) return XML_TOK_PARTIAL; if (CHAR_MATCHES(enc, ptr, '>')) { *nextTokPtr = ptr + MINBPC; return tok; } /* fall through */ default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL;}staticint PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ int i; /* CDATA[ */ if (end - ptr < 6 * MINBPC) return XML_TOK_PARTIAL; for (i = 0; i < 6; i++, ptr += MINBPC) { if (!CHAR_MATCHES(enc, ptr, "CDATA["[i])) { *nextTokPtr = ptr; return XML_TOK_INVALID; } } *nextTokPtr = ptr; return XML_TOK_CDATA_SECT_OPEN;}staticint PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ if (ptr == end) return XML_TOK_NONE;#if MINBPC > 1 { size_t n = end - ptr; if (n & (MINBPC - 1)) { n &= ~(MINBPC - 1); if (n == 0) return XML_TOK_PARTIAL; end = ptr + n; } }#endif switch (BYTE_TYPE(enc, ptr)) { case BT_RSQB: ptr += MINBPC; if (ptr == end) return XML_TOK_PARTIAL; if (!CHAR_MATCHES(enc, ptr, ']')) break; ptr += MINBPC; if (ptr == end) return XML_TOK_PARTIAL; if (!CHAR_MATCHES(enc, ptr, '>')) { ptr -= MINBPC; break; } *nextTokPtr = ptr + MINBPC; return XML_TOK_CDATA_SECT_CLOSE; case BT_CR: ptr += MINBPC; if (ptr == end) return XML_TOK_PARTIAL; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC; *nextTokPtr = ptr; return XML_TOK_DATA_NEWLINE; case BT_LF: *nextTokPtr = ptr + MINBPC; return XML_TOK_DATA_NEWLINE; INVALID_CASES(ptr, nextTokPtr) default: ptr += MINBPC; break; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) {#define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ *nextTokPtr = ptr; \ return XML_TOK_DATA_CHARS; \ } \ ptr += n; \ break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)#undef LEAD_CASE case BT_NONXML: case BT_MALFORM: case BT_TRAIL: case BT_CR: case BT_LF: case BT_RSQB: *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; default: ptr += MINBPC; break; } } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS;}/* ptr points to character following "</" */staticint PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: for (ptr += MINBPC; ptr != end; ptr += MINBPC) { switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_CR: case BT_LF: break; case BT_GT: *nextTokPtr = ptr + MINBPC; return XML_TOK_END_TAG; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL; case BT_GT: *nextTokPtr = ptr + MINBPC; return XML_TOK_END_TAG; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL;}/* ptr points to character following "&#X" */staticint PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ if (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: break; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } for (ptr += MINBPC; ptr != end; ptr += MINBPC) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: break; case BT_SEMI: *nextTokPtr = ptr + MINBPC; return XML_TOK_CHAR_REF; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } } return XML_TOK_PARTIAL;}/* ptr points to character following "&#" */staticint PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ if (ptr != end) { if (CHAR_MATCHES(enc, ptr, 'x')) return PREFIX(scanHexCharRef)(enc, ptr + MINBPC, end, nextTokPtr); switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: break; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } for (ptr += MINBPC; ptr != end; ptr += MINBPC) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: break; case BT_SEMI: *nextTokPtr = ptr + MINBPC; return XML_TOK_CHAR_REF; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } } return XML_TOK_PARTIAL;}/* ptr points to character following "&" */staticint PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_NUM: return PREFIX(scanCharRef)(enc, ptr + MINBPC, end, nextTokPtr); default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_SEMI: *nextTokPtr = ptr + MINBPC; return XML_TOK_ENTITY_REF; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL;}/* ptr points to character following first character of attribute name */staticint PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: for (;;) { int t; ptr += MINBPC; if (ptr == end) return XML_TOK_PARTIAL; t = BYTE_TYPE(enc, ptr); if (t == BT_EQUALS) break; switch (t) { case BT_S: case BT_LF: case BT_CR:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -