📄 xmltok_impl.c
字号:
case BT_GT: case BT_RPAR: case BT_COMMA: case BT_VERBAR: case BT_LSQB: case BT_PERCNT: case BT_S: case BT_CR: case BT_LF: *nextTokPtr = ptr; return tok; case BT_PLUS: if (tok != XML_TOK_NAME) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC; return XML_TOK_NAME_PLUS; case BT_AST: if (tok != XML_TOK_NAME) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC; return XML_TOK_NAME_ASTERISK; case BT_QUEST: if (tok != XML_TOK_NAME) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC; return XML_TOK_NAME_QUESTION; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL;}staticint PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ const char *start; if (ptr == end) return XML_TOK_NONE; start = ptr; while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) {#define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)#undef LEAD_CASE case BT_AMP: if (ptr == start) return PREFIX(scanRef)(enc, ptr + MINBPC, end, nextTokPtr); *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_LT: /* this is for inside entity references */ *nextTokPtr = ptr; return XML_TOK_INVALID; case BT_LF: if (ptr == start) { *nextTokPtr = ptr + MINBPC; return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_CR: if (ptr == start) { ptr += MINBPC; if (ptr == end) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC; *nextTokPtr = ptr; return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_S: if (ptr == start) { *nextTokPtr = ptr + MINBPC; return XML_TOK_ATTRIBUTE_VALUE_S; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; default: ptr += MINBPC; break; } } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS;}staticint PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ const char *start; if (ptr == end) return XML_TOK_NONE; start = ptr; while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) {#define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)#undef LEAD_CASE case BT_AMP: if (ptr == start) return PREFIX(scanRef)(enc, ptr + MINBPC, end, nextTokPtr); *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_PERCNT: if (ptr == start) return PREFIX(scanPercent)(enc, ptr + MINBPC, end, nextTokPtr); *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_LF: if (ptr == start) { *nextTokPtr = ptr + MINBPC; return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_CR: if (ptr == start) { ptr += MINBPC; if (ptr == end) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC; *nextTokPtr = ptr; return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; default: ptr += MINBPC; break; } } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS;}staticint PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, const char **badPtr){ ptr += MINBPC; end -= MINBPC; for (; ptr != end; ptr += MINBPC) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: case BT_MINUS: case BT_APOS: case BT_LPAR: case BT_RPAR: case BT_PLUS: case BT_COMMA: case BT_SOL: case BT_EQUALS: case BT_QUEST: case BT_CR: case BT_LF: case BT_SEMI: case BT_EXCL: case BT_AST: case BT_PERCNT: case BT_NUM: break; case BT_S: if (CHAR_MATCHES(enc, ptr, '\t')) { *badPtr = ptr; return 0; } break; case BT_NAME: case BT_NMSTRT: if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) break; default: switch (BYTE_TO_ASCII(enc, ptr)) { case 0x24: /* $ */ case 0x40: /* @ */ break; default: *badPtr = ptr; return 0; } break; } } return 1;}/* This must only be called for a well-formed start-tag or empty element tag.Returns the number of attributes. Pointers to the first attsMax attributes are stored in atts. */staticint PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax, ATTRIBUTE *atts){ enum { other, inName, inValue } state = inName; int nAtts = 0; int open; for (ptr += MINBPC;; ptr += MINBPC) { switch (BYTE_TYPE(enc, ptr)) {#define START_NAME \ if (state == other) { \ if (nAtts < attsMax) { \ atts[nAtts].name = ptr; \ atts[nAtts].normalized = 1; \ } \ state = inName; \ }#define LEAD_CASE(n) \ case BT_LEAD ## n: START_NAME ptr += (n - MINBPC); break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)#undef LEAD_CASE case BT_NONASCII: case BT_NMSTRT: case BT_HEX: START_NAME break;#undef START_NAME case BT_QUOT: if (state != inValue) { if (nAtts < attsMax) atts[nAtts].valuePtr = ptr + MINBPC; state = inValue; open = BT_QUOT; } else if (open == BT_QUOT) { state = other; if (nAtts < attsMax) atts[nAtts].valueEnd = ptr; nAtts++; } break; case BT_APOS: if (state != inValue) { if (nAtts < attsMax) atts[nAtts].valuePtr = ptr + MINBPC; state = inValue; open = BT_APOS; } else if (open == BT_APOS) { state = other; if (nAtts < attsMax) atts[nAtts].valueEnd = ptr; nAtts++; } break; case BT_AMP: if (nAtts < attsMax) atts[nAtts].normalized = 0; break; case BT_S: if (state == inName) state = other; else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized && (ptr == atts[nAtts].valuePtr || BYTE_TO_ASCII(enc, ptr) != ' ' || BYTE_TO_ASCII(enc, ptr + MINBPC) == ' ' || BYTE_TYPE(enc, ptr + MINBPC) == open)) atts[nAtts].normalized = 0; break; case BT_CR: case BT_LF: /* This case ensures that the first attribute name is counted Apart from that we could just change state on the quote. */ if (state == inName) state = other; else if (state == inValue && nAtts < attsMax) atts[nAtts].normalized = 0; break; case BT_GT: case BT_SOL: if (state != inValue) return nAtts; break; default: break; } } /* not reached */}staticint PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr){ int result = 0; /* skip &# */ ptr += 2*MINBPC; if (CHAR_MATCHES(enc, ptr, 'x')) { for (ptr += MINBPC; !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC) { int c = BYTE_TO_ASCII(enc, ptr); switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': result <<= 4; result |= (c - '0'); break; case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': result <<= 4; result += 10 + (c - 'A'); break; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': result <<= 4; result += 10 + (c - 'a'); break; } if (result >= 0x110000) return -1; } } else { for (; !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC) { int c = BYTE_TO_ASCII(enc, ptr); result *= 10; result += (c - '0'); if (result >= 0x110000) return -1; } } return checkCharRefNumber(result);}staticint PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end){ switch (end - ptr) { case 2 * MINBPC: if (CHAR_MATCHES(enc, ptr + MINBPC, 't')) { switch (BYTE_TO_ASCII(enc, ptr)) { case 'l': return '<'; case 'g': return '>'; } } break; case 3 * MINBPC: if (CHAR_MATCHES(enc, ptr, 'a')) { ptr += MINBPC; if (CHAR_MATCHES(enc, ptr, 'm')) { ptr += MINBPC; if (CHAR_MATCHES(enc, ptr, 'p')) return '&'; } } break; case 4 * MINBPC: switch (BYTE_TO_ASCII(enc, ptr)) { case 'q': ptr += MINBPC; if (CHAR_MATCHES(enc, ptr, 'u')) { ptr += MINBPC; if (CHAR_MATCHES(enc, ptr, 'o')) { ptr += MINBPC; if (CHAR_MATCHES(enc, ptr, 't')) return '"'; } } break; case 'a': ptr += MINBPC; if (CHAR_MATCHES(enc, ptr, 'p')) { ptr += MINBPC; if (CHAR_MATCHES(enc, ptr, 'o')) { ptr += MINBPC; if (CHAR_MATCHES(enc, ptr, 's')) return '\''; } } break; } } return 0;}staticint PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2){ for (;;) { switch (BYTE_TYPE(enc, ptr1)) {#define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (*ptr1++ != *ptr2++) \ return 0; LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)#undef LEAD_CASE /* fall through */ if (*ptr1++ != *ptr2++) return 0; break; case BT_NONASCII: case BT_NMSTRT: case BT_HEX: case BT_DIGIT: case BT_NAME: case BT_MINUS: if (*ptr2++ != *ptr1++) return 0;#if MINBPC > 1 if (*ptr2++ != *ptr1++) return 0;#if MINBPC > 2 if (*ptr2++ != *ptr1++) return 0;#if MINBPC > 3 if (*ptr2++ != *ptr1++) return 0;#endif#endif#endif break; default:#if MINBPC == 1 if (*ptr1 == *ptr2) return 1;#endif switch (BYTE_TYPE(enc, ptr2)) { case BT_LEAD2: case BT_LEAD3: case BT_LEAD4: case BT_NONASCII: case BT_NMSTRT: case BT_HEX: case BT_DIGIT: case BT_NAME: case BT_MINUS: return 0; default: return 1; } } } /* not reached */}staticint PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, const char *ptr2){ for (; *ptr2; ptr1 += MINBPC, ptr2++) { if (!CHAR_MATCHES(end, ptr1, *ptr2)) return 0; } switch (BYTE_TYPE(enc, ptr1)) { case BT_LEAD2: case BT_LEAD3: case BT_LEAD4: case BT_NONASCII: case BT_NMSTRT: case BT_HEX: case BT_DIGIT: case BT_NAME: case BT_MINUS: return 0; default: return 1; }}staticint PREFIX(nameLength)(const ENCODING *enc, const char *ptr){ const char *start = ptr; for (;;) { switch (BYTE_TYPE(enc, ptr)) {#define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)#undef LEAD_CASE case BT_NONASCII: case BT_NMSTRT: case BT_HEX: case BT_DIGIT: case BT_NAME: case BT_MINUS: ptr += MINBPC; break; default: return ptr - start; } }}staticconst char *PREFIX(skipS)(const ENCODING *enc, const char *ptr){ for (;;) { switch (BYTE_TYPE(enc, ptr)) { case BT_LF: case BT_CR: case BT_S: ptr += MINBPC; break; default: return ptr; } }}staticvoid PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end, POSITION *pos){ while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) {#define LEAD_CASE(n) \ case BT_LEAD ## n: \ ptr += n; \ break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)#undef LEAD_CASE case BT_LF: pos->columnNumber = (unsigned)-1; pos->lineNumber++; ptr += MINBPC; break; case BT_CR: pos->lineNumber++; ptr += MINBPC; if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC; pos->columnNumber = (unsigned)-1; break; default: ptr += MINBPC; break; } pos->columnNumber++; }}#undef DO_LEAD_CASE#undef MULTIBYTE_CASES#undef INVALID_CASES#undef CHECK_NAME_CASE#undef CHECK_NAME_CASES#undef CHECK_NMSTRT_CASE#undef CHECK_NMSTRT_CASES
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -