📄 xmltok_impl.c
字号:
*nextTokPtr = ptr; return tok;#ifdef XML_NS case BT_COLON: ptr += MINBPC(enc); switch (tok) { case XML_TOK_NAME: if (ptr == end) return XML_TOK_PARTIAL; tok = XML_TOK_PREFIXED_NAME; switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) default: tok = XML_TOK_NMTOKEN; break; } break; case XML_TOK_PREFIXED_NAME: tok = XML_TOK_NMTOKEN; break; } break;#endif case BT_PLUS: if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_PLUS; case BT_AST: if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_ASTERISK; case BT_QUEST: if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_QUESTION; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL;}staticint PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ const char *start; if (ptr == end) return XML_TOK_NONE; start = ptr; while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) {#define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)#undef LEAD_CASE case BT_AMP: if (ptr == start) return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_LT: /* this is for inside entity references */ *nextTokPtr = ptr; return XML_TOK_INVALID; case BT_LF: if (ptr == start) { *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_CR: if (ptr == start) { ptr += MINBPC(enc); if (ptr == end) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); *nextTokPtr = ptr; return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_S: if (ptr == start) { *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_ATTRIBUTE_VALUE_S; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; default: ptr += MINBPC(enc); break; } } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS;}staticint PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ const char *start; if (ptr == end) return XML_TOK_NONE; start = ptr; while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) {#define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)#undef LEAD_CASE case BT_AMP: if (ptr == start) return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_PERCNT: if (ptr == start) return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_LF: if (ptr == start) { *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_CR: if (ptr == start) { ptr += MINBPC(enc); if (ptr == end) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); *nextTokPtr = ptr; return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; default: ptr += MINBPC(enc); break; } } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS;}staticint PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, const char **badPtr){ ptr += MINBPC(enc); end -= MINBPC(enc); for (; ptr != end; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: case BT_MINUS: case BT_APOS: case BT_LPAR: case BT_RPAR: case BT_PLUS: case BT_COMMA: case BT_SOL: case BT_EQUALS: case BT_QUEST: case BT_CR: case BT_LF: case BT_SEMI: case BT_EXCL: case BT_AST: case BT_PERCNT: case BT_NUM:#ifdef XML_NS case BT_COLON:#endif break; case BT_S: if (CHAR_MATCHES(enc, ptr, '\t')) { *badPtr = ptr; return 0; } break; case BT_NAME: case BT_NMSTRT: if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) break; default: switch (BYTE_TO_ASCII(enc, ptr)) { case 0x24: /* $ */ case 0x40: /* @ */ break; default: *badPtr = ptr; return 0; } break; } } return 1;}/* This must only be called for a well-formed start-tag or empty element tag.Returns the number of attributes. Pointers to the first attsMax attributes are stored in atts. */staticint PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax, ATTRIBUTE *atts){ enum { other, inName, inValue } state = inName; int nAtts = 0; int open = 0; for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) {#define START_NAME \ if (state == other) { \ if (nAtts < attsMax) { \ atts[nAtts].name = ptr; \ atts[nAtts].normalized = 1; \ } \ state = inName; \ }#define LEAD_CASE(n) \ case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)#undef LEAD_CASE case BT_NONASCII: case BT_NMSTRT: case BT_HEX: START_NAME break;#undef START_NAME case BT_QUOT: if (state != inValue) { if (nAtts < attsMax) atts[nAtts].valuePtr = ptr + MINBPC(enc); state = inValue; open = BT_QUOT; } else if (open == BT_QUOT) { state = other; if (nAtts < attsMax) atts[nAtts].valueEnd = ptr; nAtts++; } break; case BT_APOS: if (state != inValue) { if (nAtts < attsMax) atts[nAtts].valuePtr = ptr + MINBPC(enc); state = inValue; open = BT_APOS; } else if (open == BT_APOS) { state = other; if (nAtts < attsMax) atts[nAtts].valueEnd = ptr; nAtts++; } break; case BT_AMP: if (nAtts < attsMax) atts[nAtts].normalized = 0; break; case BT_S: if (state == inName) state = other; else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized && (ptr == atts[nAtts].valuePtr || BYTE_TO_ASCII(enc, ptr) != ' ' || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ' ' || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) atts[nAtts].normalized = 0; break; case BT_CR: case BT_LF: /* This case ensures that the first attribute name is counted Apart from that we could just change state on the quote. */ if (state == inName) state = other; else if (state == inValue && nAtts < attsMax) atts[nAtts].normalized = 0; break; case BT_GT: case BT_SOL: if (state != inValue) return nAtts; break; default: break; } } /* not reached */}staticint PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr){ (void)enc; int result = 0; /* skip &# */ ptr += 2*MINBPC(enc); if (CHAR_MATCHES(enc, ptr, 'x')) { for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': result <<= 4; result |= (c - '0'); break; case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': result <<= 4; result += 10 + (c - 'A'); break; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': result <<= 4; result += 10 + (c - 'a'); break; } if (result >= 0x110000) return -1; } } else { for (; !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); result *= 10; result += (c - '0'); if (result >= 0x110000) return -1; } } return checkCharRefNumber(result);}staticint PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end){ (void)enc; switch ((end - ptr)/MINBPC(enc)) { case 2: if (CHAR_MATCHES(enc, ptr + MINBPC(enc), 't')) { switch (BYTE_TO_ASCII(enc, ptr)) { case 'l': return '<'; case 'g': return '>'; } } break; case 3: if (CHAR_MATCHES(enc, ptr, 'a')) { ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, 'm')) { ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, 'p')) return '&'; } } break; case 4: switch (BYTE_TO_ASCII(enc, ptr)) { case 'q': ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, 'u')) { ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, 'o')) { ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, 't')) return '"'; } } break; case 'a': ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, 'p')) { ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, 'o')) { ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, 's')) return '\''; } } break; } } return 0;}staticint PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2){ for (;;) { switch (BYTE_TYPE(enc, ptr1)) {#define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (*ptr1++ != *ptr2++) \ return 0; LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)#undef LEAD_CASE /* fall through */ if (*ptr1++ != *ptr2++) return 0; break; case BT_NONASCII: case BT_NMSTRT:#ifdef XML_NS case BT_COLON:#endif case BT_HEX: case BT_DIGIT: case BT_NAME: case BT_MINUS: if (*ptr2++ != *ptr1++) return 0; if (MINBPC(enc) > 1) { if (*ptr2++ != *ptr1++) return 0; if (MINBPC(enc) > 2) { if (*ptr2++ != *ptr1++) return 0; if (MINBPC(enc) > 3) { if (*ptr2++ != *ptr1++) return 0; } } } break; default: if (MINBPC(enc) == 1 && *ptr1 == *ptr2) return 1; switch (BYTE_TYPE(enc, ptr2)) { case BT_LEAD2: case BT_LEAD3: case BT_LEAD4: case BT_NONASCII: case BT_NMSTRT:#ifdef XML_NS case BT_COLON:#endif case BT_HEX: case BT_DIGIT: case BT_NAME: case BT_MINUS: return 0; default: return 1; } } } /* not reached */}staticint PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, const char *ptr2){ for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { if (!CHAR_MATCHES(enc, ptr1, *ptr2)) return 0; } switch (BYTE_TYPE(enc, ptr1)) { case BT_LEAD2: case BT_LEAD3: case BT_LEAD4: case BT_NONASCII: case BT_NMSTRT:#ifdef XML_NS case BT_COLON:#endif case BT_HEX: case BT_DIGIT: case BT_NAME: case BT_MINUS: return 0; default: return 1; }}staticint PREFIX(nameLength)(const ENCODING *enc, const char *ptr){ const char *start = ptr; for (;;) { switch (BYTE_TYPE(enc, ptr)) {#define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)#undef LEAD_CASE case BT_NONASCII: case BT_NMSTRT:#ifdef XML_NS case BT_COLON:#endif case BT_HEX: case BT_DIGIT: case BT_NAME: case BT_MINUS: ptr += MINBPC(enc); break; default: return ptr - start; } }}staticconst char *PREFIX(skipS)(const ENCODING *enc, const char *ptr){ for (;;) { switch (BYTE_TYPE(enc, ptr)) { case BT_LF: case BT_CR: case BT_S: ptr += MINBPC(enc); break; default: return ptr; } }}staticvoid PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end, POSITION *pos){ while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) {#define LEAD_CASE(n) \ case BT_LEAD ## n: \ ptr += n; \ break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)#undef LEAD_CASE case BT_LF: pos->columnNumber = (unsigned)-1; pos->lineNumber++; ptr += MINBPC(enc); break; case BT_CR: pos->lineNumber++; ptr += MINBPC(enc); if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); pos->columnNumber = (unsigned)-1; break; default: ptr += MINBPC(enc); break; } pos->columnNumber++; }}#undef DO_LEAD_CASE#undef MULTIBYTE_CASES#undef INVALID_CASES#undef CHECK_NAME_CASE#undef CHECK_NAME_CASES#undef CHECK_NMSTRT_CASE#undef CHECK_NMSTRT_CASES
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -