📄 xmltok_impl.c
字号:
break; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } /* fall through */ case BT_EQUALS: { int open; for (;;) { ptr += MINBPC; if (ptr == end) return XML_TOK_PARTIAL; open = BYTE_TYPE(enc, ptr); if (open == BT_QUOT || open == BT_APOS) break; switch (open) { case BT_S: case BT_LF: case BT_CR: break; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } ptr += MINBPC; /* in attribute value */ for (;;) { int t; if (ptr == end) return XML_TOK_PARTIAL; t = BYTE_TYPE(enc, ptr); if (t == open) break; switch (t) { INVALID_CASES(ptr, nextTokPtr) case BT_AMP: { int tok = PREFIX(scanRef)(enc, ptr + MINBPC, end, &ptr); if (tok <= 0) { if (tok == XML_TOK_INVALID) *nextTokPtr = ptr; return tok; } break; } case BT_LT: *nextTokPtr = ptr; return XML_TOK_INVALID; default: ptr += MINBPC; break; } } ptr += MINBPC; if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_CR: case BT_LF: break; case BT_SOL: goto sol; case BT_GT: goto gt; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } /* ptr points to closing quote */ for (;;) { ptr += MINBPC; if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: continue; case BT_GT: gt: *nextTokPtr = ptr + MINBPC; return XML_TOK_START_TAG_WITH_ATTS; case BT_SOL: sol: ptr += MINBPC; if (ptr == end) return XML_TOK_PARTIAL; if (!CHAR_MATCHES(enc, ptr, '>')) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC; return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } break; } break; } default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL;}/* ptr points to character following "<" */staticint PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_EXCL: if ((ptr += MINBPC) == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { case BT_MINUS: return PREFIX(scanComment)(enc, ptr + MINBPC, end, nextTokPtr); case BT_LSQB: return PREFIX(scanCdataSection)(enc, ptr + MINBPC, end, nextTokPtr); } *nextTokPtr = ptr; return XML_TOK_INVALID; case BT_QUEST: return PREFIX(scanPi)(enc, ptr + MINBPC, end, nextTokPtr); case BT_SOL: return PREFIX(scanEndTag)(enc, ptr + MINBPC, end, nextTokPtr); default: *nextTokPtr = ptr; return XML_TOK_INVALID; } /* we have a start-tag */ while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: { ptr += MINBPC; while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_GT: goto gt; case BT_SOL: goto sol; case BT_S: case BT_CR: case BT_LF: ptr += MINBPC; continue; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); } return XML_TOK_PARTIAL; } case BT_GT: gt: *nextTokPtr = ptr + MINBPC; return XML_TOK_START_TAG_NO_ATTS; case BT_SOL: sol: ptr += MINBPC; if (ptr == end) return XML_TOK_PARTIAL; if (!CHAR_MATCHES(enc, ptr, '>')) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC; return XML_TOK_EMPTY_ELEMENT_NO_ATTS; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL;}staticint PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ if (ptr == end) return XML_TOK_NONE;#if MINBPC > 1 { size_t n = end - ptr; if (n & (MINBPC - 1)) { n &= ~(MINBPC - 1); if (n == 0) return XML_TOK_PARTIAL; end = ptr + n; } }#endif switch (BYTE_TYPE(enc, ptr)) { case BT_LT: return PREFIX(scanLt)(enc, ptr + MINBPC, end, nextTokPtr); case BT_AMP: return PREFIX(scanRef)(enc, ptr + MINBPC, end, nextTokPtr); case BT_CR: ptr += MINBPC; if (ptr == end) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC; *nextTokPtr = ptr; return XML_TOK_DATA_NEWLINE; case BT_LF: *nextTokPtr = ptr + MINBPC; return XML_TOK_DATA_NEWLINE; case BT_RSQB: ptr += MINBPC; if (ptr == end) return XML_TOK_TRAILING_RSQB; if (!CHAR_MATCHES(enc, ptr, ']')) break; ptr += MINBPC; if (ptr == end) return XML_TOK_TRAILING_RSQB; if (!CHAR_MATCHES(enc, ptr, '>')) { ptr -= MINBPC; break; } *nextTokPtr = ptr; return XML_TOK_INVALID; INVALID_CASES(ptr, nextTokPtr) default: ptr += MINBPC; break; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) {#define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ *nextTokPtr = ptr; \ return XML_TOK_DATA_CHARS; \ } \ ptr += n; \ break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)#undef LEAD_CASE case BT_RSQB: if (ptr + MINBPC != end) { if (!CHAR_MATCHES(enc, ptr + MINBPC, ']')) { ptr += MINBPC; break; } if (ptr + 2*MINBPC != end) { if (!CHAR_MATCHES(enc, ptr + 2*MINBPC, '>')) { ptr += MINBPC; break; } *nextTokPtr = ptr + 2*MINBPC; return XML_TOK_INVALID; } } /* fall through */ case BT_AMP: case BT_LT: case BT_NONXML: case BT_MALFORM: case BT_TRAIL: case BT_CR: case BT_LF: *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; default: ptr += MINBPC; break; } } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS;}/* ptr points to character following "%" */staticint PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: *nextTokPtr = ptr; return XML_TOK_PERCENT; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_SEMI: *nextTokPtr = ptr + MINBPC; return XML_TOK_PARAM_ENTITY_REF; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL;}staticint PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_CR: case BT_LF: case BT_S: case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR: *nextTokPtr = ptr; return XML_TOK_POUND_NAME; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL;}staticint PREFIX(scanLit)(int open, const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ while (ptr != end) { int t = BYTE_TYPE(enc, ptr); switch (t) { INVALID_CASES(ptr, nextTokPtr) case BT_QUOT: case BT_APOS: ptr += MINBPC; if (t != open) break; if (ptr == end) return XML_TOK_PARTIAL; *nextTokPtr = ptr; switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_CR: case BT_LF: case BT_GT: case BT_PERCNT: case BT_LSQB: return XML_TOK_LITERAL; default: return XML_TOK_INVALID; } default: ptr += MINBPC; break; } } return XML_TOK_PARTIAL;}staticint PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr){ int tok; if (ptr == end) return XML_TOK_NONE;#if MINBPC > 1 { size_t n = end - ptr; if (n & (MINBPC - 1)) { n &= ~(MINBPC - 1); if (n == 0) return XML_TOK_PARTIAL; end = ptr + n; } }#endif switch (BYTE_TYPE(enc, ptr)) { case BT_QUOT: return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC, end, nextTokPtr); case BT_APOS: return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC, end, nextTokPtr); case BT_LT: { ptr += MINBPC; if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { case BT_EXCL: return PREFIX(scanDecl)(enc, ptr + MINBPC, end, nextTokPtr); case BT_QUEST: return PREFIX(scanPi)(enc, ptr + MINBPC, end, nextTokPtr); case BT_NMSTRT: case BT_HEX: case BT_NONASCII: case BT_LEAD2: case BT_LEAD3: case BT_LEAD4: *nextTokPtr = ptr - MINBPC; return XML_TOK_INSTANCE_START; } *nextTokPtr = ptr; return XML_TOK_INVALID; } case BT_CR: if (ptr + MINBPC == end) return XML_TOK_TRAILING_CR; /* fall through */ case BT_S: case BT_LF: for (;;) { ptr += MINBPC; if (ptr == end) break; switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_LF: break; case BT_CR: /* don't split CR/LF pair */ if (ptr + MINBPC != end) break; /* fall through */ default: *nextTokPtr = ptr; return XML_TOK_PROLOG_S; } } *nextTokPtr = ptr; return XML_TOK_PROLOG_S; case BT_PERCNT: return PREFIX(scanPercent)(enc, ptr + MINBPC, end, nextTokPtr); case BT_COMMA: *nextTokPtr = ptr + MINBPC; return XML_TOK_COMMA; case BT_LSQB: *nextTokPtr = ptr + MINBPC; return XML_TOK_OPEN_BRACKET; case BT_RSQB: ptr += MINBPC; if (ptr == end) return XML_TOK_PARTIAL; if (CHAR_MATCHES(enc, ptr, ']')) { if (ptr + MINBPC == end) return XML_TOK_PARTIAL; if (CHAR_MATCHES(enc, ptr + MINBPC, '>')) { *nextTokPtr = ptr + 2*MINBPC; return XML_TOK_COND_SECT_CLOSE; } } *nextTokPtr = ptr; return XML_TOK_CLOSE_BRACKET; case BT_LPAR: *nextTokPtr = ptr + MINBPC; return XML_TOK_OPEN_PAREN; case BT_RPAR: ptr += MINBPC; if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { case BT_AST: *nextTokPtr = ptr + MINBPC; return XML_TOK_CLOSE_PAREN_ASTERISK; case BT_QUEST: *nextTokPtr = ptr + MINBPC; return XML_TOK_CLOSE_PAREN_QUESTION; case BT_PLUS: *nextTokPtr = ptr + MINBPC; return XML_TOK_CLOSE_PAREN_PLUS; case BT_CR: case BT_LF: case BT_S: case BT_GT: case BT_COMMA: case BT_VERBAR: case BT_RPAR: *nextTokPtr = ptr; return XML_TOK_CLOSE_PAREN; } *nextTokPtr = ptr; return XML_TOK_INVALID; case BT_VERBAR: *nextTokPtr = ptr + MINBPC; return XML_TOK_OR; case BT_GT: *nextTokPtr = ptr + MINBPC; return XML_TOK_DECL_CLOSE; case BT_NUM: return PREFIX(scanPoundName)(enc, ptr + MINBPC, end, nextTokPtr);#define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ ptr += n; \ tok = XML_TOK_NAME; \ break; \ } \ if (IS_NAME_CHAR(enc, ptr, n)) { \ ptr += n; \ tok = XML_TOK_NMTOKEN; \ break; \ } \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)#undef LEAD_CASE case BT_NMSTRT: case BT_HEX: tok = XML_TOK_NAME; ptr += MINBPC; break; case BT_DIGIT: case BT_NAME: case BT_MINUS: tok = XML_TOK_NMTOKEN; ptr += MINBPC; break; case BT_NONASCII: if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { ptr += MINBPC; tok = XML_TOK_NAME; break; } if (IS_NAME_CHAR_MINBPC(enc, ptr)) { ptr += MINBPC; tok = XML_TOK_NMTOKEN; break; } /* fall through */ default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -