📄 htmlparser.c
字号:
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "htmlParseEntityRef: no name\n"); ctxt->wellFormed = 0; } else { GROW; if (CUR == ';') { *str = name; /* * Lookup the entity in the table. */ ent = htmlEntityLookup(name); if (ent != NULL) /* OK that's ugly !!! */ NEXT; } else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "htmlParseEntityRef: expecting ';'\n"); *str = name; } } } return(ent);}/** * htmlParseAttValue: * @ctxt: an HTML parser context * * parse a value for an attribute * Note: the parser won't do substitution of entities here, this * will be handled later in xmlStringGetNodeList, unless it was * asked for ctxt->replaceEntities != 0 * * Returns the AttValue parsed or NULL. */xmlChar *htmlParseAttValue(htmlParserCtxtPtr ctxt) { xmlChar *ret = NULL; if (CUR == '"') { NEXT; ret = htmlParseHTMLAttribute(ctxt, '"'); if (CUR != '"') { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); ctxt->wellFormed = 0; } else NEXT; } else if (CUR == '\'') { NEXT; ret = htmlParseHTMLAttribute(ctxt, '\''); if (CUR != '\'') { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); ctxt->wellFormed = 0; } else NEXT; } else { /* * That's an HTMLism, the attribute value may not be quoted */ ret = htmlParseHTMLAttribute(ctxt, 0); if (ret == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "AttValue: no value found\n"); ctxt->wellFormed = 0; } } return(ret);}/** * htmlParseSystemLiteral: * @ctxt: an HTML parser context * * parse an HTML Literal * * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") * * Returns the SystemLiteral parsed or NULL */xmlChar *htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) { const xmlChar *q; xmlChar *ret = NULL; if (CUR == '"') { NEXT; q = CUR_PTR; while ((IS_CHAR(CUR)) && (CUR != '"')) NEXT; if (!IS_CHAR(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); ctxt->wellFormed = 0; } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; } } else if (CUR == '\'') { NEXT; q = CUR_PTR; while ((IS_CHAR(CUR)) && (CUR != '\'')) NEXT; if (!IS_CHAR(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n"); ctxt->wellFormed = 0; } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; } } else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n"); ctxt->wellFormed = 0; } return(ret);}/** * htmlParsePubidLiteral: * @ctxt: an HTML parser context * * parse an HTML public literal * * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" * * Returns the PubidLiteral parsed or NULL. */xmlChar *htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) { const xmlChar *q; xmlChar *ret = NULL; /* * Name ::= (Letter | '_') (NameChar)* */ if (CUR == '"') { NEXT; q = CUR_PTR; while (IS_PUBIDCHAR(CUR)) NEXT; if (CUR != '"') { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); ctxt->wellFormed = 0; } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; } } else if (CUR == '\'') { NEXT; q = CUR_PTR; while ((IS_LETTER(CUR)) && (CUR != '\'')) NEXT; if (!IS_LETTER(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n"); ctxt->wellFormed = 0; } else { ret = xmlStrndup(q, CUR_PTR - q); NEXT; } } else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n"); ctxt->wellFormed = 0; } return(ret);}/** * htmlParseCharData: * @ctxt: an HTML parser context * @cdata: int indicating whether we are within a CDATA section * * parse a CharData section. * if we are within a CDATA section ']]>' marks an end of section. * * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */voidhtmlParseCharData(htmlParserCtxtPtr ctxt, int cdata) { xmlChar *buf = NULL; int len = 0; int size = HTML_PARSER_BUFFER_SIZE; xmlChar q; buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); if (buf == NULL) { fprintf(stderr, "malloc of %d byte failed\n", size); return; } q = CUR; while ((IS_CHAR(q)) && (q != '<') && (q != '&')) { if ((q == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { if (cdata) break; else { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Sequence ']]>' not allowed in content\n"); ctxt->wellFormed = 0; } } if (len + 1 >= size) { size *= 2; buf = xmlRealloc(buf, size * sizeof(xmlChar)); if (buf == NULL) { fprintf(stderr, "realloc of %d byte failed\n", size); return; } } buf[len++] = q; NEXT; q = CUR; } if (len == 0) { xmlFree(buf); return; } /* * Ok the buffer is to be consumed as chars. */ if (ctxt->sax != NULL) { if (areBlanks(ctxt, buf, len)) { if (ctxt->sax->ignorableWhitespace != NULL) ctxt->sax->ignorableWhitespace(ctxt->userData, buf, len); } else { if (ctxt->sax->characters != NULL) ctxt->sax->characters(ctxt->userData, buf, len); } } xmlFree(buf);}/** * htmlParseExternalID: * @ctxt: an HTML parser context * @publicID: a xmlChar** receiving PubidLiteral * @strict: indicate whether we should restrict parsing to only * production [75], see NOTE below * * Parse an External ID or a Public ID * * NOTE: Productions [75] and [83] interract badly since [75] can generate * 'PUBLIC' S PubidLiteral S SystemLiteral * * [75] ExternalID ::= 'SYSTEM' S SystemLiteral * | 'PUBLIC' S PubidLiteral S SystemLiteral * * [83] PublicID ::= 'PUBLIC' S PubidLiteral * * Returns the function returns SystemLiteral and in the second * case publicID receives PubidLiteral, is strict is off * it is possible to return NULL and have publicID set. */xmlChar *htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { xmlChar *URI = NULL; if ((UPPER == 'S') && (UPP(1) == 'Y') && (UPP(2) == 'S') && (UPP(3) == 'T') && (UPP(4) == 'E') && (UPP(5) == 'M')) { SKIP(6); if (!IS_BLANK(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Space required after 'SYSTEM'\n"); ctxt->wellFormed = 0; } SKIP_BLANKS; URI = htmlParseSystemLiteral(ctxt); if (URI == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "htmlParseExternalID: SYSTEM, no URI\n"); ctxt->wellFormed = 0; } } else if ((UPPER == 'P') && (UPP(1) == 'U') && (UPP(2) == 'B') && (UPP(3) == 'L') && (UPP(4) == 'I') && (UPP(5) == 'C')) { SKIP(6); if (!IS_BLANK(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Space required after 'PUBLIC'\n"); ctxt->wellFormed = 0; } SKIP_BLANKS; *publicID = htmlParsePubidLiteral(ctxt); if (*publicID == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "htmlParseExternalID: PUBLIC, no Public Identifier\n"); ctxt->wellFormed = 0; } SKIP_BLANKS; if ((CUR == '"') || (CUR == '\'')) { URI = htmlParseSystemLiteral(ctxt); } } return(URI);}/** * htmlParseComment: * @ctxt: an HTML parser context * * Parse an XML (SGML) comment <!-- .... --> * * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */voidhtmlParseComment(htmlParserCtxtPtr ctxt) { xmlChar *buf = NULL; int len = 0; int size = HTML_PARSER_BUFFER_SIZE; register xmlChar s, r, q; /* * Check that there is a comment right here. */ if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '-') || (NXT(3) != '-')) return; buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar)); if (buf == NULL) { fprintf(stderr, "malloc of %d byte failed\n", size); return; } q = r = '-'; /* 0 or '-' to cover our ass against <!--> and <!---> ? !!! */ SKIP(4); s = CUR; while (IS_CHAR(s) && ((s != '>') || (r != '-') || (q != '-'))) { if (len + 1 >= size) { size *= 2; buf = xmlRealloc(buf, size * sizeof(xmlChar)); if (buf == NULL) { fprintf(stderr, "realloc of %d byte failed\n", size); return; } } buf[len++] = s; NEXT; q = r; r = s; s = CUR; } buf[len - 2] = 0; if (!IS_CHAR(s)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "Comment not terminated \n<!--%.50s\n", buf); ctxt->wellFormed = 0; } else { NEXT; if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL)) { ctxt->sax->comment(ctxt->userData, buf); } } xmlFree(buf);}/** * htmlParseCharRef: * @ctxt: an HTML parser context * * parse Reference declarations * * [66] CharRef ::= '&#' [0-9]+ ';' | * '&#x' [0-9a-fA-F]+ ';' * * Returns the value parsed (as an int) */inthtmlParseCharRef(htmlParserCtxtPtr ctxt) { int val = 0; if ((CUR == '&') && (NXT(1) == '#') && (NXT(2) == 'x')) { SKIP(3); while (CUR != ';') { if ((CUR >= '0') && (CUR <= '9')) val = val
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -