htmlparser.c

来自「SIP 1.5.0源代码」· C语言代码 · 共 1,997 行 · 第 1/5 页
1,997 行
	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))	        ctxt->sax->error(ctxt->userData, "htmlParseEntityRef: no name\n");	    ctxt->wellFormed = 0;	} else {	    GROW;	    if (CUR == ';') {		*str = name;		/*		 * Lookup the entity in the table.		 */		ent = htmlEntityLookup(name);		if (ent != NULL) /* OK that's ugly !!! */		    NEXT;	    } else {		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))		    ctxt->sax->error(ctxt->userData,		                     "htmlParseEntityRef: expecting ';'\n");		*str = name;	    }	}    }    return(ent);}/** * htmlParseAttValue: * @ctxt:  an HTML parser context * * parse a value for an attribute * Note: the parser won't do substitution of entities here, this * will be handled later in xmlStringGetNodeList, unless it was * asked for ctxt->replaceEntities != 0  * * Returns the AttValue parsed or NULL. */xmlChar *htmlParseAttValue(htmlParserCtxtPtr ctxt) {    xmlChar *ret = NULL;    if (CUR == '"') {        NEXT;	ret = htmlParseHTMLAttribute(ctxt, '"');        if (CUR != '"') {	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))		ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");	    ctxt->wellFormed = 0;	} else	    NEXT;    } else if (CUR == '\'') {        NEXT;	ret = htmlParseHTMLAttribute(ctxt, '\'');        if (CUR != '\'') {	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))		ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");	    ctxt->wellFormed = 0;	} else	    NEXT;    } else {        /*	 * That's an HTMLism, the attribute value may not be quoted	 */	ret = htmlParseHTMLAttribute(ctxt, 0);	if (ret == NULL) {	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))		ctxt->sax->error(ctxt->userData, "AttValue: no value found\n");	    ctxt->wellFormed = 0;	}    }    return(ret);}/** * htmlParseSystemLiteral: * @ctxt:  an HTML parser context *  * parse an HTML Literal * * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") * * Returns the SystemLiteral parsed or NULL */xmlChar 
*htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
    const xmlChar *start;
    xmlChar *literal = NULL;
    /* The opening quote decides which character closes the literal. */
    xmlChar delim = CUR;

    /* Anything other than a single or double quote is not a literal. */
    if ((delim != '"') && (delim != '\'')) {
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                             "SystemLiteral \" or ' expected\n");
        ctxt->wellFormed = 0;
        return(literal);
    }

    /* Step over the opening quote and remember where the content starts. */
    NEXT;
    start = CUR_PTR;

    /* Advance up to the closing quote or the first non-IS_CHAR character. */
    while ((IS_CHAR(CUR)) && (CUR != delim))
        NEXT;

    if (IS_CHAR(CUR)) {
        /* Stopped on the closing quote: copy the content and consume it. */
        literal = xmlStrndup(start, CUR_PTR - start);
        NEXT;
    } else {
        /* Scanning ended before a closing quote was seen. */
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
        ctxt->wellFormed = 0;
    }

    return(literal);
}

/**
 * htmlParsePubidLiteral:
 * @ctxt:  an HTML parser context
 *
 * parse an HTML public literal
 *
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
 *
 * Returns the PubidLiteral parsed or NULL.
*/
xmlChar *
htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
    const xmlChar *q;
    xmlChar *ret = NULL;

    if (CUR == '"') {
        NEXT;
        q = CUR_PTR;
        while (IS_PUBIDCHAR(CUR)) NEXT;
        if (CUR != '"') {
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
            ctxt->wellFormed = 0;
        } else {
            ret = xmlStrndup(q, CUR_PTR - q);
            NEXT;
        }
    } else if (CUR == '\'') {
        NEXT;
        q = CUR_PTR;
        /*
         * Production [12] allows (PubidChar - "'")* between single quotes,
         * so scan PubidChars and stop explicitly on the closing quote.
         * Success is then decided by whether that quote was reached.
         */
        while ((IS_PUBIDCHAR(CUR)) && (CUR != '\''))
            NEXT;
        if (CUR != '\'') {
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
            ctxt->wellFormed = 0;
        } else {
            ret = xmlStrndup(q, CUR_PTR - q);
            NEXT;
        }
    } else {
        /* Neither quote style is present: there is no literal to parse. */
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData, "PubidLiteral \" or ' expected\n");
        ctxt->wellFormed = 0;
    }

    return(ret);
}

/**
 * htmlParseCharData:
 * @ctxt:  an HTML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
* * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */voidhtmlParseCharData(htmlParserCtxtPtr ctxt, int cdata) {    xmlChar *buf = NULL;    int len = 0;    int size = HTML_PARSER_BUFFER_SIZE;    xmlChar q;    buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));    if (buf == NULL) {	fprintf(stderr, "malloc of %d byte failed\n", size);	return;    }    q = CUR;    while ((IS_CHAR(q)) && (q != '<') &&           (q != '&')) {	if ((q == ']') && (NXT(1) == ']') &&	    (NXT(2) == '>')) {	    if (cdata) break;	    else {		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))		    ctxt->sax->error(ctxt->userData,		       "Sequence ']]>' not allowed in content\n");		ctxt->wellFormed = 0;	    }	}	if (len + 1 >= size) {	    size *= 2;	    buf = xmlRealloc(buf, size * sizeof(xmlChar));	    if (buf == NULL) {		fprintf(stderr, "realloc of %d byte failed\n", size);		return;	    }	}	buf[len++] = q;        NEXT;	q = CUR;    }    if (len == 0) {	xmlFree(buf);	return;    }    /*     * Ok the buffer is to be consumed as chars.     
*/    if (ctxt->sax != NULL) {	if (areBlanks(ctxt, buf, len)) {	    if (ctxt->sax->ignorableWhitespace != NULL)		ctxt->sax->ignorableWhitespace(ctxt->userData, buf, len);	} else {	    if (ctxt->sax->characters != NULL)		ctxt->sax->characters(ctxt->userData, buf, len);        }    }    xmlFree(buf);}/** * htmlParseExternalID: * @ctxt:  an HTML parser context * @publicID:  a xmlChar** receiving PubidLiteral * @strict: indicate whether we should restrict parsing to only *          production [75], see NOTE below * * Parse an External ID or a Public ID * * NOTE: Productions [75] and [83] interract badly since [75] can generate *       'PUBLIC' S PubidLiteral S SystemLiteral * * [75] ExternalID ::= 'SYSTEM' S SystemLiteral *                   | 'PUBLIC' S PubidLiteral S SystemLiteral * * [83] PublicID ::= 'PUBLIC' S PubidLiteral * * Returns the function returns SystemLiteral and in the second *                case publicID receives PubidLiteral, is strict is off *                it is possible to return NULL and have publicID set. 
*/
xmlChar *
htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
    xmlChar *URI = NULL;

    /* NOTE(review): @strict is not consulted anywhere in this body. */
    if ((UPPER == 'S') && (UPP(1) == 'Y') &&
         (UPP(2) == 'S') && (UPP(3) == 'T') &&
         (UPP(4) == 'E') && (UPP(5) == 'M')) {
        SKIP(6);
        if (!IS_BLANK(CUR)) {
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                    "Space required after 'SYSTEM'\n");
            ctxt->wellFormed = 0;
        }
        SKIP_BLANKS;
        URI = htmlParseSystemLiteral(ctxt);
        if (URI == NULL) {
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                  "htmlParseExternalID: SYSTEM, no URI\n");
            ctxt->wellFormed = 0;
        }
    } else if ((UPPER == 'P') && (UPP(1) == 'U') &&
               (UPP(2) == 'B') && (UPP(3) == 'L') &&
               (UPP(4) == 'I') && (UPP(5) == 'C')) {
        SKIP(6);
        if (!IS_BLANK(CUR)) {
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                    "Space required after 'PUBLIC'\n");
            ctxt->wellFormed = 0;
        }
        SKIP_BLANKS;
        *publicID = htmlParsePubidLiteral(ctxt);
        if (*publicID == NULL) {
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                  "htmlParseExternalID: PUBLIC, no Public Identifier\n");
            ctxt->wellFormed = 0;
        }
        SKIP_BLANKS;
        /* The system literal is optional after a public identifier. */
        if ((CUR == '"') || (CUR == '\'')) {
            URI = htmlParseSystemLiteral(ctxt);
        }
    }
    return(URI);
}

/**
 * htmlParseComment:
 * @ctxt:  an HTML parser context
 *
 * Parse an XML (SGML) comment <!-- .... -->
 *
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 *
 * Does nothing unless the input starts with '<!--'.  The characters
 * following that opener are collected until '>' is met right after two
 * '-' characters, or until a character rejected by IS_CHAR is met.  In
 * the first case the collected text without its two trailing '-' is
 * given to the SAX comment callback; in the second case an error is
 * reported and ctxt->wellFormed is cleared.
 */
void
htmlParseComment(htmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = HTML_PARSER_BUFFER_SIZE;
    xmlChar s, r, q;

    /*
     * Check that there is a comment right here.
     */
    if ((CUR != '<') || (NXT(1) != '!') ||
        (NXT(2) != '-') || (NXT(3) != '-')) return;

    buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
    if (buf == NULL) {
        fprintf(stderr, "malloc of %d byte failed\n", size);
        return;
    }
    q = r = '-'; /* 0 or '-' to cover our ass against <!--> and <!---> ? !!! */
    SKIP(4);
    s = CUR;

    /* q and r always hold the two characters that precede s. */
    while (IS_CHAR(s) &&
           ((s != '>') || (r != '-') || (q != '-'))) {
        if (len + 1 >= size) {
            xmlChar *tmp;

            size *= 2;
            /*
             * Keep the old pointer until the reallocation is known to have
             * succeeded, so the buffer can be released on failure instead
             * of being leaked.
             */
            tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
            if (tmp == NULL) {
                fprintf(stderr, "realloc of %d byte failed\n", size);
                xmlFree(buf);
                return;
            }
            buf = tmp;
        }
        buf[len++] = s;
        NEXT;
        q = r;
        r = s;
        s = CUR;
    }

    /*
     * Drop the two characters collected last (the "--" of the closing
     * "-->").  Because q and r start as '-', inputs such as <!--> and
     * <!---> end the loop with fewer than two characters collected; the
     * index is clamped so the terminator never lands before the buffer.
     */
    buf[(len >= 2) ? (len - 2) : 0] = 0;

    if (!IS_CHAR(s)) {
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData, "Comment not terminated \n<!--%.50s\n", buf);
        ctxt->wellFormed = 0;
    } else {
        /* Consume the closing '>'. */
        NEXT;
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL)) {
            ctxt->sax->comment(ctxt->userData, buf);
        }
    }
    xmlFree(buf);
}

/**
 * htmlParseCharRef:
 * @ctxt:  an HTML parser context
 *
 * parse Reference declarations
 *
 * [66] CharRef ::= '&#' [0-9]+ ';' |
 *                  '&#x' [0-9a-fA-F]+ ';'
 *
 * Returns the value parsed (as an int)
 */
int
htmlParseCharRef(htmlParserCtxtPtr ctxt) {
    int val = 0;

    if ((CUR == '&') && (NXT(1) == '#') &&
        (NXT(2) == 'x')) {
        SKIP(3);
        while (CUR != ';') {
            if ((CUR >= '0') && (CUR <= '9'))
                val = val
htmlparser.c - 源码说明

本页面展示了「SIP 1.5.0源代码」中的 htmlparser.c 源码文件，采用 C语言编程语言编写，共 1,997 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与SIP相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?