⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmlparser.c

📁 SIP 1.5.0源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
 * Returns the new input stream or NULL */htmlParserInputPtrhtmlNewInputStream(htmlParserCtxtPtr ctxt) {    htmlParserInputPtr input;    input = (xmlParserInputPtr) xmlMalloc(sizeof(htmlParserInput));    if (input == NULL) {        ctxt->errNo = XML_ERR_NO_MEMORY;	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))	    ctxt->sax->error(ctxt->userData, 	                     "malloc: couldn't allocate a new input stream\n");	ctxt->errNo = XML_ERR_NO_MEMORY;	return(NULL);    }    input->filename = NULL;    input->directory = NULL;    input->base = NULL;    input->cur = NULL;    input->buf = NULL;    input->line = 1;    input->col = 1;    input->buf = NULL;    input->free = NULL;    input->consumed = 0;    input->length = 0;    return(input);}/************************************************************************ *									* *		Commodity functions, cleanup needed ?			* *									* ************************************************************************//** * areBlanks: * @ctxt:  an HTML parser context * @str:  a xmlChar * * @len:  the size of @str * * Is this a sequence of blank chars that one can ignore ? * * Returns 1 if ignorable 0 otherwise. */static int areBlanks(htmlParserCtxtPtr ctxt, const xmlChar *str, int len) {    int i;    xmlNodePtr lastChild;    for (i = 0;i < len;i++)        if (!(IS_BLANK(str[i]))) return(0);    if (CUR != '<') return(0);    if (ctxt->node == NULL) return(0);    lastChild = xmlGetLastChild(ctxt->node);    if (lastChild == NULL) {        if (ctxt->node->content != NULL) return(0);    } else if (xmlNodeIsText(lastChild))        return(0);    return(1);}/** * htmlHandleEntity: * @ctxt:  an HTML parser context * @entity:  an XML entity pointer. * * Default handling of an HTML entity, call the parser with the * substitution string */voidhtmlHandleEntity(htmlParserCtxtPtr ctxt, xmlEntityPtr entity) {    int len;    if (entity->content == NULL) {        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))	    ctxt->sax->error(ctxt->userData, "htmlHandleEntity %s: content == NULL\n",	               entity->name);	ctxt->wellFormed = 0;        return;    }    len = xmlStrlen(entity->content);    /*     * Just handle the content as a set of chars.     */    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))	ctxt->sax->characters(ctxt->userData, entity->content, len);}/** * htmlNewDoc: * @URI:  URI for the dtd, or NULL * @ExternalID:  the external ID of the DTD, or NULL * * Returns a new document */htmlDocPtrhtmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {    xmlDocPtr cur;    /*     * Allocate a new document and fill the fields.     */    cur = (xmlDocPtr) xmlMalloc(sizeof(xmlDoc));    if (cur == NULL) {        fprintf(stderr, "xmlNewDoc : malloc failed\n");	return(NULL);    }    memset(cur, 0, sizeof(xmlDoc));    cur->type = XML_HTML_DOCUMENT_NODE;    cur->version = NULL;    cur->intSubset = NULL;    if ((ExternalID == NULL) &&	(URI == NULL))	xmlCreateIntSubset(cur, BAD_CAST "HTML",		    BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN",		    BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd");    else	xmlCreateIntSubset(cur, BAD_CAST "HTML", ExternalID, URI);    cur->name = NULL;    cur->children = NULL;     cur->extSubset = NULL;    cur->oldNs = NULL;    cur->encoding = NULL;    cur->standalone = 1;    cur->compression = 0;    cur->ids = NULL;    cur->refs = NULL;#ifndef XML_WITHOUT_CORBA    cur->_private = NULL;#endif    return(cur);}/************************************************************************ *									* *			The parser itself				* *	Relates to http://www.w3.org/TR/html40				* *									* ************************************************************************//************************************************************************ *									* *			The parser itself				* *									* ************************************************************************//** * htmlParseHTMLName: * @ctxt:  an HTML parser context * * parse an HTML tag or attribute name, note that we convert it to lowercase * since HTML names are not case-sensitive. * * Returns the Tag Name parsed or NULL */xmlChar *htmlParseHTMLName(htmlParserCtxtPtr ctxt) {    xmlChar *ret = NULL;    int i = 0;    xmlChar loc[HTML_PARSER_BUFFER_SIZE];    if (!IS_LETTER(CUR) && (CUR != '_') &&        (CUR != ':')) return(NULL);    while ((i < HTML_PARSER_BUFFER_SIZE) &&           ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)))) {	if ((CUR >= 'A') && (CUR <= 'Z')) loc[i] = CUR + 0x20;        else loc[i] = CUR;	i++;		NEXT;    }        ret = xmlStrndup(loc, i);    return(ret);}/** * htmlParseName: * @ctxt:  an HTML parser context * * parse an HTML name, this routine is case sensistive. * * Returns the Name parsed or NULL */xmlChar *htmlParseName(htmlParserCtxtPtr ctxt) {    xmlChar buf[HTML_MAX_NAMELEN];    int len = 0;    GROW;    if (!IS_LETTER(CUR) && (CUR != '_')) {	return(NULL);    }    while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||           (CUR == '.') || (CUR == '-') ||	   (CUR == '_') || (CUR == ':') || 	   (IS_COMBINING(CUR)) ||	   (IS_EXTENDER(CUR))) {	buf[len++] = CUR;	NEXT;	if (len >= HTML_MAX_NAMELEN) {	    fprintf(stderr, 	       "htmlParseName: reached HTML_MAX_NAMELEN limit\n");	    while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||		   (CUR == '.') || (CUR == '-') ||		   (CUR == '_') || (CUR == ':') || 		   (IS_COMBINING(CUR)) ||		   (IS_EXTENDER(CUR)))		 NEXT;	    break;	}    }    return(xmlStrndup(buf, len));}/** * htmlParseHTMLAttribute: * @ctxt:  an HTML parser context * @stop:  a char stop value *  * parse an HTML attribute value till the stop (quote), if * stop is 0 then it stops at the first space * * Returns the attribute parsed or NULL */xmlChar *htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) {#if 0    xmlChar buf[HTML_MAX_NAMELEN];    int len = 0;    GROW;    while ((CUR != 0) && (CUR != stop) && (CUR != '>')) {	if ((stop == 0) && (IS_BLANK(CUR))) break;	buf[len++] = CUR;	NEXT;	if (len >= HTML_MAX_NAMELEN) {	    fprintf(stderr, 	       "htmlParseHTMLAttribute: reached HTML_MAX_NAMELEN limit\n");	    while ((!IS_BLANK(CUR)) && (CUR != '<') &&		   (CUR != '>') &&		   (CUR != '\'') && (CUR != '"'))		 NEXT;	    break;	}    }    return(xmlStrndup(buf, len));#else        xmlChar *buffer = NULL;    int buffer_size = 0;    xmlChar *out = NULL;    xmlChar *name = NULL;    xmlChar *cur = NULL;    htmlEntityDescPtr ent;    /*     * allocate a translation buffer.     */    buffer_size = HTML_PARSER_BIG_BUFFER_SIZE;    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));    if (buffer == NULL) {	perror("htmlParseHTMLAttribute: malloc failed");	return(NULL);    }    out = buffer;    /*     * Ok loop until we reach one of the ending chars     */    while ((CUR != 0) && (CUR != stop) && (CUR != '>')) {	if ((stop == 0) && (IS_BLANK(CUR))) break;        if (CUR == '&') {	    if (NXT(1) == '#') {		int val = htmlParseCharRef(ctxt);		*out++ = val;	    } else {		ent = htmlParseEntityRef(ctxt, &name);		if (name == NULL) {		    *out++ = '&';		    if (out - buffer > buffer_size - 100) {			int index = out - buffer;			growBuffer(buffer);			out = &buffer[index];		    }		} else if ((ent == NULL) || (ent->value <= 0) ||		           (ent->value >= 255)) {		    *out++ = '&';		    cur = name;		    while (*cur != 0) {			if (out - buffer > buffer_size - 100) {			    int index = out - buffer;			    growBuffer(buffer);			    out = &buffer[index];			}			*out++ = *cur++;		    }		    xmlFree(name);		} else {		    *out++ = ent->value;		    if (out - buffer > buffer_size - 100) {			int index = out - buffer;			growBuffer(buffer);			out = &buffer[index];		    }		    xmlFree(name);		}	    }	} else {	    *out++ = CUR;	    if (out - buffer > buffer_size - 100) {	      int index = out - buffer;	      	      growBuffer(buffer);	      out = &buffer[index];	    }	    NEXT;	}    }    *out++ = 0;    return(buffer);#endif}/** * htmlParseNmtoken: * @ctxt:  an HTML parser context *  * parse an HTML Nmtoken. * * Returns the Nmtoken parsed or NULL */xmlChar *htmlParseNmtoken(htmlParserCtxtPtr ctxt) {    xmlChar buf[HTML_MAX_NAMELEN];    int len = 0;    GROW;    while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||           (CUR == '.') || (CUR == '-') ||	   (CUR == '_') || (CUR == ':') || 	   (IS_COMBINING(CUR)) ||	   (IS_EXTENDER(CUR))) {	buf[len++] = CUR;	NEXT;	if (len >= HTML_MAX_NAMELEN) {	    fprintf(stderr, 	       "htmlParseNmtoken: reached HTML_MAX_NAMELEN limit\n");	    while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||		   (CUR == '.') || (CUR == '-') ||		   (CUR == '_') || (CUR == ':') || 		   (IS_COMBINING(CUR)) ||		   (IS_EXTENDER(CUR)))		 NEXT;	    break;	}    }    return(xmlStrndup(buf, len));}/** * htmlParseEntityRef: * @ctxt:  an HTML parser context * @str:  location to store the entity name * * parse an HTML ENTITY references * * [68] EntityRef ::= '&' Name ';' * * Returns the associated htmlEntityDescPtr if found, or NULL otherwise, *         if non-NULL *str will have to be freed by the caller. */htmlEntityDescPtrhtmlParseEntityRef(htmlParserCtxtPtr ctxt, xmlChar **str) {    xmlChar *name;    htmlEntityDescPtr ent = NULL;    *str = NULL;    if (CUR == '&') {        NEXT;        name = htmlParseName(ctxt);	if (name == NULL) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -