📄 htmlparser.c
字号:
{"head", 200}, {"body", 200}, {"html", 220}, {NULL, 100} /* Default priority */};static const char** htmlStartCloseIndex[100];static int htmlStartCloseIndexinitialized = 0;/************************************************************************ * * * functions to handle HTML specific data * * * ************************************************************************//** * htmlInitAutoClose: * * Initialize the htmlStartCloseIndex for fast lookup of closing tags names. * This is not reentrant. Call xmlInitParser() once before processing in * case of use in multithreaded programs. */voidhtmlInitAutoClose(void) { int indx, i = 0; if (htmlStartCloseIndexinitialized) return; for (indx = 0;indx < 100;indx ++) htmlStartCloseIndex[indx] = NULL; indx = 0; while ((htmlStartClose[i] != NULL) && (indx < 100 - 1)) { htmlStartCloseIndex[indx++] = (const char**) &htmlStartClose[i]; while (htmlStartClose[i] != NULL) i++; i++; } htmlStartCloseIndexinitialized = 1;}/** * htmlTagLookup: * @tag: The tag name in lowercase * * Lookup the HTML tag in the ElementTable * * Returns the related htmlElemDescPtr or NULL if not found. */const htmlElemDesc *htmlTagLookup(const xmlChar *tag) { unsigned int i; for (i = 0; i < (sizeof(html40ElementTable) / sizeof(html40ElementTable[0]));i++) { if (!xmlStrcasecmp(tag, BAD_CAST html40ElementTable[i].name)) return((htmlElemDescPtr) &html40ElementTable[i]); } return(NULL);}/** * htmlGetEndPriority: * @name: The name of the element to look up the priority for. * * Return value: The "endtag" priority. **/static inthtmlGetEndPriority (const xmlChar *name) { int i = 0; while ((htmlEndPriority[i].name != NULL) && (!xmlStrEqual((const xmlChar *)htmlEndPriority[i].name, name))) i++; return(htmlEndPriority[i].priority);}/** * htmlCheckAutoClose: * @newtag: The new tag name * @oldtag: The old tag name * * Checks whether the new tag is one of the registered valid tags for * closing old. * Initialize the htmlStartCloseIndex for fast lookup of closing tags names. * * Returns 0 if no, 1 if yes. */static inthtmlCheckAutoClose(const xmlChar * newtag, const xmlChar * oldtag){ int i, indx; const char **closed = NULL; if (htmlStartCloseIndexinitialized == 0) htmlInitAutoClose(); /* inefficient, but not a big deal */ for (indx = 0; indx < 100; indx++) { closed = htmlStartCloseIndex[indx]; if (closed == NULL) return (0); if (xmlStrEqual(BAD_CAST * closed, newtag)) break; } i = closed - htmlStartClose; i++; while (htmlStartClose[i] != NULL) { if (xmlStrEqual(BAD_CAST htmlStartClose[i], oldtag)) { return (1); } i++; } return (0);}/** * htmlAutoCloseOnClose: * @ctxt: an HTML parser context * @newtag: The new tag name * @force: force the tag closure * * The HTML DTD allows an ending tag to implicitly close other tags. */static voidhtmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag){ const htmlElemDesc *info; int i, priority; priority = htmlGetEndPriority(newtag); for (i = (ctxt->nameNr - 1); i >= 0; i--) { if (xmlStrEqual(newtag, ctxt->nameTab[i])) break; /* * A missplaced endtag can only close elements with lower * or equal priority, so if we find an element with higher * priority before we find an element with * matching name, we just ignore this endtag */ if (htmlGetEndPriority(ctxt->nameTab[i]) > priority) return; } if (i < 0) return; while (!xmlStrEqual(newtag, ctxt->name)) { info = htmlTagLookup(ctxt->name); if ((info != NULL) && (info->endTag == 3)) { htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH, "Opening and ending tag mismatch: %s and %s\n", newtag, ctxt->name); } if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) ctxt->sax->endElement(ctxt->userData, ctxt->name); htmlnamePop(ctxt); }}/** * htmlAutoCloseOnEnd: * @ctxt: an HTML parser context * * Close all remaining tags at the end of the stream */static voidhtmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt){ int i; if (ctxt->nameNr == 0) return; for (i = (ctxt->nameNr - 1); i >= 0; i--) { if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) ctxt->sax->endElement(ctxt->userData, ctxt->name); htmlnamePop(ctxt); }}/** * htmlAutoClose: * @ctxt: an HTML parser context * @newtag: The new tag name or NULL * * The HTML DTD allows a tag to implicitly close other tags. * The list is kept in htmlStartClose array. This function is * called when a new tag has been detected and generates the * appropriates closes if possible/needed. * If newtag is NULL this mean we are at the end of the resource * and we should check */static voidhtmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag){ while ((newtag != NULL) && (ctxt->name != NULL) && (htmlCheckAutoClose(newtag, ctxt->name))) { if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) ctxt->sax->endElement(ctxt->userData, ctxt->name); htmlnamePop(ctxt); } if (newtag == NULL) { htmlAutoCloseOnEnd(ctxt); return; } while ((newtag == NULL) && (ctxt->name != NULL) && ((xmlStrEqual(ctxt->name, BAD_CAST "head")) || (xmlStrEqual(ctxt->name, BAD_CAST "body")) || (xmlStrEqual(ctxt->name, BAD_CAST "html")))) { if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) ctxt->sax->endElement(ctxt->userData, ctxt->name); htmlnamePop(ctxt); }}/** * htmlAutoCloseTag: * @doc: the HTML document * @name: The tag name * @elem: the HTML element * * The HTML DTD allows a tag to implicitly close other tags. * The list is kept in htmlStartClose array. This function checks * if the element or one of it's children would autoclose the * given tag. * * Returns 1 if autoclose, 0 otherwise */inthtmlAutoCloseTag(htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem) { htmlNodePtr child; if (elem == NULL) return(1); if (xmlStrEqual(name, elem->name)) return(0); if (htmlCheckAutoClose(elem->name, name)) return(1); child = elem->children; while (child != NULL) { if (htmlAutoCloseTag(doc, name, child)) return(1); child = child->next; } return(0);}/** * htmlIsAutoClosed: * @doc: the HTML document * @elem: the HTML element * * The HTML DTD allows a tag to implicitly close other tags. * The list is kept in htmlStartClose array. This function checks * if a tag is autoclosed by one of it's child * * Returns 1 if autoclosed, 0 otherwise */inthtmlIsAutoClosed(htmlDocPtr doc, htmlNodePtr elem) { htmlNodePtr child; if (elem == NULL) return(1); child = elem->children; while (child != NULL) { if (htmlAutoCloseTag(doc, elem->name, child)) return(1); child = child->next; } return(0);}/** * htmlCheckImplied: * @ctxt: an HTML parser context * @newtag: The new tag name * * The HTML DTD allows a tag to exists only implicitly * called when a new tag has been detected and generates the * appropriates implicit tags if missing */static voidhtmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) { if (!htmlOmittedDefaultValue) return; if (xmlStrEqual(newtag, BAD_CAST"html")) return; if (ctxt->nameNr <= 0) { htmlnamePush(ctxt, BAD_CAST"html"); if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL); } if ((xmlStrEqual(newtag, BAD_CAST"body")) || (xmlStrEqual(newtag, BAD_CAST"head"))) return; if ((ctxt->nameNr <= 1) && ((xmlStrEqual(newtag, BAD_CAST"script")) || (xmlStrEqual(newtag, BAD_CAST"style")) || (xmlStrEqual(newtag, BAD_CAST"meta")) || (xmlStrEqual(newtag, BAD_CAST"link")) || (xmlStrEqual(newtag, BAD_CAST"title")) || (xmlStrEqual(newtag, BAD_CAST"base")))) { /* * dropped OBJECT ... i you put it first BODY will be * assumed ! */ htmlnamePush(ctxt, BAD_CAST"head"); if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL); } else if ((!xmlStrEqual(newtag, BAD_CAST"noframes")) && (!xmlStrEqual(newtag, BAD_CAST"frame")) && (!xmlStrEqual(newtag, BAD_CAST"frameset"))) { int i; for (i = 0;i < ctxt->nameNr;i++) { if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) { return; } if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"head")) { return; } } htmlnamePush(ctxt, BAD_CAST"body"); if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) ctxt->sax->startElement(ctxt->userData, BAD_CAST"body", NULL); }}/** * htmlCheckParagraph * @ctxt: an HTML parser context * * Check whether a p element need to be implied before inserting * characters in the current element. * * Returns 1 if a paragraph has been inserted, 0 if not and -1 * in case of error. */static inthtmlCheckParagraph(htmlParserCtxtPtr ctxt) { const xmlChar *tag; int i; if (ctxt == NULL) return(-1); tag = ctxt->name;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -