📄 htmltree.c
字号:
/* * HTMLtree.c : implemetation of access function for an HTML tree. * * See Copyright for the status of this software. * * Daniel.Veillard@w3.org */#include "global.h"#ifdef WIN32#include "win32config.h"#else#include "config.h"#endif#include "xmlversion.h"#ifdef LIBXML_HTML_ENABLED#include <stdio.h>#include <string.h> /* for memset() only ! */#ifdef HAVE_CTYPE_H#include <ctype.h>#endif#ifdef HAVE_STDLIB_H#include <stdlib.h>#endif#include <libxml/xmlmemory.h>#include <libxml/HTMLparser.h>#include <libxml/HTMLtree.h>#include <libxml/entities.h>#include <libxml/valid.h>static voidhtmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);/** * htmlDtdDump: * @buf: the HTML buffer output * @doc: the document * * Dump the HTML document DTD, if any. */static voidhtmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) { xmlDtdPtr cur = doc->intSubset; if (cur == NULL) { fprintf(stderr, "htmlDtdDump : no internal subset\n"); return; } xmlBufferWriteChar(buf, "<!DOCTYPE "); xmlBufferWriteCHAR(buf, cur->name); if (cur->ExternalID != NULL) { xmlBufferWriteChar(buf, " PUBLIC "); xmlBufferWriteQuotedString(buf, cur->ExternalID); if (cur->SystemID != NULL) { xmlBufferWriteChar(buf, " "); xmlBufferWriteQuotedString(buf, cur->SystemID); } } else if (cur->SystemID != NULL) { xmlBufferWriteChar(buf, " SYSTEM "); xmlBufferWriteQuotedString(buf, cur->SystemID); } xmlBufferWriteChar(buf, ">\n");}/** * htmlAttrDump: * @buf: the HTML buffer output * @doc: the document * @cur: the attribute pointer * * Dump an HTML attribute */static voidhtmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) { xmlChar *value; if (cur == NULL) { fprintf(stderr, "htmlAttrDump : property == NULL\n"); return; } xmlBufferWriteChar(buf, " "); xmlBufferWriteCHAR(buf, cur->name); value = xmlNodeListGetString(doc, cur->children, 0); if (value) { xmlBufferWriteChar(buf, "="); xmlBufferWriteQuotedString(buf, value); xmlFree(value); } else { xmlBufferWriteChar(buf, "=\"\""); }}/** * htmlAttrListDump: * @buf: the HTML buffer output * @doc: the document * @cur: the first attribute pointer * * Dump a list of HTML attributes */static voidhtmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) { if (cur == NULL) { fprintf(stderr, "htmlAttrListDump : property == NULL\n"); return; } while (cur != NULL) { htmlAttrDump(buf, doc, cur); cur = cur->next; }}voidhtmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);/** * htmlNodeListDump: * @buf: the HTML buffer output * @doc: the document * @cur: the first node * * Dump an HTML node list, recursive behaviour,children are printed too. */static voidhtmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { if (cur == NULL) { fprintf(stderr, "htmlNodeListDump : node == NULL\n"); return; } while (cur != NULL) { htmlNodeDump(buf, doc, cur); cur = cur->next; }}/** * htmlNodeDump: * @buf: the HTML buffer output * @doc: the document * @cur: the current node * * Dump an HTML node, recursive behaviour,children are printed too. */voidhtmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { htmlElemDescPtr info; if (cur == NULL) { fprintf(stderr, "htmlNodeDump : node == NULL\n"); return; } /* * Special cases. */ if (cur->type == XML_HTML_DOCUMENT_NODE) { htmlDocContentDump(buf, (xmlDocPtr) cur); return; } if (cur->type == HTML_TEXT_NODE) { if (cur->content != NULL) { xmlChar *buffer; /* uses the HTML encoding routine !!!!!!!!!! */#ifndef XML_USE_BUFFER_CONTENT buffer = xmlEncodeEntitiesReentrant(doc, cur->content);#else buffer = xmlEncodeEntitiesReentrant(doc, xmlBufferContent(cur->content));#endif if (buffer != NULL) { xmlBufferWriteCHAR(buf, buffer); xmlFree(buffer); } } return; } if (cur->type == HTML_COMMENT_NODE) { if (cur->content != NULL) { xmlBufferWriteChar(buf, "<!--");#ifndef XML_USE_BUFFER_CONTENT xmlBufferWriteCHAR(buf, cur->content);#else xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));#endif xmlBufferWriteChar(buf, "-->"); } return; } if (cur->type == HTML_ENTITY_REF_NODE) { xmlBufferWriteChar(buf, "&"); xmlBufferWriteCHAR(buf, cur->name); xmlBufferWriteChar(buf, ";"); return; } /* * Get specific HTmL info for taht node. */ info = htmlTagLookup(cur->name); xmlBufferWriteChar(buf, "<"); xmlBufferWriteCHAR(buf, cur->name); if (cur->properties != NULL) htmlAttrListDump(buf, doc, cur->properties); if ((info != NULL) && (info->empty)) { xmlBufferWriteChar(buf, ">"); if (cur->next != NULL) { if ((cur->next->type != HTML_TEXT_NODE) && (cur->next->type != HTML_ENTITY_REF_NODE)) xmlBufferWriteChar(buf, "\n"); } return; } if ((cur->content == NULL) && (cur->children == NULL)) { if ((info != NULL) && (info->endTag != 0)) xmlBufferWriteChar(buf, ">"); else { xmlBufferWriteChar(buf, "></"); xmlBufferWriteCHAR(buf, cur->name); xmlBufferWriteChar(buf, ">"); } if (cur->next != NULL) { if ((cur->next->type != HTML_TEXT_NODE) && (cur->next->type != HTML_ENTITY_REF_NODE)) xmlBufferWriteChar(buf, "\n"); } return; } xmlBufferWriteChar(buf, ">"); if (cur->content != NULL) { xmlChar *buffer;#ifndef XML_USE_BUFFER_CONTENT buffer = xmlEncodeEntitiesReentrant(doc, cur->content);#else buffer = xmlEncodeEntitiesReentrant(doc, xmlBufferContent(cur->content));#endif if (buffer != NULL) { xmlBufferWriteCHAR(buf, buffer); xmlFree(buffer); } } if (cur->children != NULL) { if ((cur->children->type != HTML_TEXT_NODE) && (cur->children->type != HTML_ENTITY_REF_NODE) && (cur->children != cur->last)) xmlBufferWriteChar(buf, "\n"); htmlNodeListDump(buf, doc, cur->children); if ((cur->last->type != HTML_TEXT_NODE) && (cur->last->type != HTML_ENTITY_REF_NODE) && (cur->children != cur->last)) xmlBufferWriteChar(buf, "\n"); } if (!htmlIsAutoClosed(doc, cur)) { xmlBufferWriteChar(buf, "</"); xmlBufferWriteCHAR(buf, cur->name); xmlBufferWriteChar(buf, ">"); } if (cur->next != NULL) { if ((cur->next->type != HTML_TEXT_NODE) && (cur->next->type != HTML_ENTITY_REF_NODE)) xmlBufferWriteChar(buf, "\n"); }}/** * htmlNodeDumpFile: * @out: the FILE pointer * @doc: the document * @cur: the current node * * Dump an HTML node, recursive behaviour,children are printed too. */voidhtmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) { xmlBufferPtr buf; buf = xmlBufferCreate(); if (buf == NULL) return; htmlNodeDump(buf, doc, cur); xmlBufferDump(out, buf); xmlBufferFree(buf);}/** * htmlDocContentDump: * @buf: the HTML buffer output * @cur: the document * * Dump an HTML document. */static voidhtmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) { int type; /* * force to output the stuff as HTML, especially for entities */ type = cur->type; cur->type = XML_HTML_DOCUMENT_NODE; if (cur->intSubset != NULL) htmlDtdDump(buf, cur); else { /* Default to HTML-4.0 transitionnal @@@@ */ xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">"); } if (cur->children != NULL) { htmlNodeListDump(buf, cur, cur->children); } xmlBufferWriteChar(buf, "\n"); cur->type = type;}/** * htmlDocDumpMemory: * @cur: the document * @mem: OUT: the memory pointer * @size: OUT: the memory lenght * * Dump an HTML document in memory and return the xmlChar * and it's size. * It's up to the caller to free the memory. */voidhtmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) { xmlBufferPtr buf; if (cur == NULL) {#ifdef DEBUG_TREE fprintf(stderr, "htmlxmlDocDumpMemory : document == NULL\n");#endif *mem = NULL; *size = 0; return; } buf = xmlBufferCreate(); if (buf == NULL) { *mem = NULL; *size = 0; return; } htmlDocContentDump(buf, cur); *mem = buf->content; *size = buf->use; memset(buf, -1, sizeof(xmlBuffer)); xmlFree(buf);}/** * htmlDocDump: * @f: the FILE* * @cur: the document * * Dump an HTML document to an open FILE. */voidhtmlDocDump(FILE *f, xmlDocPtr cur) { xmlBufferPtr buf; if (cur == NULL) {#ifdef DEBUG_TREE fprintf(stderr, "htmlDocDump : document == NULL\n");#endif return; } buf = xmlBufferCreate(); if (buf == NULL) return; htmlDocContentDump(buf, cur); xmlBufferDump(f, buf); xmlBufferFree(buf);}/** * htmlSaveFile: * @filename: the filename * @cur: the document * * Dump an HTML document to a file. * * returns: the number of byte written or -1 in case of failure. */inthtmlSaveFile(const char *filename, xmlDocPtr cur) { xmlBufferPtr buf; FILE *output = NULL; int ret; /* * save the content to a temp buffer. */ buf = xmlBufferCreate(); if (buf == NULL) return(0); htmlDocContentDump(buf, cur); output = fopen(filename, "w"); if (output == NULL) return(-1); ret = xmlBufferDump(output, buf); fclose(output); xmlBufferFree(buf); return(ret * sizeof(xmlChar));}#endif /* LIBXML_HTML_ENABLED */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -