⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmlparser.c

📁 XML 开源解析代码，版本为 libxml2-2.6.29，可支持 GB2312，通过网络消息发送 XML 时很有用。
💻 C
📖 第 1 页 / 共 5 页
字号:
	EMPTY , NULL , DECL col_attrs , NULL, NULL},{ "colgroup",	0, 1, 0, 0, 0, 0, 0, "table column group ",	DECL col_elt , "col" , DECL col_attrs , NULL, NULL},{ "dd",		0, 1, 0, 0, 0, 0, 0, "definition description ",	DECL html_flow , NULL , DECL html_attrs, NULL, NULL},{ "del",	0, 0, 0, 0, 0, 0, 2, "deleted text ",	DECL html_flow , NULL , DECL edit_attrs , NULL, NULL},{ "dfn",	0, 0, 0, 0, 0, 0, 1, "instance definition",	DECL html_inline , NULL , DECL html_attrs, NULL, NULL},{ "dir",	0, 0, 0, 0, 1, 1, 0, "directory list",	DECL blockli_elt, "li" , NULL, DECL compact_attrs, NULL},{ "div",	0, 0, 0, 0, 0, 0, 0, "generic language/style container",	DECL html_flow, NULL, DECL html_attrs, DECL align_attr, NULL},{ "dl",		0, 0, 0, 0, 0, 0, 0, "definition list ",	DECL dl_contents , "dd" , DECL html_attrs, DECL compact_attr, NULL},{ "dt",		0, 1, 0, 0, 0, 0, 0, "definition term ",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "em",		0, 3, 0, 0, 0, 0, 1, "emphasis",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "embed",	0, 1, 2, 0, 1, 1, 1, "generic embedded object ",	EMPTY, NULL, DECL embed_attrs, NULL, NULL},{ "fieldset",	0, 0, 0, 0, 0, 0, 0, "form control group ",	DECL fieldset_contents , NULL, DECL html_attrs, NULL, NULL},{ "font",	0, 3, 0, 0, 1, 1, 1, "local change to font ",	DECL html_inline, NULL, NULL, DECL font_attrs, NULL},{ "form",	0, 0, 0, 0, 0, 0, 0, "interactive form ",	DECL form_contents, "fieldset", DECL form_attrs , DECL target_attr, DECL action_attr},{ "frame",	0, 2, 2, 1, 0, 2, 0, "subwindow " ,	EMPTY, NULL, NULL, DECL frame_attrs, NULL},{ "frameset",	0, 0, 0, 0, 0, 2, 0, "window subdivision" ,	DECL frameset_contents, "noframes" , NULL , DECL frameset_attrs, NULL},{ "h1",		0, 0, 0, 0, 0, 0, 0, "heading ",	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL},{ "h2",		0, 0, 0, 0, 0, 0, 0, "heading ",	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL},{ "h3",		0, 0, 0, 0, 0, 0, 0, "heading ",	DECL html_inline, NULL, DECL 
html_attrs, DECL align_attr, NULL},{ "h4",		0, 0, 0, 0, 0, 0, 0, "heading ",	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL},{ "h5",		0, 0, 0, 0, 0, 0, 0, "heading ",	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL},{ "h6",		0, 0, 0, 0, 0, 0, 0, "heading ",	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL},{ "head",	1, 1, 0, 0, 0, 0, 0, "document head ",	DECL head_contents, NULL, DECL head_attrs, NULL, NULL},{ "hr",		0, 2, 2, 1, 0, 0, 0, "horizontal rule " ,	EMPTY, NULL, DECL html_attrs, DECL hr_depr, NULL},{ "html",	1, 1, 0, 0, 0, 0, 0, "document root element ",	DECL html_content , NULL , DECL i18n_attrs, DECL version_attr, NULL},{ "i",		0, 3, 0, 0, 0, 0, 1, "italic text style",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "iframe",	0, 0, 0, 0, 0, 1, 2, "inline subwindow ",	DECL html_flow, NULL, NULL, DECL iframe_attrs, NULL},{ "img",	0, 2, 2, 1, 0, 0, 1, "embedded image ",	EMPTY, NULL, DECL img_attrs, DECL align_attr, DECL src_alt_attrs},{ "input",	0, 2, 2, 1, 0, 0, 1, "form control ",	EMPTY, NULL, DECL input_attrs , DECL align_attr, NULL},{ "ins",	0, 0, 0, 0, 0, 0, 2, "inserted text",	DECL html_flow, NULL, DECL edit_attrs, NULL, NULL},{ "isindex",	0, 2, 2, 1, 1, 1, 0, "single line prompt ",	EMPTY, NULL, NULL, DECL prompt_attrs, NULL},{ "kbd",	0, 0, 0, 0, 0, 0, 1, "text to be entered by the user",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "label",	0, 0, 0, 0, 0, 0, 1, "form field label text ",	DECL html_inline MODIFIER, NULL, DECL label_attrs , NULL, NULL},{ "legend",	0, 0, 0, 0, 0, 0, 0, "fieldset legend ",	DECL html_inline, NULL, DECL legend_attrs , DECL align_attr, NULL},{ "li",		0, 1, 1, 0, 0, 0, 0, "list item ",	DECL html_flow, NULL, DECL html_attrs, NULL, NULL},{ "link",	0, 2, 2, 1, 0, 0, 0, "a media-independent link ",	EMPTY, NULL, DECL link_attrs, DECL target_attr, NULL},{ "map",	0, 0, 0, 0, 0, 0, 2, "client-side image map ",	DECL map_contents , NULL, DECL html_attrs , NULL, DECL 
name_attr},{ "menu",	0, 0, 0, 0, 1, 1, 0, "menu list ",	DECL blockli_elt , NULL, NULL, DECL compact_attrs, NULL},{ "meta",	0, 2, 2, 1, 0, 0, 0, "generic metainformation ",	EMPTY, NULL, DECL meta_attrs , NULL , DECL content_attr},{ "noframes",	0, 0, 0, 0, 0, 2, 0, "alternate content container for non frame-based rendering ",	DECL noframes_content, "body" , DECL html_attrs, NULL, NULL},{ "noscript",	0, 0, 0, 0, 0, 0, 0, "alternate content container for non script-based rendering ",	DECL html_flow, "div", DECL html_attrs, NULL, NULL},{ "object",	0, 0, 0, 0, 0, 0, 2, "generic embedded object ",	DECL object_contents , "div" , DECL object_attrs, DECL object_depr, NULL},{ "ol",		0, 0, 0, 0, 0, 0, 0, "ordered list ",	DECL li_elt , "li" , DECL html_attrs, DECL ol_attrs, NULL},{ "optgroup",	0, 0, 0, 0, 0, 0, 0, "option group ",	DECL option_elt , "option", DECL optgroup_attrs, NULL, DECL label_attr},{ "option",	0, 1, 0, 0, 0, 0, 0, "selectable choice " ,	DECL html_pcdata, NULL, DECL option_attrs, NULL, NULL},{ "p",		0, 1, 0, 0, 0, 0, 0, "paragraph ",	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL},{ "param",	0, 2, 2, 1, 0, 0, 0, "named property value ",	EMPTY, NULL, DECL param_attrs, NULL, DECL name_attr},{ "pre",	0, 0, 0, 0, 0, 0, 0, "preformatted text ",	DECL pre_content, NULL, DECL html_attrs, DECL width_attr, NULL},{ "q",		0, 0, 0, 0, 0, 0, 1, "short inline quotation ",	DECL html_inline, NULL, DECL quote_attrs, NULL, NULL},{ "s",		0, 3, 0, 0, 1, 1, 1, "strike-through text style",	DECL html_inline, NULL, NULL, DECL html_attrs, NULL},{ "samp",	0, 0, 0, 0, 0, 0, 1, "sample program output, scripts, etc.",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "script",	0, 0, 0, 0, 0, 0, 2, "script statements ",	DECL html_cdata, NULL, DECL script_attrs, DECL language_attr, DECL type_attr},{ "select",	0, 0, 0, 0, 0, 0, 1, "option selector ",	DECL select_content, NULL, DECL select_attrs, NULL, NULL},{ "small",	0, 3, 0, 0, 0, 0, 1, "small text style",	DECL 
html_inline, NULL, DECL html_attrs, NULL, NULL},{ "span",	0, 0, 0, 0, 0, 0, 1, "generic language/style container ",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "strike",	0, 3, 0, 0, 1, 1, 1, "strike-through text",	DECL html_inline, NULL, NULL, DECL html_attrs, NULL},{ "strong",	0, 3, 0, 0, 0, 0, 1, "strong emphasis",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "style",	0, 0, 0, 0, 0, 0, 0, "style info ",	DECL html_cdata, NULL, DECL style_attrs, NULL, DECL type_attr},{ "sub",	0, 3, 0, 0, 0, 0, 1, "subscript",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "sup",	0, 3, 0, 0, 0, 0, 1, "superscript ",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "table",	0, 0, 0, 0, 0, 0, 0, "",	DECL table_contents , "tr" , DECL table_attrs , DECL table_depr, NULL},{ "tbody",	1, 0, 0, 0, 0, 0, 0, "table body ",	DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL},{ "td",		0, 0, 0, 0, 0, 0, 0, "table data cell",	DECL html_flow, NULL, DECL th_td_attr, DECL th_td_depr, NULL},{ "textarea",	0, 0, 0, 0, 0, 0, 1, "multi-line text field ",	DECL html_pcdata, NULL, DECL textarea_attrs, NULL, DECL rows_cols_attr},{ "tfoot",	0, 1, 0, 0, 0, 0, 0, "table footer ",	DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL},{ "th",		0, 1, 0, 0, 0, 0, 0, "table header cell",	DECL html_flow, NULL, DECL th_td_attr, DECL th_td_depr, NULL},{ "thead",	0, 1, 0, 0, 0, 0, 0, "table header ",	DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL},{ "title",	0, 0, 0, 0, 0, 0, 0, "document title ",	DECL html_pcdata, NULL, DECL i18n_attrs, NULL, NULL},{ "tr",		0, 0, 0, 0, 0, 0, 0, "table row ",	DECL tr_contents , "td" , DECL talign_attrs, DECL bgcolor_attr, NULL},{ "tt",		0, 3, 0, 0, 0, 0, 1, "teletype or monospaced text style",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "u",		0, 3, 0, 0, 1, 1, 1, "underlined text style",	DECL html_inline, NULL, NULL, DECL html_attrs, NULL},{ "ul",		0, 0, 0, 0, 0, 0, 0, "unordered list ",	DECL li_elt , "li" , DECL html_attrs, DECL ul_depr, 
NULL},{ "var",	0, 0, 0, 0, 0, 0, 1, "instance of a variable or program argument",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL}};/* * start tags that imply the end of current element */static const char * const htmlStartClose[] = {"form",		"form", "p", "hr", "h1", "h2", "h3", "h4", "h5", "h6",		"dl", "ul", "ol", "menu", "dir", "address", "pre",		"listing", "xmp", "head", NULL,"head",		"p", NULL,"title",	"p", NULL,"body",		"head", "style", "link", "title", "p", NULL,"frameset",	"head", "style", "link", "title", "p", NULL,"li",		"p", "h1", "h2", "h3", "h4", "h5", "h6", "dl", "address",		"pre", "listing", "xmp", "head", "li", NULL,"hr",		"p", "head", NULL,"h1",		"p", "head", NULL,"h2",		"p", "head", NULL,"h3",		"p", "head", NULL,"h4",		"p", "head", NULL,"h5",		"p", "head", NULL,"h6",		"p", "head", NULL,"dir",		"p", "head", NULL,"address",	"p", "head", "ul", NULL,"pre",		"p", "head", "ul", NULL,"listing",	"p", "head", NULL,"xmp",		"p", "head", NULL,"blockquote",	"p", "head", NULL,"dl",		"p", "dt", "menu", "dir", "address", "pre", "listing",		"xmp", "head", NULL,"dt",		"p", "menu", "dir", "address", "pre", "listing", "xmp",                "head", "dd", NULL,"dd",		"p", "menu", "dir", "address", "pre", "listing", "xmp",                "head", "dt", NULL,"ul",		"p", "head", "ol", "menu", "dir", "address", "pre",		"listing", "xmp", NULL,"ol",		"p", "head", "ul", NULL,"menu",		"p", "head", "ul", NULL,"p",		"p", "head", "h1", "h2", "h3", "h4", "h5", "h6", NULL,"div",		"p", "head", NULL,"noscript",	"p", "head", NULL,"center",	"font", "b", "i", "p", "head", NULL,"a",		"a", NULL,"caption",	"p", NULL,"colgroup",	"caption", "colgroup", "col", "p", NULL,"col",		"caption", "col", "p", NULL,"table",	"p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre",		"listing", "xmp", "a", NULL,"th",		"th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,"td",		"th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,      "tr",		"th", "td", "tr", "caption", "col", "colgroup", "p", 
NULL,"thead",	"caption", "col", "colgroup", NULL,"tfoot",	"th", "td", "tr", "caption", "col", "colgroup", "thead",		"tbody", "p", NULL,"tbody",	"th", "td", "tr", "caption", "col", "colgroup", "thead",		"tfoot", "tbody", "p", NULL,"optgroup",	"option", NULL,"option",	"option", NULL,"fieldset",	"legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6",		"pre", "listing", "xmp", "a", NULL,NULL};/* * The list of HTML elements which are supposed not to have * CDATA content and where a p element will be implied * * TODO: extend that list by reading the HTML SGML DTD on *       implied paragraph */static const char *const htmlNoContentElements[] = {    "html",    "head",    NULL};/* * The list of HTML attributes which are of content %Script; * NOTE: when adding ones, check htmlIsScriptAttribute() since *       it assumes the name starts with 'on' */static const char *const htmlScriptAttributes[] = {    "onclick",    "ondblclick",    "onmousedown",    "onmouseup",    "onmouseover",    "onmousemove",    "onmouseout",    "onkeypress",    "onkeydown",    "onkeyup",    "onload",    "onunload",    "onfocus",    "onblur",    "onsubmit",    "onrest",    "onchange",    "onselect"};/* * This table is used by the htmlparser to know what to do with * broken html pages. By assigning different priorities to different * elements the parser can decide how to handle extra endtags. * Endtags are only allowed to close elements with lower or equal * priority. */ typedef struct {    const char *name;    int priority;} elementPriority;static const elementPriority htmlEndPriority[] = {    {"div",   150},    {"td",    160},    {"th",    160},    {"tr",    170},    {"thead", 180},    {"tbody", 180},    {"tfoot", 180},    {"table", 190},

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -