⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmlparser.c.svn-base

📁 这是一个用于解析xml文件的类库。使用这个类库
💻 SVN-BASE
📖 第 1 页 / 共 5 页
字号:
	EMPTY, NULL, NULL, DECL frame_attrs, NULL},{ "frameset",	0, 0, 0, 0, 0, 2, 0, "window subdivision" ,	DECL frameset_contents, "noframes" , NULL , DECL frameset_attrs, NULL},{ "h1",		0, 0, 0, 0, 0, 0, 0, "heading ",	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL},{ "h2",		0, 0, 0, 0, 0, 0, 0, "heading ",	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL},{ "h3",		0, 0, 0, 0, 0, 0, 0, "heading ",	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL},{ "h4",		0, 0, 0, 0, 0, 0, 0, "heading ",	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL},{ "h5",		0, 0, 0, 0, 0, 0, 0, "heading ",	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL},{ "h6",		0, 0, 0, 0, 0, 0, 0, "heading ",	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL},{ "head",	1, 1, 0, 0, 0, 0, 0, "document head ",	DECL head_contents, NULL, DECL head_attrs, NULL, NULL},{ "hr",		0, 2, 2, 1, 0, 0, 0, "horizontal rule " ,	EMPTY, NULL, DECL html_attrs, DECL hr_depr, NULL},{ "html",	1, 1, 0, 0, 0, 0, 0, "document root element ",	DECL html_content , NULL , DECL i18n_attrs, DECL version_attr, NULL},{ "i",		0, 3, 0, 0, 0, 0, 1, "italic text style",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "iframe",	0, 0, 0, 0, 0, 1, 2, "inline subwindow ",	DECL html_flow, NULL, NULL, DECL iframe_attrs, NULL},{ "img",	0, 2, 2, 1, 0, 0, 1, "embedded image ",	EMPTY, NULL, DECL img_attrs, DECL align_attr, src_alt_attrs},{ "input",	0, 2, 2, 1, 0, 0, 1, "form control ",	EMPTY, NULL, DECL input_attrs , DECL align_attr, NULL},{ "ins",	0, 0, 0, 0, 0, 0, 2, "inserted text",	DECL html_flow, NULL, DECL edit_attrs, NULL, NULL},{ "isindex",	0, 2, 2, 1, 1, 1, 0, "single line prompt ",	EMPTY, NULL, NULL, DECL prompt_attrs, NULL},{ "kbd",	0, 0, 0, 0, 0, 0, 1, "text to be entered by the user",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "label",	0, 0, 0, 0, 0, 0, 1, "form field label text ",	DECL html_inline MODIFIER, NULL, DECL label_attrs , NULL, 
NULL},{ "legend",	0, 0, 0, 0, 0, 0, 0, "fieldset legend ",	DECL html_inline, NULL, DECL legend_attrs , DECL align_attr, NULL},{ "li",		0, 1, 1, 0, 0, 0, 0, "list item ",	DECL html_flow, NULL, DECL html_attrs, NULL, NULL},{ "link",	0, 2, 2, 1, 0, 0, 0, "a media-independent link ",	EMPTY, NULL, DECL link_attrs, DECL target_attr, NULL},{ "map",	0, 0, 0, 0, 0, 0, 2, "client-side image map ",	DECL map_contents , NULL, DECL html_attrs , NULL, name_attr},{ "menu",	0, 0, 0, 0, 1, 1, 0, "menu list ",	DECL blockli_elt , NULL, NULL, DECL compact_attrs, NULL},{ "meta",	0, 2, 2, 1, 0, 0, 0, "generic metainformation ",	EMPTY, NULL, DECL meta_attrs , NULL , DECL content_attr},{ "noframes",	0, 0, 0, 0, 0, 2, 0, "alternate content container for non frame-based rendering ",	DECL noframes_content, "body" , DECL html_attrs, NULL, NULL},{ "noscript",	0, 0, 0, 0, 0, 0, 0, "alternate content container for non script-based rendering ",	DECL html_flow, "div", DECL html_attrs, NULL, NULL},{ "object",	0, 0, 0, 0, 0, 0, 2, "generic embedded object ",	DECL object_contents , "div" , DECL object_attrs, DECL object_depr, NULL},{ "ol",		0, 0, 0, 0, 0, 0, 0, "ordered list ",	DECL li_elt , "li" , DECL html_attrs, DECL ol_attrs, NULL},{ "optgroup",	0, 0, 0, 0, 0, 0, 0, "option group ",	option_elt , "option", DECL optgroup_attrs, NULL, DECL label_attr},{ "option",	0, 1, 0, 0, 0, 0, 0, "selectable choice " ,	DECL html_pcdata, NULL, DECL option_attrs, NULL, NULL},{ "p",		0, 1, 0, 0, 0, 0, 0, "paragraph ",	DECL html_inline, NULL, DECL html_attrs, DECL align_attr, NULL},{ "param",	0, 2, 2, 1, 0, 0, 0, "named property value ",	EMPTY, NULL, DECL param_attrs, NULL, name_attr},{ "pre",	0, 0, 0, 0, 0, 0, 0, "preformatted text ",	DECL pre_content, NULL, DECL html_attrs, DECL width_attr, NULL},{ "q",		0, 0, 0, 0, 0, 0, 1, "short inline quotation ",	DECL html_inline, NULL, DECL quote_attrs, NULL, NULL},{ "s",		0, 3, 0, 0, 1, 1, 1, "strike-through text style",	DECL html_inline, NULL, NULL, DECL html_attrs, NULL},{ 
"samp",	0, 0, 0, 0, 0, 0, 1, "sample program output, scripts, etc.",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "script",	0, 0, 0, 0, 0, 0, 2, "script statements ",	DECL html_cdata, NULL, DECL script_attrs, DECL language_attr, DECL type_attr},{ "select",	0, 0, 0, 0, 0, 0, 1, "option selector ",	DECL select_content, NULL, DECL select_attrs, NULL, NULL},{ "small",	0, 3, 0, 0, 0, 0, 1, "small text style",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "span",	0, 0, 0, 0, 0, 0, 1, "generic language/style container ",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "strike",	0, 3, 0, 0, 1, 1, 1, "strike-through text",	DECL html_inline, NULL, NULL, DECL html_attrs, NULL},{ "strong",	0, 3, 0, 0, 0, 0, 1, "strong emphasis",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "style",	0, 0, 0, 0, 0, 0, 0, "style info ",	DECL html_cdata, NULL, DECL style_attrs, NULL, DECL type_attr},{ "sub",	0, 3, 0, 0, 0, 0, 1, "subscript",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "sup",	0, 3, 0, 0, 0, 0, 1, "superscript ",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "table",	0, 0, 0, 0, 0, 0, 0, "",	DECL table_contents , "tr" , DECL table_attrs , DECL table_depr, NULL},{ "tbody",	1, 0, 0, 0, 0, 0, 0, "table body ",	DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL},{ "td",		0, 0, 0, 0, 0, 0, 0, "table data cell",	DECL html_flow, NULL, DECL th_td_attr, DECL th_td_depr, NULL},{ "textarea",	0, 0, 0, 0, 0, 0, 1, "multi-line text field ",	DECL html_pcdata, NULL, DECL textarea_attrs, NULL, DECL rows_cols_attr},{ "tfoot",	0, 1, 0, 0, 0, 0, 0, "table footer ",	DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL},{ "th",		0, 1, 0, 0, 0, 0, 0, "table header cell",	DECL html_flow, NULL, DECL th_td_attr, DECL th_td_depr, NULL},{ "thead",	0, 1, 0, 0, 0, 0, 0, "table header ",	DECL tr_elt , "tr" , DECL talign_attrs, NULL, NULL},{ "title",	0, 0, 0, 0, 0, 0, 0, "document title ",	DECL html_pcdata, NULL, DECL i18n_attrs, NULL, NULL},{ "tr",		0, 0, 0, 
0, 0, 0, 0, "table row ",	DECL tr_contents , "td" , DECL talign_attrs, DECL bgcolor_attr, NULL},{ "tt",		0, 3, 0, 0, 0, 0, 1, "teletype or monospaced text style",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "u",		0, 3, 0, 0, 1, 1, 1, "underlined text style",	DECL html_inline, NULL, NULL, DECL html_attrs, NULL},{ "ul",		0, 0, 0, 0, 0, 0, 0, "unordered list ",	DECL li_elt , "li" , DECL html_attrs, DECL ul_depr, NULL},{ "var",	0, 0, 0, 0, 0, 0, 1, "instance of a variable or program argument",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL}};/* * start tags that imply the end of current element */static const char *htmlStartClose[] = {"form",		"form", "p", "hr", "h1", "h2", "h3", "h4", "h5", "h6",		"dl", "ul", "ol", "menu", "dir", "address", "pre",		"listing", "xmp", "head", NULL,"head",		"p", NULL,"title",	"p", NULL,"body",		"head", "style", "link", "title", "p", NULL,"frameset",	"head", "style", "link", "title", "p", NULL,"li",		"p", "h1", "h2", "h3", "h4", "h5", "h6", "dl", "address",		"pre", "listing", "xmp", "head", "li", NULL,"hr",		"p", "head", NULL,"h1",		"p", "head", NULL,"h2",		"p", "head", NULL,"h3",		"p", "head", NULL,"h4",		"p", "head", NULL,"h5",		"p", "head", NULL,"h6",		"p", "head", NULL,"dir",		"p", "head", NULL,"address",	"p", "head", "ul", NULL,"pre",		"p", "head", "ul", NULL,"listing",	"p", "head", NULL,"xmp",		"p", "head", NULL,"blockquote",	"p", "head", NULL,"dl",		"p", "dt", "menu", "dir", "address", "pre", "listing",		"xmp", "head", NULL,"dt",		"p", "menu", "dir", "address", "pre", "listing", "xmp",                "head", "dd", NULL,"dd",		"p", "menu", "dir", "address", "pre", "listing", "xmp",                "head", "dt", NULL,"ul",		"p", "head", "ol", "menu", "dir", "address", "pre",		"listing", "xmp", NULL,"ol",		"p", "head", "ul", NULL,"menu",		"p", "head", "ul", NULL,"p",		"p", "head", "h1", "h2", "h3", "h4", "h5", "h6", NULL,"div",		"p", "head", NULL,"noscript",	"p", "head", NULL,"center",	"font", "b", "i", "p", "head", 
NULL,
"a",		"a", NULL,
"caption",	"p", NULL,
"colgroup",	"caption", "colgroup", "col", "p", NULL,
"col",		"caption", "col", "p", NULL,
"table",	"p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre",
		"listing", "xmp", "a", NULL,
"th",		"th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,
"td",		"th", "td", "p", "span", "font", "a", "b", "i", "u", NULL,
"tr",		"th", "td", "tr", "caption", "col", "colgroup", "p", NULL,
"thead",	"caption", "col", "colgroup", NULL,
"tfoot",	"th", "td", "tr", "caption", "col", "colgroup", "thead",
		"tbody", "p", NULL,
"tbody",	"th", "td", "tr", "caption", "col", "colgroup", "thead",
		"tfoot", "tbody", "p", NULL,
"optgroup",	"option", NULL,
"option",	"option", NULL,
"fieldset",	"legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6",
		"pre", "listing", "xmp", "a", NULL,
/* A group that starts with NULL ends the table; htmlInitAutoClose() stops there. */
NULL
};

/*
 * The list of HTML elements which are supposed not to have
 * CDATA content and where a p element will be implied
 *
 * TODO: extend that list by reading the HTML SGML DTD on
 *       implied paragraph
 */
static const char *htmlNoContentElements[] = {
    "html",
    "head",
    "body",
    NULL
};

/*
 * The list of HTML attributes which are of content %Script;
 * These are the intrinsic event attributes of HTML 4.01.
 * NOTE: when adding ones, check htmlIsScriptAttribute() since
 *       it assumes the name starts with 'on'
 * NOTE: unlike the tables above, this array has no NULL terminator.
 */
static const char *htmlScriptAttributes[] = {
    "onclick",
    "ondblclick",
    "onmousedown",
    "onmouseup",
    "onmouseover",
    "onmousemove",
    "onmouseout",
    "onkeypress",
    "onkeydown",
    "onkeyup",
    "onload",
    "onunload",
    "onfocus",
    "onblur",
    "onsubmit",
    "onreset",	/* was misspelled "onrest", so onreset was never matched */
    "onchange",
    "onselect"
};

/*
 * This table is used by the htmlparser to know what to do with
 * broken html pages. By assigning different priorities to different
 * elements the parser can decide how to handle extra endtags.
 * Endtags are only allowed to close elements with lower or equal
 * priority.
*/ typedef struct {    const char *name;    int priority;} elementPriority;static const elementPriority htmlEndPriority[] = {    {"div",   150},    {"td",    160},    {"th",    160},    {"tr",    170},    {"thead", 180},    {"tbody", 180},    {"tfoot", 180},    {"table", 190},    {"head",  200},    {"body",  200},    {"html",  220},    {NULL,    100} /* Default priority */};static const char** htmlStartCloseIndex[100];static int htmlStartCloseIndexinitialized = 0;/************************************************************************ *									* * 		functions to handle HTML specific data			* *									* ************************************************************************//** * htmlInitAutoClose: * * Initialize the htmlStartCloseIndex for fast lookup of closing tags names. * This is not reentrant. Call xmlInitParser() once before processing in * case of use in multithreaded programs. */voidhtmlInitAutoClose(void) {    int indx, i = 0;    if (htmlStartCloseIndexinitialized) return;    for (indx = 0;indx < 100;indx ++) htmlStartCloseIndex[indx] = NULL;    indx = 0;    while ((htmlStartClose[i] != NULL) && (indx < 100 - 1)) {        htmlStartCloseIndex[indx++] = &htmlStartClose[i];	while (htmlStartClose[i] != NULL) i++;	i++;    }    htmlStartCloseIndexinitialized = 1;}/** * htmlTagLookup: * @tag:  The tag name in lowercase * * Lookup the HTML tag in the ElementTable * * Returns the related htmlElemDescPtr or NULL if not found. */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -