⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmlparser.c.svn-base

📁 这是一个用于解析xml文件的类库。使用这个类库
💻 SVN-BASE
📖 第 1 页 / 共 5 页
字号:
		    val |= cur[3] & 0x3f;		} else {		  /* 3-byte code */		    *len = 3;		    val = (cur[0] & 0xf) << 12;		    val |= (cur[1] & 0x3f) << 6;		    val |= cur[2] & 0x3f;		}	    } else {	      /* 2-byte code */		*len = 2;		val = (cur[0] & 0x1f) << 6;		val |= cur[1] & 0x3f;	    }	    if (!IS_CHAR(val)) {	        htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,				"Char 0x%X out of allowed range\n", val);	    }    	    return(val);	} else {	    /* 1-byte code */	    *len = 1;	    return((int) *ctxt->input->cur);	}    }    /*     * Assume it's a fixed length encoding (1) with     * a compatible encoding for the ASCII set, since     * XML constructs only use < 128 chars     */    *len = 1;    if ((int) *ctxt->input->cur < 0x80)	return((int) *ctxt->input->cur);    /*     * Humm this is bad, do an automatic flow conversion     */    xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);    ctxt->charset = XML_CHAR_ENCODING_UTF8;    return(xmlCurrentChar(ctxt, len));encoding_error:    /*     * If we detect an UTF8 error that probably mean that the     * input encoding didn't get properly advertized in the     * declaration header. Report the error and switch the encoding     * to ISO-Latin-1 (if you don't like this policy, just declare the     * encoding !)     */    htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING,		 "Input is not proper UTF-8, indicate encoding !\n",		 NULL, NULL);    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {	ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",			ctxt->input->cur[0], ctxt->input->cur[1],			ctxt->input->cur[2], ctxt->input->cur[3]);    }    ctxt->charset = XML_CHAR_ENCODING_8859_1;     *len = 1;    return((int) *ctxt->input->cur);}/** * htmlSkipBlankChars: * @ctxt:  the HTML parser context * * skip all blanks character found at that point in the input streams. * * Returns the number of space chars skipped */static inthtmlSkipBlankChars(xmlParserCtxtPtr ctxt) {    int res = 0;    while (IS_BLANK_CH(*(ctxt->input->cur))) {	if ((*ctxt->input->cur == 0) &&	    (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {		xmlPopInput(ctxt);	} else {	    if (*(ctxt->input->cur) == '\n') {		ctxt->input->line++; ctxt->input->col = 1;	    } else ctxt->input->col++;	    ctxt->input->cur++;	    ctxt->nbChars++;	    if (*ctxt->input->cur == 0)		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);	}	res++;    }    return(res);}/************************************************************************ *									* * 		The list of HTML elements and their properties		* *									* ************************************************************************//* *  Start Tag: 1 means the start tag can be ommited *  End Tag:   1 means the end tag can be ommited *             2 means it's forbidden (empty elements) *             3 means the tag is stylistic and should be closed easily *  Depr:      this element is deprecated *  DTD:       1 means that this element is valid only in the Loose DTD *             2 means that this element is valid only in the Frameset DTD * * Name,Start Tag,End Tag,Save End,Empty,Deprecated,DTD,inline,Description	, subElements , impliedsubelt , Attributes, userdata *//* Definitions and a couple of vars for HTML Elements */#define FONTSTYLE "tt", "i", "b", "u", "s", "strike", "big", "small"#define PHRASE "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr", "acronym"#define SPECIAL "a", "img", "applet", "object", "font", "basefont", "br", "script", "map", "q", "sub", "sup", "span", "bdo", "iframe"#define INLINE PCDATA FONTSTYLE PHRASE SPECIAL FORMCTRL#define BLOCK HEADING LIST "pre", "p", "dl", "div", "center", "noscript", "noframes", "blockquote", "form", "isindex", "hr", "table", "fieldset", "address"#define FORMCTRL "input", "select", "textarea", "label", "button"#define PCDATA#define HEADING "h1", "h2", "h3", "h4", "h5", "h6"#define LIST "ul", "ol", "dir", "menu"#define MODIFIER#define FLOW BLOCK,INLINE#define EMPTY NULLstatic const char* html_flow[] = { FLOW, NULL } ;static const char* html_inline[] = { INLINE, NULL } ;/* placeholders: elts with content but no subelements */static const char* html_pcdata[] = { NULL } ;#define html_cdata html_pcdata/* ... and for HTML Attributes */#define COREATTRS "id", "class", "style", "title"#define I18N "lang", "dir"#define EVENTS "onclick", "ondblclick", "onmousedown", "onmouseup", "onmouseover", "onmouseout", "onkeypress", "onkeydown", "onkeyup"#define ATTRS COREATTRS,I18N,EVENTS#define CELLHALIGN "align", "char", "charoff"#define CELLVALIGN "valign"static const char* html_attrs[] = { ATTRS, NULL } ;static const char* core_i18n_attrs[] = { COREATTRS, I18N, NULL } ;static const char* core_attrs[] = { COREATTRS, NULL } ;static const char* i18n_attrs[] = { I18N, NULL } ;/* Other declarations that should go inline ... */static const char* a_attrs[] = { ATTRS, "charset", "type", "name",	"href", "hreflang", "rel", "rev", "accesskey", "shape", "coords",	"tabindex", "onfocus", "onblur", NULL } ;static const char* target_attr[] = { "target", NULL } ;static const char* rows_cols_attr[] = { "rows", "cols", NULL } ;static const char* alt_attr[] = { "alt", NULL } ;static const char* src_alt_attrs[] = { "src", "alt", NULL } ;static const char* href_attrs[] = { "href", NULL } ;static const char* clear_attrs[] = { "clear", NULL } ;static const char* inline_p[] = { INLINE, "p", NULL } ;static const char* flow_param[] = { FLOW, "param", NULL } ;static const char* applet_attrs[] = { COREATTRS , "codebase",		"archive", "alt", "name", "height", "width", "align",		"hspace", "vspace", NULL } ;static const char* area_attrs[] = { "shape", "coords", "href", "nohref",	"tabindex", "accesskey", "onfocus", "onblur", NULL } ;static const char* basefont_attrs[] =	{ "id", "size", "color", "face", NULL } ;static const char* quote_attrs[] = { ATTRS, "cite", NULL } ;static const char* body_contents[] = { FLOW, "ins", "del", NULL } ;static const char* body_attrs[] = { ATTRS, "onload", "onunload", NULL } ;static const char* body_depr[] = { "background", "bgcolor", "text",	"link", "vlink", "alink", NULL } ;static const char* button_attrs[] = { ATTRS, "name", "value", "type",	"disabled", "tabindex", "accesskey", "onfocus", "onblur", NULL } ;static const char* col_attrs[] = { ATTRS, "span", "width", CELLHALIGN, CELLVALIGN, NULL } ;static const char* col_elt[] = { "col", NULL } ;static const char* edit_attrs[] = { ATTRS, "datetime", "cite", NULL } ;static const char* compact_attrs[] = { ATTRS, "compact", NULL } ;static const char* dl_contents[] = { "dt", "dd", NULL } ;static const char* compact_attr[] = { "compact", NULL } ;static const char* label_attr[] = { "label", NULL } ;static const char* fieldset_contents[] = { FLOW, "legend" } ;static const char* font_attrs[] = { COREATTRS, I18N, "size", "color", "face" , NULL } ;static const char* form_contents[] = { HEADING, LIST, INLINE, "pre", "p", "div", "center", "noscript", "noframes", "blockquote", "isindex", "hr", "table", "fieldset", "address", NULL } ;static const char* form_attrs[] = { ATTRS, "method", "enctype", "accept", "name", "onsubmit", "onreset", "accept-charset", NULL } ;static const char* frame_attrs[] = { COREATTRS, "longdesc", "name", "src", "frameborder", "marginwidth", "marginheight", "noresize", "scrolling" , NULL } ;static const char* frameset_attrs[] = { COREATTRS, "rows", "cols", "onload", "onunload", NULL } ;static const char* frameset_contents[] = { "frameset", "frame", "noframes", NULL } ;static const char* head_attrs[] = { I18N, "profile", NULL } ;static const char* head_contents[] = { "title", "isindex", "base", "script", "style", "meta", "link", "object", NULL } ;static const char* hr_depr[] = { "align", "noshade", "size", "width", NULL } ;static const char* version_attr[] = { "version", NULL } ;static const char* html_content[] = { "head", "body", "frameset", NULL } ;static const char* iframe_attrs[] = { COREATTRS, "longdesc", "name", "src", "frameborder", "marginwidth", "marginheight", "scrolling", "align", "height", "width", NULL } ;static const char* img_attrs[] = { ATTRS, "longdesc", "name", "height", "width", "usemap", "ismap", NULL } ;static const char* input_attrs[] = { ATTRS, "type", "name", "value", "checked", "disabled", "readonly", "size", "maxlength", "src", "alt", "usemap", "ismap", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", "accept", NULL } ;static const char* prompt_attrs[] = { COREATTRS, I18N, "prompt", NULL } ;static const char* label_attrs[] = { ATTRS, "for", "accesskey", "onfocus", "onblur", NULL } ;static const char* legend_attrs[] = { ATTRS, "accesskey", NULL } ;static const char* align_attr[] = { "align", NULL } ;static const char* link_attrs[] = { ATTRS, "charset", "href", "hreflang", "type", "rel", "rev", "media", NULL } ;static const char* map_contents[] = { BLOCK, "area", NULL } ;static const char* name_attr[] = { "name", NULL } ;static const char* action_attr[] = { "action", NULL } ;static const char* blockli_elt[] = { BLOCK, "li", NULL } ;static const char* meta_attrs[] = { I18N, "http-equiv", "name", "scheme", NULL } ;static const char* content_attr[] = { "content", NULL } ;static const char* type_attr[] = { "type", NULL } ;static const char* noframes_content[] = { "body", FLOW MODIFIER, NULL } ;static const char* object_contents[] = { FLOW, "param", NULL } ;static const char* object_attrs[] = { ATTRS, "declare", "classid", "codebase", "data", "type", "codetype", "archive", "standby", "height", "width", "usemap", "name", "tabindex", NULL } ;static const char* object_depr[] = { "align", "border", "hspace", "vspace", NULL } ;static const char* ol_attrs[] = { "type", "compact", "start", NULL} ;static const char* option_elt[] = { "option", NULL } ;static const char* optgroup_attrs[] = { ATTRS, "disabled", NULL } ;static const char* option_attrs[] = { ATTRS, "disabled", "label", "selected", "value", NULL } ;static const char* param_attrs[] = { "id", "value", "valuetype", "type", NULL } ;static const char* width_attr[] = { "width", NULL } ;static const char* pre_content[] = { PHRASE, "tt", "i", "b", "u", "s", "strike", "a", "br", "script", "map", "q", "span", "bdo", "iframe", NULL } ;static const char* script_attrs[] = { "charset", "src", "defer", "event", "for", NULL } ;static const char* language_attr[] = { "language", NULL } ;static const char* select_content[] = { "optgroup", "option", NULL } ;static const char* select_attrs[] = { ATTRS, "name", "size", "multiple", "disabled", "tabindex", "onfocus", "onblur", "onchange", NULL } ;static const char* style_attrs[] = { I18N, "media", "title", NULL } ;static const char* table_attrs[] = { ATTRS "summary", "width", "border", "frame", "rules", "cellspacing", "cellpadding", "datapagesize", NULL } ;static const char* table_depr[] = { "align", "bgcolor", NULL } ;static const char* table_contents[] = { "caption", "col", "colgroup", "thead", "tfoot", "tbody", "tr", NULL} ;static const char* tr_elt[] = { "tr", NULL } ;static const char* talign_attrs[] = { ATTRS, CELLHALIGN, CELLVALIGN, NULL} ;static const char* th_td_depr[] = { "nowrap", "bgcolor", "width", "height", NULL } ;static const char* th_td_attr[] = { ATTRS, "abbr", "axis", "headers", "scope", "rowspan", "colspan", CELLHALIGN, CELLVALIGN, NULL } ;static const char* textarea_attrs[] = { ATTRS, "name", "disabled", "readonly", "tabindex", "accesskey", "onfocus", "onblur", "onselect", "onchange", NULL } ;static const char* tr_contents[] = { "th", "td", NULL } ;static const char* bgcolor_attr[] = { "bgcolor", NULL } ;static const char* li_elt[] = { "li", NULL } ;static const char* ul_depr[] = { "type", "compact", NULL} ;static const char* dir_attr[] = { "dir", NULL} ;#define DECL (const char**)static const htmlElemDeschtml40ElementTable[] = {{ "a",		0, 0, 0, 0, 0, 0, 1, "anchor ",	DECL html_inline , NULL , DECL a_attrs , DECL target_attr, NULL},{ "abbr",	0, 0, 0, 0, 0, 0, 1, "abbreviated form",	DECL html_inline , NULL , DECL html_attrs, NULL, NULL},{ "acronym",	0, 0, 0, 0, 0, 0, 1, "",	DECL html_inline , NULL , DECL html_attrs, NULL, NULL},{ "address",	0, 0, 0, 0, 0, 0, 0, "information on author ",	DECL inline_p  , NULL , DECL html_attrs, NULL, NULL},{ "applet",	0, 0, 0, 0, 1, 1, 2, "java applet ",	DECL flow_param , NULL , NULL , DECL applet_attrs, NULL},{ "area",	0, 2, 2, 1, 0, 0, 0, "client-side image map area ",	EMPTY ,  NULL , DECL area_attrs , DECL target_attr, DECL alt_attr},{ "b",		0, 3, 0, 0, 0, 0, 1, "bold text style",	DECL html_inline , NULL , DECL html_attrs, NULL, NULL},{ "base",	0, 2, 2, 1, 0, 0, 0, "document base uri ",	EMPTY , NULL , NULL , DECL target_attr, DECL href_attrs},{ "basefont",	0, 2, 2, 1, 1, 1, 1, "base font size " ,	EMPTY , NULL , NULL, DECL basefont_attrs, NULL},{ "bdo",	0, 0, 0, 0, 0, 0, 1, "i18n bidi over-ride ",	DECL html_inline , NULL , DECL core_i18n_attrs, NULL, DECL dir_attr},{ "big",	0, 3, 0, 0, 0, 0, 1, "large text style",	DECL html_inline , NULL , DECL html_attrs, NULL, NULL},{ "blockquote",	0, 0, 0, 0, 0, 0, 0, "long quotation ",	DECL html_flow , NULL , DECL quote_attrs , NULL, NULL},{ "body",	1, 1, 0, 0, 0, 0, 0, "document body ",	DECL body_contents , "div" , DECL body_attrs, DECL body_depr, NULL},{ "br",		0, 2, 2, 1, 0, 0, 1, "forced line break ",	EMPTY , NULL , DECL core_attrs, DECL clear_attrs , NULL},{ "button",	0, 0, 0, 0, 0, 0, 2, "push button ",	DECL html_flow MODIFIER , NULL , DECL button_attrs, NULL, NULL},{ "caption",	0, 0, 0, 0, 0, 0, 0, "table caption ",	DECL html_inline , NULL , DECL html_attrs, NULL, NULL},{ "center",	0, 3, 0, 0, 1, 1, 0, "shorthand for div align=center ",	DECL html_flow , NULL , NULL, DECL html_attrs, NULL},{ "cite",	0, 0, 0, 0, 0, 0, 1, "citation",	DECL html_inline , NULL , DECL html_attrs, NULL, NULL},{ "code",	0, 0, 0, 0, 0, 0, 1, "computer code fragment",	DECL html_inline , NULL , DECL html_attrs, NULL, NULL},{ "col",	0, 2, 2, 1, 0, 0, 0, "table column ",	EMPTY , NULL , DECL col_attrs , NULL, NULL},{ "colgroup",	0, 1, 0, 0, 0, 0, 0, "table column group ",	DECL col_elt , "col" , DECL col_attrs , NULL, NULL},{ "dd",		0, 1, 0, 0, 0, 0, 0, "definition description ",	DECL html_flow , NULL , DECL html_attrs, NULL, NULL},{ "del",	0, 0, 0, 0, 0, 0, 2, "deleted text ",	DECL html_flow , NULL , DECL edit_attrs , NULL, NULL},{ "dfn",	0, 0, 0, 0, 0, 0, 1, "instance definition",	DECL html_inline , NULL , DECL html_attrs, NULL, NULL},{ "dir",	0, 0, 0, 0, 1, 1, 0, "directory list",	DECL blockli_elt, "li" , NULL, DECL compact_attrs, NULL},{ "div",	0, 0, 0, 0, 0, 0, 0, "generic language/style container",	DECL html_flow, NULL, DECL html_attrs, DECL align_attr, NULL},{ "dl",		0, 0, 0, 0, 0, 0, 0, "definition list ",	DECL dl_contents , "dd" , html_attrs, DECL compact_attr, NULL},{ "dt",		0, 1, 0, 0, 0, 0, 0, "definition term ",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "em",		0, 3, 0, 0, 0, 0, 1, "emphasis",	DECL html_inline, NULL, DECL html_attrs, NULL, NULL},{ "fieldset",	0, 0, 0, 0, 0, 0, 0, "form control group ",	DECL fieldset_contents , NULL, DECL html_attrs, NULL, NULL},{ "font",	0, 3, 0, 0, 1, 1, 1, "local change to font ",	DECL html_inline, NULL, NULL, DECL font_attrs, NULL},{ "form",	0, 0, 0, 0, 0, 0, 0, "interactive form ",	DECL form_contents, "fieldset", DECL form_attrs , DECL target_attr, DECL action_attr},{ "frame",	0, 2, 2, 1, 0, 2, 0, "subwindow " ,

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -