⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmlparser.c

📁 SIP 1.5.0源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
/** * htmlInitAutoClose: * * Initialize the htmlStartCloseIndex for fast lookup of closing tags names. * */voidhtmlInitAutoClose(void) {    int index, i = 0;    if (htmlStartCloseIndexinitialized) return;    for (index = 0;index < 100;index ++) htmlStartCloseIndex[index] = NULL;    index = 0;    while ((htmlStartClose[i] != NULL) && (index < 100 - 1)) {        htmlStartCloseIndex[index++] = &htmlStartClose[i];	while (htmlStartClose[i] != NULL) i++;	i++;    }}/** * htmlTagLookup: * @tag:  The tag name * * Lookup the HTML tag in the ElementTable * * Returns the related htmlElemDescPtr or NULL if not found. */htmlElemDescPtrhtmlTagLookup(const xmlChar *tag) {    int i = 0;    for (i = 0; i < (sizeof(html40ElementTable) /                     sizeof(html40ElementTable[0]));i++) {        if (!xmlStrcmp(tag, BAD_CAST html40ElementTable[i].name))	    return(&html40ElementTable[i]);    }    return(NULL);}/** * htmlCheckAutoClose: * @new:  The new tag name * @old:  The old tag name * * Checks wether the new tag is one of the registered valid tags for closing old. * Initialize the htmlStartCloseIndex for fast lookup of closing tags names. * * Returns 0 if no, 1 if yes. */inthtmlCheckAutoClose(const xmlChar *new, const xmlChar *old) {    int i, index;    char **close;    if (htmlStartCloseIndexinitialized == 0) htmlInitAutoClose();    /* inefficient, but not a big deal */    for (index = 0; index < 100;index++) {        close = htmlStartCloseIndex[index];	if (close == NULL) return(0);	if (!xmlStrcmp(BAD_CAST *close, new)) break;    }    i = close - htmlStartClose;    i++;    while (htmlStartClose[i] != NULL) {        if (!xmlStrcmp(BAD_CAST htmlStartClose[i], old)) {	    return(1);	}	i++;    }    return(0);}/** * htmlAutoClose: * @ctxt:  an HTML parser context * @new:  The new tag name * * The HTmL DtD allows a tag to implicitely close other tags. * The list is kept in htmlStartClose array. This function is * called when a new tag has been detected and generates the * appropriates closes if possible/needed. */voidhtmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar *new) {    xmlChar *oldname;    while ((ctxt->name != NULL) &&            (htmlCheckAutoClose(new, ctxt->name))) {#ifdef DEBUG	fprintf(stderr,"htmlAutoClose: %s closes %s\n", new, ctxt->name);#endif	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))	    ctxt->sax->endElement(ctxt->userData, ctxt->name);	oldname = htmlnamePop(ctxt);	if (oldname != NULL) {#ifdef DEBUG	    fprintf(stderr,"htmlAutoClose: popped %s\n", oldname);#endif	    xmlFree(oldname);        }    }}/** * htmlAutoCloseTag: * @doc:  the HTML document * @name:  The tag name * @elem:  the HTML element * * The HTmL DtD allows a tag to implicitely close other tags. * The list is kept in htmlStartClose array. This function checks * if the element or one of it's children would autoclose the * given tag. * * Returns 1 if autoclose, 0 otherwise */inthtmlAutoCloseTag(htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem) {    htmlNodePtr child;    if (elem == NULL) return(1);    if (!xmlStrcmp(name, elem->name)) return(0);    if (htmlCheckAutoClose(elem->name, name)) return(1);    child = elem->children;    while (child != NULL) {        if (htmlAutoCloseTag(doc, name, child)) return(1);	child = child->next;    }    return(0);}/** * htmlIsAutoClosed: * @doc:  the HTML document * @elem:  the HTML element * * The HTmL DtD allows a tag to implicitely close other tags. * The list is kept in htmlStartClose array. This function checks * if a tag is autoclosed by one of it's child * * Returns 1 if autoclosed, 0 otherwise */inthtmlIsAutoClosed(htmlDocPtr doc, htmlNodePtr elem) {    htmlNodePtr child;    if (elem == NULL) return(1);    child = elem->children;    while (child != NULL) {	if (htmlAutoCloseTag(doc, elem->name, child)) return(1);	child = child->next;    }    return(0);}/** * htmlAutoCloseOnClose: * @ctxt:  an HTML parser context * @new:  The new tag name * * The HTmL DtD allows an ending tag to implicitely close other tags. */voidhtmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *new) {    htmlElemDescPtr info;    xmlChar *oldname;    int i;#ifdef DEBUG    fprintf(stderr,"Close of %s stack: %d elements\n", new, ctxt->nameNr);    for (i = 0;i < ctxt->nameNr;i++)         fprintf(stderr,"%d : %s\n", i, ctxt->nameTab[i]);#endif    for (i = (ctxt->nameNr - 1);i >= 0;i--) {        if (!xmlStrcmp(new, ctxt->nameTab[i])) break;    }    if (i < 0) return;    while (xmlStrcmp(new, ctxt->name)) {	info = htmlTagLookup(ctxt->name);	if ((info == NULL) || (info->endTag == 1)) {#ifdef DEBUG	    fprintf(stderr,"htmlAutoCloseOnClose: %s closes %s\n", new, ctxt->name);#endif        } else {	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))		ctxt->sax->error(ctxt->userData,		 "Opening and ending tag mismatch: %s and %s\n",		                 new, ctxt->name);	    ctxt->wellFormed = 0;	}	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))	    ctxt->sax->endElement(ctxt->userData, ctxt->name);	oldname = htmlnamePop(ctxt);	if (oldname != NULL) {#ifdef DEBUG	    fprintf(stderr,"htmlAutoCloseOnClose: popped %s\n", oldname);#endif	    xmlFree(oldname);	}	    }}/************************************************************************ *									* * 		The list of HTML predefined entities			* *									* ************************************************************************/htmlEntityDesc  html40EntitiesTable[] = {/* * the 4 absolute ones, */{ 34,	"quot",	"quotation mark = APL quote, U+0022 ISOnum" },{ 38,	"amp",	"ampersand, U+0026 ISOnum" },{ 60,	"lt",	"less-than sign, U+003C ISOnum" },{ 62,	"gt",	"greater-than sign, U+003E ISOnum" },/* * A bunch still in the 128-255 range * Replacing them depend really on the charset used. */{ 39,	"apos",	"single quote" },{ 160,	"nbsp",	"no-break space = non-breaking space, U+00A0 ISOnum" },{ 161,	"iexcl","inverted exclamation mark, U+00A1 ISOnum" },{ 162,	"cent",	"cent sign, U+00A2 ISOnum" },{ 163,	"pound","pound sign, U+00A3 ISOnum" },{ 164,	"curren","currency sign, U+00A4 ISOnum" },{ 165,	"yen",	"yen sign = yuan sign, U+00A5 ISOnum" },{ 166,	"brvbar","broken bar = broken vertical bar, U+00A6 ISOnum" },{ 167,	"sect",	"section sign, U+00A7 ISOnum" },{ 168,	"uml",	"diaeresis = spacing diaeresis, U+00A8 ISOdia" },{ 169,	"copy",	"copyright sign, U+00A9 ISOnum" },{ 170,	"ordf",	"feminine ordinal indicator, U+00AA ISOnum" },{ 171,	"laquo","left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum" },{ 172,	"not",	"not sign, U+00AC ISOnum" },{ 173,	"shy",	"soft hyphen = discretionary hyphen, U+00AD ISOnum" },{ 174,	"reg",	"registered sign = registered trade mark sign, U+00AE ISOnum" },{ 175,	"macr",	"macron = spacing macron = overline = APL overbar, U+00AF ISOdia" },{ 176,	"deg",	"degree sign, U+00B0 ISOnum" },{ 177,	"plusmn","plus-minus sign = plus-or-minus sign, U+00B1 ISOnum" },{ 178,	"sup2",	"superscript two = superscript digit two = squared, U+00B2 ISOnum" },{ 179,	"sup3",	"superscript three = superscript digit three = cubed, U+00B3 ISOnum" },{ 180,	"acute","acute accent = spacing acute, U+00B4 ISOdia" },{ 181,	"micro","micro sign, U+00B5 ISOnum" },{ 182,	"para",	"pilcrow sign = paragraph sign, U+00B6 ISOnum" },{ 183,	"middot","middle dot = Georgian comma Greek middle dot, U+00B7 ISOnum" },{ 184,	"cedil","cedilla = spacing cedilla, U+00B8 ISOdia" },{ 185,	"sup1",	"superscript one = superscript digit one, U+00B9 ISOnum" },{ 186,	"ordm",	"masculine ordinal indicator, U+00BA ISOnum" },{ 187,	"raquo","right-pointing double angle quotation mark right pointing guillemet, U+00BB ISOnum" },{ 188,	"frac14","vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum" },{ 189,	"frac12","vulgar fraction one half = fraction one half, U+00BD ISOnum" },{ 190,	"frac34","vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum" },{ 191,	"iquest","inverted question mark = turned question mark, U+00BF ISOnum" },{ 192,	"Agrave","latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1" },{ 193,	"Aacute","latin capital letter A with acute, U+00C1 ISOlat1" },{ 194,	"Acirc","latin capital letter A with circumflex, U+00C2 ISOlat1" },{ 195,	"Atilde","latin capital letter A with tilde, U+00C3 ISOlat1" },{ 196,	"Auml",	"latin capital letter A with diaeresis, U+00C4 ISOlat1" },{ 197,	"Aring","latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1" },{ 198,	"AElig","latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1" },{ 199,	"Ccedil","latin capital letter C with cedilla, U+00C7 ISOlat1" },{ 200,	"Egrave","latin capital letter E with grave, U+00C8 ISOlat1" },{ 201,	"Eacute","latin capital letter E with acute, U+00C9 ISOlat1" },{ 202,	"Ecirc","latin capital letter E with circumflex, U+00CA ISOlat1" },{ 203,	"Euml",	"latin capital letter E with diaeresis, U+00CB ISOlat1" },{ 204,	"Igrave","latin capital letter I with grave, U+00CC ISOlat1" },{ 205,	"Iacute","latin capital letter I with acute, U+00CD ISOlat1" },{ 206,	"Icirc","latin capital letter I with circumflex, U+00CE ISOlat1" },{ 207,	"Iuml",	"latin capital letter I with diaeresis, U+00CF ISOlat1" },{ 208,	"ETH",	"latin capital letter ETH, U+00D0 ISOlat1" },{ 209,	"Ntilde","latin capital letter N with tilde, U+00D1 ISOlat1" },{ 210,	"Ograve","latin capital letter O with grave, U+00D2 ISOlat1" },{ 211,	"Oacute","latin capital letter O with acute, U+00D3 ISOlat1" },{ 212,	"Ocirc","latin capital letter O with circumflex, U+00D4 ISOlat1" },{ 213,	"Otilde","latin capital letter O with tilde, U+00D5 ISOlat1" },{ 214,	"Ouml",	"latin capital letter O with diaeresis, U+00D6 ISOlat1" },{ 215,	"times","multiplication sign, U+00D7 ISOnum" },{ 216,	"Oslash","latin capital letter O with stroke latin capital letter O slash, U+00D8 ISOlat1" },{ 217,	"Ugrave","latin capital letter U with grave, U+00D9 ISOlat1" },{ 218,	"Uacute","latin capital letter U with acute, U+00DA ISOlat1" },{ 219,	"Ucirc","latin capital letter U with circumflex, U+00DB ISOlat1" },{ 220,	"Uuml",	"latin capital letter U with diaeresis, U+00DC ISOlat1" },{ 221,	"Yacute","latin capital letter Y with acute, U+00DD ISOlat1" },{ 222,	"THORN","latin capital letter THORN, U+00DE ISOlat1" },{ 223,	"szlig","latin small letter sharp s = ess-zed, U+00DF ISOlat1" },{ 224,	"agrave","latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1" },{ 225,	"aacute","latin small letter a with acute, U+00E1 ISOlat1" },{ 226,	"acirc","latin small letter a with circumflex, U+00E2 ISOlat1" },{ 227,	"atilde","latin small letter a with tilde, U+00E3 ISOlat1" },{ 228,	"auml",	"latin small letter a with diaeresis, U+00E4 ISOlat1" },{ 229,	"aring","latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1" },{ 230,	"aelig","latin small letter ae = latin small ligature ae, U+00E6 ISOlat1" },{ 231,	"ccedil","latin small letter c with cedilla, U+00E7 ISOlat1" },{ 232,	"egrave","latin small letter e with grave, U+00E8 ISOlat1" },{ 233,	"eacute","latin small letter e with acute, U+00E9 ISOlat1" },{ 234,	"ecirc","latin small letter e with circumflex, U+00EA ISOlat1" },{ 235,	"euml",	"latin small letter e with diaeresis, U+00EB ISOlat1" },{ 236,	"igrave","latin small letter i with grave, U+00EC ISOlat1" },{ 237,	"iacute","latin small letter i with acute, U+00ED ISOlat1" },{ 238,	"icirc","latin small letter i with circumflex, U+00EE ISOlat1" },{ 239,	"iuml",	"latin small letter i with diaeresis, U+00EF ISOlat1" },{ 240,	"eth",	"latin small letter eth, U+00F0 ISOlat1" },{ 241,	"ntilde","latin small letter n with tilde, U+00F1 ISOlat1" },{ 242,	"ograve","latin small letter o with grave, U+00F2 ISOlat1" },{ 243,	"oacute","latin small letter o with acute, U+00F3 ISOlat1" },{ 244,	"ocirc","latin small letter o with circumflex, U+00F4 ISOlat1" },{ 245,	"otilde","latin small letter o with tilde, U+00F5 ISOlat1" },{ 246,	"ouml",	"latin small letter o with diaeresis, U+00F6 ISOlat1" },{ 247,	"divide","division sign, U+00F7 ISOnum" },{ 248,	"oslash","latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1" },{ 249,	"ugrave","latin small letter u with grave, U+00F9 ISOlat1" },{ 250,	"uacute","latin small letter u with acute, U+00FA ISOlat1" },{ 251,	"ucirc","latin small letter u with circumflex, U+00FB ISOlat1" },{ 252,	"uuml",	"latin small letter u with diaeresis, U+00FC ISOlat1" },{ 253,	"yacute","latin small letter y with acute, U+00FD ISOlat1" },{ 254,	"thorn","latin small letter thorn with, U+00FE ISOlat1" },{ 255,	"yuml",	"latin small letter y with diaeresis, U+00FF ISOlat1" },/* * Anything below should really be kept as entities references */{ 402,	"fnof",	"latin small f with hook = function = florin, U+0192 ISOtech" },{ 913,	"Alpha","greek capital letter alpha, U+0391" },{ 914,	"Beta",	"greek capital letter beta, U+0392" },{ 915,	"Gamma","greek capital letter gamma, U+0393 ISOgrk3" },{ 916,	"Delta","greek capital letter delta, U+0394 ISOgrk3" },{ 917,	"Epsilon","greek capital letter epsilon, U+0395" },{ 918,	"Zeta",	"greek capital letter zeta, U+0396" },{ 919,	"Eta",	"greek capital letter eta, U+0397" },{ 920,	"Theta","greek capital letter theta, U+0398 ISOgrk3" },{ 921,	"Iota",	"greek capital letter iota, U+0399" },{ 922,	"Kappa","greek capital letter kappa, U+039A" },{ 923,	"Lambda""greek capital letter lambda, U+039B ISOgrk3" },{ 924,	"Mu",	"greek capital letter mu, U+039C" },{ 925,	"Nu",	"greek capital letter nu, U+039D" },{ 926,	"Xi",	"greek capital letter xi, U+039E ISOgrk3" },{ 927,	"Omicron","greek capital letter omicron, U+039F" },{ 928,	"Pi",	"greek capital letter pi, U+03A0 ISOgrk3" },{ 929,	"Rho",	"greek capital letter rho, U+03A1" },{ 931,	"Sigma","greek capital letter sigma, U+03A3 ISOgrk3" },{ 932,	"Tau",	"greek capital letter tau, U+03A4" },{ 933,	"Upsilon","greek capital letter upsilon, U+03A5 ISOgrk3" },{ 934,	"Phi",	"greek capital letter phi, U+03A6 ISOgrk3" },{ 935,	"Chi",	"greek capital letter chi, U+03A7" },{ 936,	"Psi",	"greek capital letter psi, U+03A8 ISOgrk3" },{ 937,	"Omega","greek capital letter omega, U+03A9 ISOgrk3" },{ 945,	"alpha","greek small letter alpha, U+03B1 ISOgrk3" },{ 946,	"beta",	"greek small letter beta, U+03B2 ISOgrk3" },{ 947,	"gamma","greek small letter gamma, U+03B3 ISOgrk3" },{ 948,	"delta","greek small letter delta, U+03B4 ISOgrk3" },{ 949,	"epsilon","greek small letter epsilon, U+03B5 ISOgrk3" },{ 950,	"zeta",	"greek small letter zeta, U+03B6 ISOgrk3" },{ 951,	"eta",	"greek small letter eta, U+03B7 ISOgrk3" },{ 952,	"theta","greek small letter theta, U+03B8 ISOgrk3" },{ 953,	"iota",	"greek small letter iota, U+03B9 ISOgrk3" },{ 954,	"kappa","greek small letter kappa, U+03BA ISOgrk3" },{ 955,	"lambda","greek small letter lambda, U+03BB ISOgrk3" },{ 956,	"mu",	"greek small letter mu, U+03BC ISOgrk3" },{ 957,	"nu",	"greek small letter nu, U+03BD ISOgrk3" },{ 958,	"xi",	"greek small letter xi, U+03BE ISOgrk3" },{ 959,	"omicron","greek small letter omicron, U+03BF NEW" },{ 960,	"pi",	"greek small letter pi, U+03C0 ISOgrk3" },{ 961,	"rho",	"greek small letter rho, U+03C1 ISOgrk3" },{ 962,	"sigmaf","greek small letter final sigma, U+03C2 ISOgrk3" },{ 963,	"sigma","greek small letter sigma, U+03C3 ISOgrk3" },{ 964,	"tau",	"greek small letter tau, U+03C4 ISOgrk3" },{ 965,	"upsilon","greek small letter upsilon, U+03C5 ISOgrk3" },{ 966,	"phi",	"greek small letter phi, U+03C6 ISOgrk3" },{ 967,	"chi",	"greek small letter chi, U+03C7 ISOgrk3" },{ 968,	"psi",	"greek small letter psi, U+03C8 ISOgrk3" },{ 969,	"omega","greek small letter omega, U+03C9 ISOgrk3" },{ 977,	"thetasym","greek small letter theta symbol, U+03D1 NEW" },{ 978,	"upsih","greek upsilon with hook symbol, U+03D2 NEW" },{ 982,	"piv",	"greek pi symbol, U+03D6 ISOgrk3" },{ 8226,	"bull",	"bullet = black small circle, U+2022 ISOpub" },{ 8230,	"hellip","horizontal ellipsis = three dot leader, U+2026 ISOpub" },{ 8242,	"prime","prime = minutes = feet, U+2032 ISOtech" },{ 8243,	"Prime","double prime = seconds = inches, U+2033 ISOtech" },{ 8254,	"oline","overline = spacing overscore, U+203E NEW" },{ 8260,	"frasl","fraction slash, U+2044 NEW" },{ 8472,	"weierp","script capital P = power set = Weierstrass p, U+2118 ISOamso" },{ 8465,	"image","blackletter capital I = imaginary part, U+2111 ISOamso" },{ 8476,	"real",	"blackletter capital R = real part symbol, U+211C ISOamso" },{ 8482,	"trade","trade mark sign, U+2122 ISOnum" },{ 8501,	"alefsym","alef symbol = first transfinite cardinal, U+2135 NEW" },{ 8592,	"larr",	"leftwards arrow, U+2190 ISOnum" },{ 8593,	"uarr",	"upwards arrow, U+2191 ISOnum" },{ 8594,	"rarr",	"rightwards arrow, U+2192 ISOnum" },{ 8595,	"darr",	"downwards arrow, U+2193 ISOnum" },

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -