📄 html.c
字号:
/*
   +----------------------------------------------------------------------+
   | PHP Version 4                                                        |
   +----------------------------------------------------------------------+
   | Copyright (c) 1997-2007 The PHP Group                                |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
   | Authors: Rasmus Lerdorf <rasmus@php.net>                             |
   |          Jaakko Hyvätti <jaakko.hyvatti@iki.fi>                      |
   |          Wez Furlong <wez@thebrainroom.com>                          |
   +----------------------------------------------------------------------+
*/

/* $Id: html.c,v 1.63.2.23.2.4 2007/01/01 09:46:48 sebastian Exp $ */

/*
 * HTML entity resources:
 *
 * http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset2.asp
 * http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset3.asp
 * http://www.unicode.org/Public/MAPPINGS/OBSOLETE/UNI2SGML.TXT
 *
 * http://www.w3.org/TR/2002/REC-xhtml1-20020801/dtds.html#h-A2
 *
 */

/* Each preprocessor directive must sit on its own line; anything that
 * follows a directive on the same line is parsed as part of it. */
#include "php.h"
#if PHP_WIN32
#include "config.w32.h"
#else
#include <php_config.h>
#endif
#include "reg.h"
#include "html.h"
#include "php_string.h"
#include "SAPI.h"
#if HAVE_LOCALE_H
#include <locale.h>
#endif
#if HAVE_LANGINFO_H
#include <langinfo.h>
#endif

#if HAVE_MBSTRING
# include "ext/mbstring/mbstring.h"
ZEND_EXTERN_MODULE_GLOBALS(mbstring)
#endif

/*
 * Identifiers for the character sets this file distinguishes.
 * cs_terminator is the first enumerator and therefore has the value 0;
 * NOTE(review): presumably it marks the end of a lookup table - confirm
 * against the code that consumes it.
 */
enum entity_charset {
	cs_terminator,
	cs_8859_1,
	cs_cp1252,
	cs_8859_15,
	cs_utf_8,
	cs_big5,
	cs_gb2312,
	cs_big5hkscs,
	cs_sjis,
	cs_eucjp,
	cs_koi8r,
	cs_cp1251,
	cs_8859_5,
	cs_cp866
};

/*
 * One slot of an entity table: the entity text without the surrounding
 * '&' and ';' (a name such as "euro", or a numeric reference such as
 * "#1105"), or NULL when the slot has no entity.
 */
typedef const char *entity_table_t;

/* codepage 1252 is a Windows extension to iso-8859-1.
*/
/* Covers codes 0x80 - 0x9f (see the cs_cp1252 row of entity_map). */
static entity_table_t ent_cp_1252[] = {
	/* 0x80 */ "euro", NULL, "sbquo", "fnof", "bdquo", "hellip", "dagger", "Dagger",
	/* 0x88 */ "circ", "permil", "Scaron", "lsaquo", "OElig", NULL, NULL, NULL,
	/* 0x90 */ NULL, "lsquo", "rsquo", "ldquo", "rdquo", "bull", "ndash", "mdash",
	/* 0x98 */ "tilde", "trade", "scaron", "rsaquo", "oelig", NULL, NULL, "Yuml"
};

/* Covers codes 0xa0 - 0xff; shared by the cs_cp1252, cs_8859_1 and
 * cs_utf_8 rows of entity_map. */
static entity_table_t ent_iso_8859_1[] = {
	/* 0xa0 */ "nbsp", "iexcl", "cent", "pound", "curren", "yen", "brvbar", "sect",
	/* 0xa8 */ "uml", "copy", "ordf", "laquo", "not", "shy", "reg", "macr",
	/* 0xb0 */ "deg", "plusmn", "sup2", "sup3", "acute", "micro", "para", "middot",
	/* 0xb8 */ "cedil", "sup1", "ordm", "raquo", "frac14", "frac12", "frac34", "iquest",
	/* 0xc0 */ "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil",
	/* 0xc8 */ "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml",
	/* 0xd0 */ "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times",
	/* 0xd8 */ "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig",
	/* 0xe0 */ "agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil",
	/* 0xe8 */ "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml",
	/* 0xf0 */ "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", "divide",
	/* 0xf8 */ "oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml"
};

/* Covers codes 0xa0 - 0xff (see the cs_8859_15 row of entity_map). */
static entity_table_t ent_iso_8859_15[] = {
	/* 0xa0 */ "nbsp", "iexcl", "cent", "pound", "euro", "yen", "Scaron", "sect",
	/* 0xa8 */ "scaron", "copy", "ordf", "laquo", "not", "shy", "reg", "macr",
	/* 0xb0 */ "deg", "plusmn", "sup2", "sup3", NULL, /* Zcaron */ "micro", "para", "middot",
	/* 0xb8 */ NULL, /* zcaron */ "sup1", "ordm", "raquo", "OElig", "oelig", "Yuml", "iquest",
	/* 0xc0 */ "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil",
	/* 0xc8 */ "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml",
	/* 0xd0 */ "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times",
	/* 0xd8 */ "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig",
	/* 0xe0 */ "agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil",
	/* 0xe8 */ "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml",
	/* 0xf0 */ "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", "divide",
	/* 0xf8 */ "oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml"
};

/* Unicode code points 338 - 402. */
static entity_table_t ent_uni_338_402[] = {
	/* 338 */
	"OElig", "oelig",
	/* 340 - 351 */
	NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL,
	/* 352 */
	"Scaron", "scaron",
	/* 354 */
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	/* 376 */
	"Yuml",
	/* 377 */
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL,
	/* 402 */
	"fnof"
};

/* Unicode code points 710 - 732. */
static entity_table_t ent_uni_spacing[] = {
	/* 710 */
	"circ",
	/* 711 - 730 */
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	/* 731 - 732 */
	NULL, "tilde"
};

/* Unicode code points 913 - 982. */
static entity_table_t ent_uni_greek[] = {
	/* 913 */
	"Alpha", "Beta", "Gamma", "Delta", "Epsilon", "Zeta", "Eta", "Theta",
	"Iota", "Kappa", "Lambda", "Mu", "Nu", "Xi", "Omicron", "Pi", "Rho",
	NULL, "Sigma", "Tau", "Upsilon", "Phi", "Chi", "Psi", "Omega",
	/* 938 - 944 are not mapped */
	NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	"alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", "theta",
	"iota", "kappa", "lambda", "mu", "nu", "xi", "omicron", "pi", "rho",
	"sigmaf", "sigma", "tau", "upsilon", "phi", "chi", "psi", "omega",
	/* 970 - 976 are not mapped */
	NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	"thetasym", "upsih",
	NULL, NULL, NULL,
	"piv"
};

/* Unicode code points 8194 - 8260. */
static entity_table_t ent_uni_punct[] = {
	/* 8194 */
	"ensp", "emsp", NULL, NULL, NULL, NULL, NULL, "thinsp", NULL, NULL,
	/* 8204 */
	"zwnj", "zwj", "lrm", "rlm", NULL, NULL, NULL, "ndash", "mdash", NULL, NULL, NULL,
	/* 8216 */
	"lsquo", "rsquo", "sbquo", NULL, "ldquo", "rdquo", "bdquo", NULL,
	/* 8224 */
	"dagger", "Dagger", "bull", NULL, NULL, NULL, "hellip",
	/* 8231 */
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	/* 8240 */
	"permil", NULL, "prime", "Prime", NULL, NULL, NULL, NULL, NULL,
	/* 8249 */
	"lsaquo", "rsaquo", NULL, NULL, NULL, "oline", NULL, NULL, NULL, NULL, NULL, "frasl"
};

/* Unicode code point 8364 only. */
static entity_table_t ent_uni_euro[] = {
	"euro"
};

static entity_table_t ent_uni_8465_8501[] = {
	/* 8465 */
	"image", NULL, NULL, NULL, NULL, NULL, NULL,
	/* 8472 */
	"weierp", NULL, NULL, NULL,
	/* 8476 */
	"real", NULL, NULL, NULL, NULL, NULL,
	/* 8482 */
	"trade", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	/* 8501 */
	"alefsym",
};

/*
 * Sixteen slots per commented row.
 * NOTE(review): "asymp" appears twice in the 8768 row (at 8776 and at
 * 8781); left as found - check which code point should own the name.
 */
static entity_table_t ent_uni_8592_9002[] = {
	/* 8592 (0x2190) */
	"larr", "uarr", "rarr", "darr", "harr", NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	/* 8608 (0x21a0) */
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	/* 8624 (0x21b0) */
	NULL, NULL, NULL, NULL, NULL, "crarr", NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	/* 8640 (0x21c0) */
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	/* 8656 (0x21d0) */
	"lArr", "uArr", "rArr", "dArr", "hArr", "vArr", NULL, NULL,
	NULL, NULL, "lAarr", "rAarr", NULL, "rarrw", NULL, NULL,
	/* 8672 (0x21e0) */
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	/* 8704 (0x2200) */
	"forall", "comp", "part", "exist", "nexist", "empty", NULL, "nabla",
	"isin", "notin", "epsis", "ni", "notni", "bepsi", NULL, "prod",
	/* 8720 (0x2210) */
	"coprod", "sum", "minus", "mnplus", "plusdo", NULL, "setmn", "lowast",
	"compfn", NULL, "radic", NULL, NULL, "prop", "infin", "ang90",
	/* 8736 (0x2220) */
	"ang", "angmsd", "angsph", "mid", "nmid", "par", "npar", "and",
	"or", "cap", "cup", "int", NULL, NULL, "conint", NULL,
	/* 8752 (0x2230) */
	NULL, NULL, NULL, NULL, "there4", "becaus", NULL, NULL,
	NULL, NULL, NULL, NULL, "sim", "bsim", NULL, NULL,
	/* 8768 (0x2240) */
	"wreath", "nsim", NULL, "sime", "nsime", "cong", NULL, "ncong",
	"asymp", "nap", "ape", NULL, "bcong", "asymp", "bump", "bumpe",
	/* 8784 (0x2250) */
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	/* 8800 (0x2260) */
	"ne", "equiv", NULL, NULL, "le", "ge", "lE", "gE",
	"lnE", "gnE", "Lt", "Gt", "twixt", NULL, "nlt", "ngt",
	/* 8816 (0x2270) */
	"nles", "nges", "lsim", "gsim", NULL, NULL, "lg", "gl",
	NULL, NULL, "pr", "sc", "cupre", "sscue", "prsim", "scsim",
	/* 8832 (0x2280) */
	"npr", "nsc", "sub", "sup", "nsub", "nsup", "sube", "supe",
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	/* 8848 (0x2290) */
	NULL, NULL, NULL, NULL, NULL, "oplus", NULL, "otimes",
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	/* 8864 (0x22a0) */
	NULL, NULL, NULL, NULL, NULL, "perp", NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	/* 8880 (0x22b0) */
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	/* 8896 (0x22c0) */
	NULL, NULL, NULL, NULL, NULL, "sdot", NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	/* 8912 (0x22d0) */
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	/* 8928 (0x22e0) */
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	/* 8944 (0x22f0) */
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	/* 8960 (0x2300) */
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	"lceil", "rceil", "lfloor", "rfloor", NULL, NULL, NULL, NULL,
	/* 8976 (0x2310) */
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	/* 8992 (0x2320) */
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, "lang", "rang"
};

static entity_table_t ent_uni_9674[] = {
	/* 9674 */
	"loz"
};

static entity_table_t ent_uni_9824_9830[] = {
	/* 9824 */
	"spades", NULL, NULL, "clubs", NULL, "hearts", "diams"
};

/*
 * KOI8-R, expressed as numeric character references.
 * NOTE(review): the first slot is U+0451, which KOI8-R places at 0xa3, so
 * the table presumably starts there - confirm against its entity_map row.
 */
static entity_table_t ent_koi8r[] = {
	"#1105", /* "jo" */
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	"#1025", /* "JO" */
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL,
	"#1102", "#1072", "#1073", "#1094", "#1076", "#1077", "#1092", "#1075",
	"#1093", "#1080", "#1081", "#1082", "#1083", "#1084", "#1085", "#1086",
	"#1087", "#1103", "#1088", "#1089", "#1090", "#1091", "#1078", "#1074",
	"#1100", "#1099", "#1079", "#1096", "#1101", "#1097", "#1095", "#1098",
	"#1070", "#1040", "#1041", "#1062", "#1044", "#1045", "#1060", "#1043",
	"#1061", "#1048", "#1049", "#1050", "#1051", "#1052", "#1053", "#1054",
	"#1055", "#1071", "#1056", "#1057", "#1058", "#1059", "#1046", "#1042",
	"#1068", "#1067", "#1047", "#1064", "#1069", "#1065", "#1063", "#1066"
};

/*
 * Windows-1251. The 128 slots line up with codes 0x80 - 0xff ("nbsp" lands
 * on 0xa0, "#1040" on 0xc0); NOTE(review): confirm the base against the
 * table's entity_map row.
 */
static entity_table_t ent_cp_1251[] = {
	/* 0x80 */ "#1026", "#1027", "#8218", "#1107", "#8222", "hellip", "dagger", "Dagger",
	/* 0x88 */ "euro", "permil", "#1033", "#8249", "#1034", "#1036", "#1035", "#1039",
	/* 0x90: 0x93 and 0x94 are the double quotation marks U+201C / U+201D
	 * (8220 / 8221); they were previously listed one code point too low. */
	"#1106", "#8216", "#8217", "#8220", "#8221", "bull", "ndash", "mdash",
	/* 0x98 */ NULL, "trade", "#1113", "#8250", "#1114", "#1116", "#1115", "#1119",
	/* 0xa0 */ "nbsp", "#1038", "#1118", "#1032", "curren", "#1168", "brvbar", "sect",
	/* 0xa8 */ "#1025", "copy", "#1028", "laquo", "not", "shy", "reg", "#1031",
	/* 0xb0 */ "deg", "plusmn", "#1030", "#1110", "#1169", "micro", "para", "middot",
	/* 0xb8 */ "#1105", "#8470", "#1108", "raquo", "#1112", "#1029", "#1109", "#1111",
	/* 0xc0 */ "#1040", "#1041", "#1042", "#1043", "#1044", "#1045", "#1046", "#1047",
	/* 0xc8 */ "#1048", "#1049", "#1050", "#1051", "#1052", "#1053", "#1054", "#1055",
	/* 0xd0 */ "#1056", "#1057", "#1058", "#1059", "#1060", "#1061", "#1062", "#1063",
	/* 0xd8 */ "#1064", "#1065", "#1066", "#1067", "#1068", "#1069", "#1070", "#1071",
	/* 0xe0 */ "#1072", "#1073", "#1074", "#1075", "#1076", "#1077", "#1078", "#1079",
	/* 0xe8 */ "#1080", "#1081", "#1082", "#1083", "#1084", "#1085", "#1086", "#1087",
	/* 0xf0 */ "#1088", "#1089", "#1090", "#1091", "#1092", "#1093", "#1094", "#1095",
	/* 0xf8 */ "#1096", "#1097", "#1098", "#1099", "#1100", "#1101", "#1102", "#1103"
};

/*
 * ISO-8859-5. The 64 slots start at U+0420, which ISO-8859-5 places at
 * 0xc0; NOTE(review): confirm the base against the table's entity_map row.
 */
static entity_table_t ent_iso_8859_5[] = {
	/* 0xc0 */ "#1056", "#1057", "#1058", "#1059", "#1060", "#1061", "#1062", "#1063",
	/* 0xc8 */ "#1064", "#1065", "#1066", "#1067", "#1068", "#1069", "#1070", "#1071",
	/* 0xd0 */ "#1072", "#1073", "#1074", "#1075", "#1076", "#1077", "#1078", "#1079",
	/* 0xd8 */ "#1080", "#1081", "#1082", "#1083", "#1084", "#1085", "#1086", "#1087",
	/* 0xe0 */ "#1088", "#1089", "#1090", "#1091", "#1092", "#1093", "#1094", "#1095",
	/* 0xe8 */ "#1096", "#1097", "#1098", "#1099", "#1100", "#1101", "#1102", "#1103",
	/* 0xf0: the numero sign U+2116 (8470), not U+0450, which the charset
	 * does not contain. */
	"#8470", "#1105", "#1106", "#1107", "#1108", "#1109", "#1110", "#1111",
	/* 0xf8: 0xfd is the section sign U+00A7, not U+045D, which the charset
	 * does not contain. */
	"#1112", "#1113", "#1114", "#1115", "#1116", "sect", "#1118", "#1119"
};

/*
 * CP866, expressed as numeric character references.
 * NOTE(review): the first slot is U+2514, which CP866 places at 0xc0 -
 * confirm the base against the table's entity_map row.
 */
static entity_table_t ent_cp_866[] = {
	"#9492", "#9524", "#9516", "#9500", "#9472", "#9532", "#9566", "#9567",
	"#9562", "#9556", "#9577", "#9574", "#9568", "#9552", "#9580", "#9575",
	"#9576", "#9572", "#9573", "#9561", "#9560", "#9554", "#9555", "#9579",
	"#9578", "#9496", "#9484", "#9608", "#9604", "#9612", "#9616", "#9600",
	"#1088", "#1089", "#1090", "#1091", "#1092", "#1093", "#1094", "#1095",
	"#1096", "#1097", "#1098", "#1099", "#1100", "#1101", "#1102", "#1103",
	"#1025", "#1105", "#1028", "#1108", "#1031", "#1111", "#1038", "#1118",
	"#176", "#8729", "#183", "#8730", "#8470", "#164", "#9632", "#160"
};

/*
 * Ties one contiguous range of character codes in one charset to the table
 * that holds its entities: slot i of the table belongs to code basechar + i.
 */
struct html_entity_map {
	enum entity_charset charset;	/* charset identifier */
	unsigned short basechar;	/* char code at start of table */
	unsigned short endchar;		/* last char code in the table */
	entity_table_t *table;		/* the table of mappings */
};

/* One row per (charset, code range); a charset may own several rows. */
static const struct html_entity_map entity_map[] = {
	{ cs_cp1252, 0x80, 0x9f, ent_cp_1252 },
	{ cs_cp1252, 0xa0, 0xff, ent_iso_8859_1 },
	{ cs_8859_1, 0xa0, 0xff, ent_iso_8859_1 },
	{ cs_8859_15, 0xa0, 0xff, ent_iso_8859_15 },
	{ cs_utf_8, 0xa0, 0xff, ent_iso_8859_1 },
	{ cs_utf_8, 338, 402, ent_uni_338_402 },
	{ cs_utf_8, 710, 732, ent_uni_spacing },
	{ cs_utf_8, 913, 982, ent_uni_greek },
	{ cs_utf_8, 8194, 8260, ent_uni_punct },
	{ cs_utf_8, 8364, 8364, ent_uni_euro },
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -