📄 html.c

📁 php-4.4.7学习linux时下载的源代码
💻 C
📖 第 1 页 / 共 3 页
字号:
	{ cs_utf_8, 		8465, 8501, ent_uni_8465_8501 },	{ cs_utf_8, 		8592, 9002, ent_uni_8592_9002 },	{ cs_utf_8, 		9674, 9674, ent_uni_9674 },	{ cs_utf_8, 		9824, 9830, ent_uni_9824_9830 },	{ cs_big5, 			0xa0, 0xff, ent_iso_8859_1 },	{ cs_gb2312, 		0xa0, 0xff, ent_iso_8859_1 },	{ cs_big5hkscs, 	0xa0, 0xff, ent_iso_8859_1 }, 	{ cs_sjis,			0xa0, 0xff, ent_iso_8859_1 }, 	{ cs_eucjp,			0xa0, 0xff, ent_iso_8859_1 },	{ cs_koi8r,		    0xa3, 0xff, ent_koi8r },	{ cs_cp1251,		0x80, 0xff, ent_cp_1251 },	{ cs_8859_5,		0xc0, 0xff, ent_iso_8859_5 },	{ cs_cp866,		    0xc0, 0xff, ent_cp_866 },	{ cs_terminator }};static const struct {	const char *codeset;	enum entity_charset charset;} charset_map[] = {	{ "ISO-8859-1", 	cs_8859_1 },	{ "ISO8859-1",	 	cs_8859_1 },	{ "ISO-8859-15", 	cs_8859_15 },	{ "ISO8859-15", 	cs_8859_15 },	{ "utf-8", 			cs_utf_8 },	{ "cp1252", 		cs_cp1252 },	{ "Windows-1252", 	cs_cp1252 },	{ "1252",           cs_cp1252 }, 	{ "BIG5",			cs_big5 },	{ "950",            cs_big5 },	{ "GB2312",			cs_gb2312 },	{ "936",            cs_gb2312 },	{ "BIG5-HKSCS",		cs_big5hkscs },	{ "Shift_JIS",		cs_sjis },	{ "SJIS",   		cs_sjis },	{ "932",            cs_sjis },	{ "EUCJP",   		cs_eucjp },	{ "EUC-JP",   		cs_eucjp },	{ "KOI8-R",         cs_koi8r },	{ "koi8-ru",        cs_koi8r },	{ "koi8r",          cs_koi8r },	{ "cp1251",         cs_cp1251 },	{ "Windows-1251",   cs_cp1251 },	{ "win-1251",       cs_cp1251 },	{ "iso8859-5",      cs_8859_5 },	{ "iso-8859-5",     cs_8859_5 },	{ "cp866",          cs_cp866 },	{ "866",            cs_cp866 },    	{ "ibm866",         cs_cp866 },	{ NULL }};static const struct {	unsigned short charcode;	char *entity;	int entitylen;	int flags;} basic_entities[] = {	{ '"',	"&quot;",	6,	ENT_HTML_QUOTE_DOUBLE },	{ '\'',	"&#039;",	6,	ENT_HTML_QUOTE_SINGLE },	{ '\'',	"&#39;",	5,	ENT_HTML_QUOTE_SINGLE },	{ '<',	"&lt;",		4,	0 },	{ '>',	"&gt;",		4,	0 },	{ '&',	"&amp;",	5,	0 }, /* this should come last */	{ 0, NULL, 0, 0 }};	#define MB_RETURN { \			*newpos = pos;       \		  	mbseq[mbpos] = '\0'; \		  	*mbseqlen = mbpos;   \		  	return this_char; }					#define MB_WRITE(mbchar) { \			mbspace--;  \			if (mbspace == 0) {      \				MB_RETURN;           \			}                        \			mbseq[mbpos++] = (mbchar); }/* {{{ get_next_char */inline static unsigned short get_next_char(enum entity_charset charset,		unsigned char * str,		int * newpos,		unsigned char * mbseq,		int * mbseqlen){	int pos = *newpos;	int mbpos = 0;	int mbspace = *mbseqlen;	unsigned short this_char = str[pos++];		if (mbspace <= 0) {		*mbseqlen = 0;		return this_char;	}		MB_WRITE((unsigned char)this_char);		switch (charset) {		case cs_utf_8:			{				unsigned long utf = 0;				int stat = 0;				int more = 1;				/* unpack utf-8 encoding into a wide char.				 * Code stolen from the mbstring extension */				do {					if (this_char < 0x80) {						more = 0;						break;					} else if (this_char < 0xc0) {						switch (stat) {							case 0x10:	/* 2, 2nd */							case 0x21:	/* 3, 3rd */							case 0x32:	/* 4, 4th */							case 0x43:	/* 5, 5th */							case 0x54:	/* 6, 6th */								/* last byte in sequence */								more = 0;								utf |= (this_char & 0x3f);								this_char = (unsigned short)utf;								break;							case 0x20:	/* 3, 2nd */							case 0x31:	/* 4, 3rd */							case 0x42:	/* 5, 4th */							case 0x53:	/* 6, 5th */								/* penultimate char */								utf |= ((this_char & 0x3f) << 6);								stat++;								break;							case 0x30:	/* 4, 2nd */							case 0x41:	/* 5, 3rd */							case 0x52:	/* 6, 4th */								utf |= ((this_char & 0x3f) << 12);								stat++;								break;							case 0x40:	/* 5, 2nd */							case 0x51:								utf |= ((this_char & 0x3f) << 18);								stat++;								break;							case 0x50:	/* 6, 2nd */								utf |= ((this_char & 0x3f) << 24);								stat++;								break;							default:								/* invalid */								more = 0;						}					}					/* lead byte */					else if (this_char < 0xe0) {						stat = 0x10;	/* 2 byte */						utf = (this_char & 0x1f) << 6;					} else if (this_char < 0xf0) {						stat = 0x20;	/* 3 byte */						utf = (this_char & 0xf) << 12;					} else if (this_char < 0xf8) {						stat = 0x30;	/* 4 byte */						utf = (this_char & 0x7) << 18;					} else if (this_char < 0xfc) {						stat = 0x40;	/* 5 byte */						utf = (this_char & 0x3) << 24;					} else if (this_char < 0xfe) {						stat = 0x50;	/* 6 byte */						utf = (this_char & 0x1) << 30;					} else {						/* invalid; bail */						more = 0;						break;					}					if (more) {						this_char = str[pos++];						MB_WRITE((unsigned char)this_char);					}				} while (more);			}			break;		case cs_big5:		case cs_gb2312:		case cs_big5hkscs:			{				/* check if this is the first of a 2-byte sequence */				if (this_char >= 0xa1 && this_char <= 0xfe) {					/* peek at the next char */					unsigned char next_char = str[pos];					if ((next_char >= 0x40 && next_char <= 0x7e) ||							(next_char >= 0xa1 && next_char <= 0xfe)) {						/* yes, this a wide char */						this_char <<= 8;						MB_WRITE(next_char);						this_char |= next_char;						pos++;					}									}				break;			}		case cs_sjis:			{				/* check if this is the first of a 2-byte sequence */				if ( (this_char >= 0x81 && this_char <= 0x9f) ||					 (this_char >= 0xe0 && this_char <= 0xef)					) {					/* peek at the next char */					unsigned char next_char = str[pos];					if ((next_char >= 0x40 && next_char <= 0x7e) ||						(next_char >= 0x80 && next_char <= 0xfc))					{						/* yes, this a wide char */						this_char <<= 8;						MB_WRITE(next_char);						this_char |= next_char;						pos++;					}									}				break;			}		case cs_eucjp:			{				/* check if this is the first of a multi-byte sequence */				if (this_char >= 0xa1 && this_char <= 0xfe) {					/* peek at the next char */					unsigned char next_char = str[pos];					if (next_char >= 0xa1 && next_char <= 0xfe) {						/* yes, this a jis kanji char */						this_char <<= 8;						MB_WRITE(next_char);						this_char |= next_char;						pos++;					}									} else if (this_char == 0x8e) {					/* peek at the next char */					unsigned char next_char = str[pos];					if (next_char >= 0xa1 && next_char <= 0xdf) {						/* JIS X 0201 kana */						this_char <<= 8;						MB_WRITE(next_char);						this_char |= next_char;						pos++;					}									} else if (this_char == 0x8f) {					/* peek at the next two char */					unsigned char next_char = str[pos];					unsigned char next2_char = str[pos+1];					if ((next_char >= 0xa1 && next_char <= 0xfe) &&						(next2_char >= 0xa1 && next2_char <= 0xfe)) {						/* JIS X 0212 hojo-kanji */						this_char <<= 8;						MB_WRITE(next_char);						this_char |= next_char;						pos++;						this_char <<= 8;						MB_WRITE(next2_char);						this_char |= next2_char;						pos++;					}									}				break;			}		default:			break;	}	MB_RETURN;}/* }}} *//* {{{ entity_charset determine_charset * returns the charset identifier based on current locale or a hint. * defaults to iso-8859-1 */static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC){	int i;	enum entity_charset charset = cs_8859_1;	int len = 0;	zval *uf_result = NULL;	/* Guarantee default behaviour for backwards compatibility */	if (charset_hint == NULL)		return cs_8859_1;	if ((len = strlen(charset_hint)) != 0) {		goto det_charset;	}#if HAVE_MBSTRING#if !defined(COMPILE_DL_MBSTRING)	/* XXX: Ugly things. Why don't we look for a more sophisticated way? */	switch (MBSTRG(current_internal_encoding)) {		case mbfl_no_encoding_8859_1:			return cs_8859_1;		case mbfl_no_encoding_utf8:			return cs_utf_8;		case mbfl_no_encoding_euc_jp:		case mbfl_no_encoding_eucjp_win:			return cs_eucjp;		case mbfl_no_encoding_sjis:		case mbfl_no_encoding_sjis_win:		case mbfl_no_encoding_sjis_mac:			return cs_sjis;		case mbfl_no_encoding_cp1252:			return cs_cp1252;		case mbfl_no_encoding_8859_15:			return cs_8859_15;		case mbfl_no_encoding_big5:			return cs_big5;		case mbfl_no_encoding_euc_cn:		case mbfl_no_encoding_hz:		case mbfl_no_encoding_cp936:			return cs_gb2312;		case mbfl_no_encoding_koi8r:			return cs_koi8r;		case mbfl_no_encoding_cp866:			return cs_cp866;		case mbfl_no_encoding_cp1251:			return cs_cp1251;		case mbfl_no_encoding_8859_5:			return cs_8859_5;		default:				;	}#else	{		zval nm_mb_internal_encoding;		ZVAL_STRING(&nm_mb_internal_encoding, "mb_internal_encoding", 0);		if (call_user_function_ex(CG(function_table), NULL, &nm_mb_internal_encoding, &uf_result, 0, NULL, 1, NULL TSRMLS_CC) != FAILURE) {			charset_hint = Z_STRVAL_P(uf_result);			len = Z_STRLEN_P(uf_result);						goto det_charset;		}	}#endif#endif	charset_hint = SG(default_charset);	if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) {		goto det_charset;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -