📄 html.c
字号:
{ cs_utf_8, 8465, 8501, ent_uni_8465_8501 }, { cs_utf_8, 8592, 9002, ent_uni_8592_9002 }, { cs_utf_8, 9674, 9674, ent_uni_9674 }, { cs_utf_8, 9824, 9830, ent_uni_9824_9830 }, { cs_big5, 0xa0, 0xff, ent_iso_8859_1 }, { cs_gb2312, 0xa0, 0xff, ent_iso_8859_1 }, { cs_big5hkscs, 0xa0, 0xff, ent_iso_8859_1 }, { cs_sjis, 0xa0, 0xff, ent_iso_8859_1 }, { cs_eucjp, 0xa0, 0xff, ent_iso_8859_1 }, { cs_koi8r, 0xa3, 0xff, ent_koi8r }, { cs_cp1251, 0x80, 0xff, ent_cp_1251 }, { cs_8859_5, 0xc0, 0xff, ent_iso_8859_5 }, { cs_cp866, 0xc0, 0xff, ent_cp_866 }, { cs_terminator }};static const struct { const char *codeset; enum entity_charset charset;} charset_map[] = { { "ISO-8859-1", cs_8859_1 }, { "ISO8859-1", cs_8859_1 }, { "ISO-8859-15", cs_8859_15 }, { "ISO8859-15", cs_8859_15 }, { "utf-8", cs_utf_8 }, { "cp1252", cs_cp1252 }, { "Windows-1252", cs_cp1252 }, { "1252", cs_cp1252 }, { "BIG5", cs_big5 }, { "950", cs_big5 }, { "GB2312", cs_gb2312 }, { "936", cs_gb2312 }, { "BIG5-HKSCS", cs_big5hkscs }, { "Shift_JIS", cs_sjis }, { "SJIS", cs_sjis }, { "932", cs_sjis }, { "EUCJP", cs_eucjp }, { "EUC-JP", cs_eucjp }, { "KOI8-R", cs_koi8r }, { "koi8-ru", cs_koi8r }, { "koi8r", cs_koi8r }, { "cp1251", cs_cp1251 }, { "Windows-1251", cs_cp1251 }, { "win-1251", cs_cp1251 }, { "iso8859-5", cs_8859_5 }, { "iso-8859-5", cs_8859_5 }, { "cp866", cs_cp866 }, { "866", cs_cp866 }, { "ibm866", cs_cp866 }, { NULL }};static const struct { unsigned short charcode; char *entity; int entitylen; int flags;} basic_entities[] = { { '"', """, 6, ENT_HTML_QUOTE_DOUBLE }, { '\'', "'", 6, ENT_HTML_QUOTE_SINGLE }, { '\'', "'", 5, ENT_HTML_QUOTE_SINGLE }, { '<', "<", 4, 0 }, { '>', ">", 4, 0 }, { '&', "&", 5, 0 }, /* this should come last */ { 0, NULL, 0, 0 }}; #define MB_RETURN { \ *newpos = pos; \ mbseq[mbpos] = '\0'; \ *mbseqlen = mbpos; \ return this_char; } #define MB_WRITE(mbchar) { \ mbspace--; \ if (mbspace == 0) { \ MB_RETURN; \ } \ mbseq[mbpos++] = (mbchar); }/* {{{ get_next_char */inline static unsigned short get_next_char(enum entity_charset charset, unsigned char * str, int * newpos, unsigned char * mbseq, int * mbseqlen){ int pos = *newpos; int mbpos = 0; int mbspace = *mbseqlen; unsigned short this_char = str[pos++]; if (mbspace <= 0) { *mbseqlen = 0; return this_char; } MB_WRITE((unsigned char)this_char); switch (charset) { case cs_utf_8: { unsigned long utf = 0; int stat = 0; int more = 1; /* unpack utf-8 encoding into a wide char. * Code stolen from the mbstring extension */ do { if (this_char < 0x80) { more = 0; break; } else if (this_char < 0xc0) { switch (stat) { case 0x10: /* 2, 2nd */ case 0x21: /* 3, 3rd */ case 0x32: /* 4, 4th */ case 0x43: /* 5, 5th */ case 0x54: /* 6, 6th */ /* last byte in sequence */ more = 0; utf |= (this_char & 0x3f); this_char = (unsigned short)utf; break; case 0x20: /* 3, 2nd */ case 0x31: /* 4, 3rd */ case 0x42: /* 5, 4th */ case 0x53: /* 6, 5th */ /* penultimate char */ utf |= ((this_char & 0x3f) << 6); stat++; break; case 0x30: /* 4, 2nd */ case 0x41: /* 5, 3rd */ case 0x52: /* 6, 4th */ utf |= ((this_char & 0x3f) << 12); stat++; break; case 0x40: /* 5, 2nd */ case 0x51: utf |= ((this_char & 0x3f) << 18); stat++; break; case 0x50: /* 6, 2nd */ utf |= ((this_char & 0x3f) << 24); stat++; break; default: /* invalid */ more = 0; } } /* lead byte */ else if (this_char < 0xe0) { stat = 0x10; /* 2 byte */ utf = (this_char & 0x1f) << 6; } else if (this_char < 0xf0) { stat = 0x20; /* 3 byte */ utf = (this_char & 0xf) << 12; } else if (this_char < 0xf8) { stat = 0x30; /* 4 byte */ utf = (this_char & 0x7) << 18; } else if (this_char < 0xfc) { stat = 0x40; /* 5 byte */ utf = (this_char & 0x3) << 24; } else if (this_char < 0xfe) { stat = 0x50; /* 6 byte */ utf = (this_char & 0x1) << 30; } else { /* invalid; bail */ more = 0; break; } if (more) { this_char = str[pos++]; MB_WRITE((unsigned char)this_char); } } while (more); } break; case cs_big5: case cs_gb2312: case cs_big5hkscs: { /* check if this is the first of a 2-byte sequence */ if (this_char >= 0xa1 && this_char <= 0xfe) { /* peek at the next char */ unsigned char next_char = str[pos]; if ((next_char >= 0x40 && next_char <= 0x7e) || (next_char >= 0xa1 && next_char <= 0xfe)) { /* yes, this a wide char */ this_char <<= 8; MB_WRITE(next_char); this_char |= next_char; pos++; } } break; } case cs_sjis: { /* check if this is the first of a 2-byte sequence */ if ( (this_char >= 0x81 && this_char <= 0x9f) || (this_char >= 0xe0 && this_char <= 0xef) ) { /* peek at the next char */ unsigned char next_char = str[pos]; if ((next_char >= 0x40 && next_char <= 0x7e) || (next_char >= 0x80 && next_char <= 0xfc)) { /* yes, this a wide char */ this_char <<= 8; MB_WRITE(next_char); this_char |= next_char; pos++; } } break; } case cs_eucjp: { /* check if this is the first of a multi-byte sequence */ if (this_char >= 0xa1 && this_char <= 0xfe) { /* peek at the next char */ unsigned char next_char = str[pos]; if (next_char >= 0xa1 && next_char <= 0xfe) { /* yes, this a jis kanji char */ this_char <<= 8; MB_WRITE(next_char); this_char |= next_char; pos++; } } else if (this_char == 0x8e) { /* peek at the next char */ unsigned char next_char = str[pos]; if (next_char >= 0xa1 && next_char <= 0xdf) { /* JIS X 0201 kana */ this_char <<= 8; MB_WRITE(next_char); this_char |= next_char; pos++; } } else if (this_char == 0x8f) { /* peek at the next two char */ unsigned char next_char = str[pos]; unsigned char next2_char = str[pos+1]; if ((next_char >= 0xa1 && next_char <= 0xfe) && (next2_char >= 0xa1 && next2_char <= 0xfe)) { /* JIS X 0212 hojo-kanji */ this_char <<= 8; MB_WRITE(next_char); this_char |= next_char; pos++; this_char <<= 8; MB_WRITE(next2_char); this_char |= next2_char; pos++; } } break; } default: break; } MB_RETURN;}/* }}} *//* {{{ entity_charset determine_charset * returns the charset identifier based on current locale or a hint. * defaults to iso-8859-1 */static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC){ int i; enum entity_charset charset = cs_8859_1; int len = 0; zval *uf_result = NULL; /* Guarantee default behaviour for backwards compatibility */ if (charset_hint == NULL) return cs_8859_1; if ((len = strlen(charset_hint)) != 0) { goto det_charset; }#if HAVE_MBSTRING#if !defined(COMPILE_DL_MBSTRING) /* XXX: Ugly things. Why don't we look for a more sophisticated way? */ switch (MBSTRG(current_internal_encoding)) { case mbfl_no_encoding_8859_1: return cs_8859_1; case mbfl_no_encoding_utf8: return cs_utf_8; case mbfl_no_encoding_euc_jp: case mbfl_no_encoding_eucjp_win: return cs_eucjp; case mbfl_no_encoding_sjis: case mbfl_no_encoding_sjis_win: case mbfl_no_encoding_sjis_mac: return cs_sjis; case mbfl_no_encoding_cp1252: return cs_cp1252; case mbfl_no_encoding_8859_15: return cs_8859_15; case mbfl_no_encoding_big5: return cs_big5; case mbfl_no_encoding_euc_cn: case mbfl_no_encoding_hz: case mbfl_no_encoding_cp936: return cs_gb2312; case mbfl_no_encoding_koi8r: return cs_koi8r; case mbfl_no_encoding_cp866: return cs_cp866; case mbfl_no_encoding_cp1251: return cs_cp1251; case mbfl_no_encoding_8859_5: return cs_8859_5; default: ; }#else { zval nm_mb_internal_encoding; ZVAL_STRING(&nm_mb_internal_encoding, "mb_internal_encoding", 0); if (call_user_function_ex(CG(function_table), NULL, &nm_mb_internal_encoding, &uf_result, 0, NULL, 1, NULL TSRMLS_CC) != FAILURE) { charset_hint = Z_STRVAL_P(uf_result); len = Z_STRLEN_P(uf_result); goto det_charset; } }#endif#endif charset_hint = SG(default_charset); if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) { goto det_charset;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -