⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 charsets.c

📁 ELinks is an advanced and well-established feature-rich text mode web (HTTP/FTP/..) browser. ELinks
💻 C
📖 第 1 页 / 共 2 页
字号:
/* charsets.c
 * (c) 2002 Mikulas Patocka, Karel 'Clock' Kulhavy
 * This file is a part of the Links program, released under GPL.
 */

#include "links.h"

int utf8_table;

/* One codepage mapping: byte value `c` corresponds to Unicode value `u`.
 * Tables of these are scanned until an entry with c == 0 is found. */
struct table_entry {
	unsigned char c;
	int u;
};

/* Description of one codepage: its name, alias list and mapping table. */
struct codepage_desc {
	unsigned char *name;
	unsigned char **aliases;
	struct table_entry *table;
};

#include "codepage.inc"
#include "uni_7b.inc"
#include "entity.inc"
#include "upcase.inc"

/* strings[i] is a NUL-terminated one-byte string holding byte i.
 * NOTE(review): the entries for 0x17 and 0x1f hold "\033" instead of their
 * own byte value; kept as found -- confirm whether this is intentional. */
char strings[256][2] = {
	"\000", "\001", "\002", "\003", "\004", "\005", "\006", "\007",
	"\010", "\011", "\012", "\013", "\014", "\015", "\016", "\017",
	"\020", "\021", "\022", "\023", "\024", "\025", "\026", "\033",
	"\030", "\031", "\032", "\033", "\034", "\035", "\036", "\033",
	"\040", "\041", "\042", "\043", "\044", "\045", "\046", "\047",
	"\050", "\051", "\052", "\053", "\054", "\055", "\056", "\057",
	"\060", "\061", "\062", "\063", "\064", "\065", "\066", "\067",
	"\070", "\071", "\072", "\073", "\074", "\075", "\076", "\077",
	"\100", "\101", "\102", "\103", "\104", "\105", "\106", "\107",
	"\110", "\111", "\112", "\113", "\114", "\115", "\116", "\117",
	"\120", "\121", "\122", "\123", "\124", "\125", "\126", "\127",
	"\130", "\131", "\132", "\133", "\134", "\135", "\136", "\137",
	"\140", "\141", "\142", "\143", "\144", "\145", "\146", "\147",
	"\150", "\151", "\152", "\153", "\154", "\155", "\156", "\157",
	"\160", "\161", "\162", "\163", "\164", "\165", "\166", "\167",
	"\170", "\171", "\172", "\173", "\174", "\175", "\176", "\177",
	"\200", "\201", "\202", "\203", "\204", "\205", "\206", "\207",
	"\210", "\211", "\212", "\213", "\214", "\215", "\216", "\217",
	"\220", "\221", "\222", "\223", "\224", "\225", "\226", "\227",
	"\230", "\231", "\232", "\233", "\234", "\235", "\236", "\237",
	"\240", "\241", "\242", "\243", "\244", "\245", "\246", "\247",
	"\250", "\251", "\252", "\253", "\254", "\255", "\256", "\257",
	"\260", "\261", "\262", "\263", "\264", "\265", "\266", "\267",
	"\270", "\271", "\272", "\273", "\274", "\275", "\276", "\277",
	"\300", "\301", "\302", "\303", "\304", "\305", "\306", "\307",
	"\310", "\311", "\312", "\313", "\314", "\315", "\316", "\317",
	"\320", "\321", "\322", "\323", "\324", "\325", "\326", "\327",
	"\330", "\331", "\332", "\333", "\334", "\335", "\336", "\337",
	"\340", "\341", "\342", "\343", "\344", "\345", "\346", "\347",
	"\350", "\351", "\352", "\353", "\354", "\355", "\356", "\357",
	"\360", "\361", "\362", "\363", "\364", "\365", "\366", "\367",
	"\370", "\371", "\372", "\373", "\374", "\375", "\376", "\377",
};

/* prototypes */
void free_translation_table(struct conv_table *);
void new_translation_table(struct conv_table *);
void add_utf_8(struct conv_table *, int, unsigned char *);
void free_utf_table(void);
struct conv_table *get_translation_table_to_utf_8(int);

/* Recursively release a heap-allocated 256-entry conversion table:
 * every entry flagged as a sub-table (t != 0) is freed first, then the
 * table itself. */
void free_translation_table(struct conv_table *p)
{
	int i;

	for (i = 0; i < 256; i++)
		if (p[i].t) free_translation_table(p[i].u.tbl);
	mem_free(p);
}

/* Placeholder output for bytes that have no translation. */
unsigned char *no_str = "*";

/* Reset a 256-entry conversion table in place: free any sub-tables, map
 * bytes 0..127 to themselves (via strings[]) and bytes 128..255 to no_str.
 * The table must already hold valid `t` flags (e.g. be zero-filled). */
void new_translation_table(struct conv_table *p)
{
	int i;

	for (i = 0; i < 256; i++)
		if (p[i].t) free_translation_table(p[i].u.tbl);
	for (i = 0; i < 128; i++) p[i].t = 0, p[i].u.str = strings[i];
	for (; i < 256; i++) p[i].t = 0, p[i].u.str = no_str;
}

/* Substitute code points for U+0080..U+009F, indexed by (u - 0x80);
 * 0 means "no substitute". Used by u2cp().
 * NOTE(review): the values look like the Windows-1252 assignments for
 * 0x80..0x9f with some ASCII approximations -- confirm. */
int strange_chars[32] = {
	0x20ac, 0x0000, 0x002a, 0x0000, 0x201e, 0x2026, 0x2020, 0x2021,
	0x005e, 0x2030, 0x0160, 0x003c, 0x0152, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0060, 0x0027, 0x0022, 0x0022, 0x002a, 0x2013, 0x2014,
	0x007e, 0x2122, 0x0161, 0x003e, 0x0153, 0x0000, 0x0000, 0x0000,
};

/* Comparison callbacks handed to BIN_SEARCH over unicode_7b[]. The whole
 * expansion is parenthesized so it is safe in any expression context. */
#define U_EQUAL(a, b) (unicode_7b[a].x == (b))
#define U_ABOVE(a, b) (unicode_7b[a].x > (b))

/* Convert Unicode value `u` to a string in codepage index `to`.
 *
 * Returns a pointer to static storage (strings[], a literal, unicode_7b[]
 * or the shared buffer of encode_utf_8()), or NULL when there is no
 * mapping. When `fallback` is non-zero and the codepage has no entry for
 * `u`, the unicode_7b[] table is searched for a replacement string.
 */
static inline unsigned char *u2cp(int u, int to, int fallback)
{
	int j, s;

	/* A negative value would index strings[] out of bounds below. */
	if (u < 0) return NULL;
	again:
	if (u < 128) return strings[u];
	if (u == 0xa0) return "\001";
	if (u == 0xad) return "";
	if (codepages[to].table == table_utf_8) return encode_utf_8(u);
	if (u < 0xa0) {
		/* 0x80..0x9f: retry with the substitute code point, if any. */
		u = strange_chars[u - 0x80];
		if (!u) return NULL;
		goto again;
	}
	for (j = 0; codepages[to].table[j].c; j++)
		if (codepages[to].table[j].u == u)
			return strings[codepages[to].table[j].c];
	if (!fallback) return NULL;
	BIN_SEARCH(N_UNICODE_7B, U_EQUAL, U_ABOVE, u, s);
	if (s != -1) return unicode_7b[s].s;
	return NULL;
}

/* Convert byte `ch` of codepage index `from` to its Unicode value.
 * Bytes below 0x80, or any byte when `from` is negative, are returned
 * unchanged. Returns -1 when the codepage table has no entry for `ch`. */
int cp2u(unsigned char ch, int from)
{
	struct table_entry *e;

	if (from < 0 || ch < 0x80) return ch;
	for (e = codepages[from].table; e->c; e++)
		if (e->c == ch) return e->u;
	return -1;
}

/* Shared output buffer of encode_utf_8(): up to 6 bytes plus a NUL. */
unsigned char utf_buffer[7];

/* Encode `u` as a NUL-terminated UTF-8 sequence of 1 to 6 bytes.
 * The result lives in the static utf_buffer and is overwritten by the
 * next call, so callers must copy it if they need to keep it. */
unsigned char *encode_utf_8(int u)
{
	memset(utf_buffer, 0, 7);
	if (u < 0x80) utf_buffer[0] = u;
	else if (u < 0x800)
		utf_buffer[0] = 0xc0 | ((u >> 6) & 0x1f),
		utf_buffer[1] = 0x80 | (u & 0x3f);
	else if (u < 0x10000)
		utf_buffer[0] = 0xe0 | ((u >> 12) & 0x0f),
		utf_buffer[1] = 0x80 | ((u >> 6) & 0x3f),
		utf_buffer[2] = 0x80 | (u & 0x3f);
	else if (u < 0x200000)
		utf_buffer[0] = 0xf0 | ((u >> 18) & 0x0f),
		utf_buffer[1] = 0x80 | ((u >> 12) & 0x3f),
		utf_buffer[2] = 0x80 | ((u >> 6) & 0x3f),
		utf_buffer[3] = 0x80 | (u & 0x3f);
	else if (u < 0x4000000)
		utf_buffer[0] = 0xf8 | ((u >> 24) & 0x0f),
		utf_buffer[1] = 0x80 | ((u >> 18) & 0x3f),
		utf_buffer[2] = 0x80 | ((u >> 12) & 0x3f),
		utf_buffer[3] = 0x80 | ((u >> 6) & 0x3f),
		utf_buffer[4] = 0x80 | (u & 0x3f);
	else
		utf_buffer[0] = 0xfc | ((u >> 30) & 0x01),
		utf_buffer[1] = 0x80 | ((u >> 24) & 0x3f),
		utf_buffer[2] = 0x80 | ((u >> 18) & 0x3f),
		utf_buffer[3] = 0x80 | ((u >> 12) & 0x3f),
		utf_buffer[4] = 0x80 | ((u >> 6) & 0x3f),
		utf_buffer[5] = 0x80 | (u & 0x3f);
	return utf_buffer;
}

/* Register `str` as the translation of the UTF-8 encoding of `u` in the
 * table tree rooted at `ct`. Each byte but the last selects (creating it
 * on demand) a nested 256-entry table; the last byte's entry receives
 * `str` unless it already has a translation. Conflicting table shapes
 * are reported through internal(). */
void add_utf_8(struct conv_table *ct, int u, unsigned char *str)
{
	unsigned char *p = encode_utf_8(u);

	while (p[1]) {
		if (ct[*p].t) ct = ct[*p].u.tbl;
		else {
			struct conv_table *nct;

			if (ct[*p].u.str != no_str) {
				internal("bad utf encoding #1");
				return;
			}
			nct = mem_alloc(sizeof(struct conv_table) * 256);
			/* Zero first: new_translation_table() reads the t flags. */
			memset(nct, 0, sizeof(struct conv_table) * 256);
			new_translation_table(nct);
			ct[*p].t = 1;
			ct[*p].u.tbl = nct;
			ct = nct;
		}
		p++;
	}
	if (ct[*p].t) {
		internal("bad utf encoding #2");
		return;
	}
	if (ct[*p].u.str == no_str) ct[*p].u.str = str;
}

/* Codepage -> UTF-8 table handed out by get_translation_table_to_utf_8();
 * utf_table_init stays 1 until the table has been filled for the first
 * time. */
struct conv_table utf_table[256];
int utf_table_init = 1;

/* Free the heap-allocated strings in the upper half (128..255) of
 * utf_table. The lower half points into strings[] and is not freed. */
void free_utf_table(void)
{
	int i;

	for (i = 128; i < 256; i++) mem_free(utf_table[i].u.str);
}

/* Return a flat table translating every byte of codepage index `from`
 * into a UTF-8 string, or NULL when `from` is -1.
 *
 * The table is a single static object: asking for a different codepage
 * frees and rebuilds its upper half. Bytes without a mapping translate
 * to a copy of no_str.
 */
struct conv_table *get_translation_table_to_utf_8(int from)
{
	int i;
	static int lfr = -1;

	if (from == -1) return NULL;
	if (from == lfr) return utf_table;
	/* Remember which codepage the table holds; without this the check
	 * above never matches and the table is rebuilt on every call. */
	lfr = from;
	if (utf_table_init) memset(utf_table, 0, sizeof(struct conv_table) * 256), utf_table_init = 0;
	else free_utf_table();
	for (i = 0; i < 128; i++) utf_table[i].u.str = strings[i];
	if (codepages[from].table == table_utf_8) {
		/* Source is already UTF-8: pass the bytes through unchanged. */
		for (i = 128; i < 256; i++) utf_table[i].u.str = stracpy(strings[i]);
		return utf_table;
	}
	for (i = 128; i < 256; i++) utf_table[i].u.str = NULL;
	for (i = 0; codepages[from].table[i].c; i++) {
		int u = codepages[from].table[i].u;

		/* First mapping listed for a byte wins. */
		if (!utf_table[codepages[from].table[i].c].u.str)
			utf_table[codepages[from].table[i].c].u.str = stracpy(encode_utf_8(u));
	}
	for (i = 128; i < 256; i++)
		if (!utf_table[i].u.str) utf_table[i].u.str = stracpy(no_str);
	return utf_table;
}

/* 512-entry lookup table; the non-zero entries are the multiples of 64
 * from 128 to 1984, stored at every fourth index from 266 to 382.
 * NOTE(review): not referenced in this part of the file; presumably a
 * fast path for decoding two-byte UTF-8 sequences -- confirm against its
 * users. */
unsigned short int utf8_2_uni_table[0x200] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 192, 0,
	0, 0, 256, 0, 0, 0, 320, 0, 0, 0, 384, 0, 0, 0, 448, 0,
	0, 0, 512, 0, 0, 0, 576, 0, 0, 0, 640, 0, 0, 0, 704, 0,
	0, 0, 768, 0, 0, 0, 832, 0, 0, 0, 896, 0, 0, 0, 960, 0,
	0, 0, 1024, 0, 0, 0, 1088, 0, 0, 0, 1152, 0, 0, 0, 1216, 0,
	0, 0, 1280, 0, 0, 0, 1344, 0, 0, 0, 1408, 0, 0, 0, 1472, 0,
	0, 0, 1536, 0, 0, 0, 1600, 0, 0, 0, 1664, 0, 0, 0, 1728, 0,
	0, 0, 1792, 0, 0, 0, 1856, 0, 0, 0, 1920, 0, 0, 0, 1984, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};

/* Per-lead-byte decoding info for get_utf_8(): the number of low bits of
 * the first byte that carry payload, or 0 when the byte cannot start a
 * sequence. A value l <= 5 is followed by (6 - l) continuation bytes;
 * 7 (bytes 0x01..0x7f) means a single-byte character. */
unsigned char utf_8_1[256] = {
	0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
	3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 0, 0,
};

/* Decode one UTF-8 character starting at *s and advance *s past the bytes
 * consumed. Returns the decoded value, or 0 when the lead byte is invalid
 * or a continuation byte is missing; in the latter case *s is left on the
 * offending byte. */
int get_utf_8(unsigned char **s)
{
	int v, l;
	unsigned char *p = *s;

	if (!(l = utf_8_1[p[0]])) {
		(*s)++;
		ret:
		return 0;
	}
	v = p[0] & ((1 << l) - 1);
	(*s)++;
	while (l++ <= 5) {
		/* Continuation bytes must lie in 0x80..0xbf. */
		int c = **s - 0x80;

		if (c < 0 || c >= 0x40) goto ret;
		(*s)++;
		v = (v << 6) + c;
	}
	return v;
}

/* General conversion table; table_init stays 1 until it has been set up. */
struct conv_table table[256];
int table_init = 1;

/* Release everything owned by the conversion tables that have been
 * initialized: the strings in utf_table and the sub-tables of table. */
void free_conv_table(void)
{
	if (!utf_table_init) free_utf_table();
	if (!table_init) new_translation_table(table);
}

struct conv_table *get_translation_table(int from, int to)
{
	int i;
	static int lfr = -1;
	static int lto = -1;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -