📄 charsets.c

📁 一个很有名的浏览器
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
	if (first_time) {		memset(&nb_entity_cache, 0, ENTITY_CACHE_MAXLEN * sizeof(unsigned int));		first_time = 0;	}	/* Check if cached. A test on many websites (freshmeat.net + whole ELinks website	 * + google + slashdot + websites that result from a search for test on google,	 * + various ones) show a quite impressive improvment:	 * Top ten is:	 * 0: hits=2459 l=4 st='nbsp'	 * 1: hits=2152 l=6 st='eacute'	 * 2: hits=235 l=6 st='egrave'	 * 3: hits=136 l=6 st='agrave'	 * 4: hits=100 l=3 st='amp'	 * 5: hits=40 l=5 st='laquo'	 * 6: hits=8 l=4 st='copy'	 * 7: hits=5 l=2 st='gt'	 * 8: hits=2 l=2 st='lt'	 * 9: hits=1 l=6 st='middot'	 *	 * Most of the time cache hit ratio is near 95%.	 *	 * A long test shows: 15186 hits vs. 24 misses and mean iteration	 * count is kept < 2 (worst case 1.58). Not so bad ;)	 *	 * --Zas */	/* entities with length >= ENTITY_CACHE_MAXLEN or == 1 will go in [0] table */	slen = (strlen > 1 && strlen < ENTITY_CACHE_MAXLEN) ? strlen : 0;	if (strlen < ENTITY_CACHE_MAXLEN && nb_entity_cache[slen] > 0) {		int i;		for (i = 0; i < nb_entity_cache[slen]; i++) {			if (entity_cache[slen][i].encoding == encoding			    && !memcmp(str, entity_cache[slen][i].str, strlen)) {#ifdef DEBUG_ENTITY_CACHE				static double total_iter = 0;				static unsigned long hit_count = 0;				total_iter += i + 1;				hit_count++;				fprintf(stderr, "hit after %d iter. (mean = %0.2f)\n", i + 1, total_iter / (double) hit_count);#endif				if (entity_cache[slen][i].hits < (unsigned int) ~0)					entity_cache[slen][i].hits++;				return entity_cache[slen][i].result;			}		}#ifdef DEBUG_ENTITY_CACHE		fprintf(stderr, "miss\n");#endif	}	if (*str == '#') { /* Numeric entity. */		int l = (int) strlen;		unsigned char *st = (unsigned char *) str;		unicode_val n = 0;		if (l == 1) goto end; /* &#; ? */		st++, l--;		if ((*st | 32) == 'x') { /* Hexadecimal */			if (l == 1 || l > 9) goto end; /* xFFFFFFFF max. */			st++, l--;			do {				unsigned char c = (*(st++) | 32);				if (isdigit(c))					n = (n << 4) | (c - '0');				else if (isxdigit(c))					n = (n << 4) | (c - 'a' + 10);				else					goto end; /* Bad char. */			} while (--l);		} else { /* Decimal */			if (l > 10) goto end; /* 4294967295 max. */			do {				unsigned char c = *(st++);				if (isdigit(c))					n = n * 10 + c - '0';				else					goto end; /* Bad char. */				/* Limit to 0xFFFFFFFF. */				if (n == (unicode_val) 0xFFFFFFFF)					goto end;			} while (--l);		}		result = u2cp(n, encoding);#ifdef DEBUG_ENTITY_CACHE		fprintf(stderr, "%lu %016x %s\n", (unsigned long) n , n, result);#endif	} else { /* Text entity. */		struct string key = INIT_STRING((unsigned char *) str, strlen);		struct entity *element = bsearch((void *) &key, entities,						 N_ENTITIES,						 sizeof(*element),						 compare_entities);		if (element) result = u2cp(element->c, encoding);	}end:	/* Take care of potential buffer overflow. */	if (strlen < sizeof(entity_cache[slen][0].str)) {		struct entity_cache *ece = &entity_cache[slen][nb_entity_cache[slen]];		/* Copy new entry to cache. */		ece->hits = 1;		ece->strlen = strlen;		ece->encoding = encoding;		ece->result = result;		memcpy(ece->str, str, strlen);		ece->str[strlen] = '\0';		/* Increment number of cache entries if possible. */		if (nb_entity_cache[slen] < ENTITY_CACHE_SIZE) nb_entity_cache[slen]++;#ifdef DEBUG_ENTITY_CACHE		fprintf(stderr, "Added in [%d]: l=%d st='%s'\n", slen,				entity_cache[slen][0].strlen, entity_cache[slen][0].str);#endif		/* Sort entries by hit order. */		if (nb_entity_cache[slen] > 1)			qsort(&entity_cache[slen][0], nb_entity_cache[slen],			      sizeof(entity_cache[slen][0]), (void *) hits_cmp);#ifdef DEBUG_ENTITY_CACHE	{		unsigned int i;		fprintf(stderr, "- Cache entries [%d] -\n", slen);		for (i = 0; i < nb_entity_cache[slen] ; i++)			fprintf(stderr, "%d: hits=%d l=%d st='%s'\n", i,				entity_cache[slen][i].hits, entity_cache[slen][i].strlen,				entity_cache[slen][i].str);		fprintf(stderr, "-----------------\n");	}#endif	}	return result;}unsigned char *convert_string(struct conv_table *convert_table,	       unsigned char *chars, int charslen,	       enum convert_string_mode mode, int *length,	       void (*callback)(void *data, unsigned char *buf, int buflen),	       void *callback_data){	unsigned char *buffer;	int bufferpos = 0;	int charspos = 0;	if (!convert_table && !memchr(chars, '&', charslen)) {		if (callback) {			if (charslen) callback(callback_data, chars, charslen);			return NULL;		} else {			return memacpy(chars, charslen);		}	}	/* Buffer allocation */	buffer = mem_alloc(ALLOC_GR + 1 /* trailing \0 */);	if (!buffer) return NULL;	/* Iterate ;-) */	while (charspos < charslen) {		unsigned char *translit;#define PUTC do { \		buffer[bufferpos++] = chars[charspos++]; \		translit = ""; \		goto flush; \	} while (0)		if (chars[charspos] != '&') {			struct conv_table *t;			int i;			if (chars[charspos] < 128 || !convert_table) PUTC;			t = convert_table;			i = charspos;			while (t[chars[i]].t) {				t = t[chars[i++]].u.tbl;				if (i >= charslen) PUTC;			}			translit = t[chars[i]].u.str;			charspos = i + 1;		} else if (mode == CSM_FORM || mode == CSM_NONE) {			PUTC;		} else {			int start = charspos + 1;			int i = start;			while (i < charslen			       && (isasciialpha(chars[i])				   || isdigit(chars[i])				   || (chars[i] == '#')))				i++;			/* This prevents bug 213: we were expanding "entities"			 * in URL query strings. */			/* XXX: But this disables &nbsp&nbsp usage, which			 * appears to be relatively common! --pasky */			if ((mode == CSM_DEFAULT || (chars[i] != '&' && chars[i] != '='))			    && i > start			    && !isasciialpha(chars[i]) && !isdigit(chars[i])) {				translit = get_entity_string(&chars[start], i - start,						      global_doc_opts->cp);				if (chars[i] != ';') {					/* Eat &nbsp &nbsp<foo> happily, but					 * pull back from the character after					 * entity string if it is not the valid					 * terminator. */					i--;				}				if (!translit) PUTC;				charspos = i + (i < charslen);			} else PUTC;		}		if (!translit[0]) continue;		if (!translit[1]) {			buffer[bufferpos++] = translit[0];			translit = "";			goto flush;		}		while (*translit) {			unsigned char *new;			buffer[bufferpos++] = *(translit++);flush:			if (bufferpos & (ALLOC_GR - 1)) continue;			if (callback) {				buffer[bufferpos] = 0;				callback(callback_data, buffer, bufferpos);				bufferpos = 0;			} else {				new = mem_realloc(buffer, bufferpos + ALLOC_GR);				if (!new) {					mem_free(buffer);					return NULL;				}				buffer = new;			}		}#undef PUTC	}	/* Say bye */	buffer[bufferpos] = 0;	if (length) *length = bufferpos;	if (callback) {		if (bufferpos) callback(callback_data, buffer, bufferpos);		mem_free(buffer);		return NULL;	} else {		return buffer;	}}#ifndef USE_FASTFINDintget_cp_index(unsigned char *name){	int i, a;	int syscp = 0;	if (!strcasecmp(name, "System")) {#if HAVE_LANGINFO_CODESET		name = nl_langinfo(CODESET);		syscp = SYSTEM_CHARSET_FLAG;#else		name = "us-ascii";#endif	}	for (i = 0; codepages[i].name; i++) {		for (a = 0; codepages[i].aliases[a]; a++) {			/* In the past, we looked for the longest substring			 * in all the names; it is way too expensive, though:			 *			 *   %   cumulative   self              self     total			 *  time   seconds   seconds    calls  us/call  us/call  name			 *  3.00      0.66     0.03     1325    22.64    22.64  get_cp_index			 *			 * Anything called from redraw_screen() is in fact			 * relatively expensive, even if it's called just			 * once. So we will do a simple strcasecmp() here.			 */			if (!strcasecmp(name, codepages[i].aliases[a]))				return i | syscp;		}	}	if (syscp) {		return get_cp_index("us-ascii") | syscp;	} else {		return -1;	}}#elsestatic unsigned int i_name = 0;static unsigned int i_alias = 0;/* Reset internal list pointer */voidcharsets_list_reset(void){	i_name = 0;	i_alias = 0;}/* Returns a pointer to a struct that contains current key and data pointers * and increment internal pointer.  It returns NULL when key is NULL. */struct fastfind_key_value *charsets_list_next(void){	static struct fastfind_key_value kv;	if (!codepages[i_name].name) return NULL;	kv.key = codepages[i_name].aliases[i_alias];	kv.data = &codepages[i_name];	if (codepages[i_name].aliases[i_alias + 1])		i_alias++;	else {		i_name++;		i_alias = 0;	}	return &kv;}static struct fastfind_index ff_charsets_index	= INIT_FASTFIND_INDEX("charsets_lookup", charsets_list_reset, charsets_list_next);/* It searchs for a charset named @name or one of its aliases and * returns index for it or -1 if not found. */intget_cp_index(unsigned char *name){	struct codepage_desc *codepage;	int syscp = 0;	if (!strcasecmp(name, "System")) {#if HAVE_LANGINFO_CODESET		name = nl_langinfo(CODESET);		syscp = SYSTEM_CHARSET_FLAG;#else		name = "us-ascii";#endif	}	codepage = fastfind_search(&ff_charsets_index, name, strlen(name));	if (codepage) {		assert(codepages <= codepage && codepage < codepages + N_CODEPAGES);		return (codepage - codepages) | syscp;	} else if (syscp) {		return get_cp_index("us-ascii") | syscp;	} else {		return -1;	}}#endif /* USE_FASTFIND */voidinit_charsets_lookup(void){#ifdef USE_FASTFIND	fastfind_index(&ff_charsets_index, FF_COMPRESS);#endif}voidfree_charsets_lookup(void){#ifdef USE_FASTFIND	fastfind_done(&ff_charsets_index);#endif}unsigned char *get_cp_name(int cp_index){	if (cp_index < 0) return "none";	if (cp_index & SYSTEM_CHARSET_FLAG) return "System";	return codepages[cp_index].name;}unsigned char *get_cp_mime_name(int cp_index){	if (cp_index < 0) return "none";	if (cp_index & SYSTEM_CHARSET_FLAG) return "System";	if (!codepages[cp_index].aliases) return NULL;	return codepages[cp_index].aliases[0];}intis_cp_special(int cp_index){	cp_index &= ~SYSTEM_CHARSET_FLAG;	return codepages[cp_index].table == table_utf_8;}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -