📄 parse.c

📁 一个很有名的浏览器
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
 * current key and data pointers and increment * internal pointer. * It returns NULL when key is NULL. */static struct fastfind_key_value *tags_list_next(void){	static struct fastfind_key_value kv;	if (!internal_pointer->name) return NULL;	kv.key = internal_pointer->name;	kv.data = internal_pointer;	internal_pointer++;	return &kv;}static struct fastfind_index ff_tags_index	= INIT_FASTFIND_INDEX("tags_lookup", tags_list_reset, tags_list_next);#endif /* USE_FASTFIND */voidinit_tags_lookup(void){#ifdef USE_FASTFIND	fastfind_index(&ff_tags_index, FF_COMPRESS);#endif}voidfree_tags_lookup(void){#ifdef USE_FASTFIND	fastfind_done(&ff_tags_index);#endif}static unsigned char *process_element(unsigned char *name, int namelen, int endingtag,                unsigned char *html, unsigned char *prev_html,                unsigned char *eof, unsigned char *attr, struct part *part);voidparse_html(unsigned char *html, unsigned char *eof,	   struct part *part, unsigned char *head){	unsigned char *base_pos = html;	int noupdate = 0;	html_context.putsp = -1;	html_context.line_breax = html_context.table_level ? 2 : 1;	html_context.position = 0;	html_context.was_br = 0;	html_context.was_li = 0;	html_context.part = part;	html_context.eoff = eof;	if (head) process_head(head);main_loop:	while (html < eof) {		unsigned char *name, *attr, *end, *prev_html;		int namelen, endingtag;		int dotcounter = 0;		if (!noupdate) {			html_context.part = part;			html_context.eoff = eof;			base_pos = html;		} else {			noupdate = 0;		}		if (isspace(*html) && !html_is_preformatted()) {			unsigned char *h = html;			while (h < eof && isspace(*h))				h++;			if (h + 1 < eof && h[0] == '<' && h[1] == '/') {				if (!parse_element(h, eof, &name, &namelen, &attr, &end)) {					put_chrs(base_pos, html - base_pos, html_context.put_chars_f, part);					base_pos = html = h;					html_context.putsp = 1;					goto element;				}			}			html++;			if (!(html_context.position + (html - base_pos - 1)))				goto skip_w; /* ??? */			if (*(html - 1) == ' ') {	/* Do not replace with isspace() ! --Zas */				/* BIG performance win; not sure if it doesn't cause any bug */				if (html < eof && !isspace(*html)) {					noupdate = 1;					continue;				}				put_chrs(base_pos, html - base_pos, html_context.put_chars_f, part);			} else {				put_chrs(base_pos, html - base_pos - 1, html_context.put_chars_f, part);				put_chrs(" ", 1, html_context.put_chars_f, part);			}skip_w:			while (html < eof && isspace(*html))				html++;			continue;		}		if (html_is_preformatted()) {			html_context.putsp = 0;			if (*html == ASCII_TAB) {				put_chrs(base_pos, html - base_pos, html_context.put_chars_f, part);				put_chrs("        ", 8 - (html_context.position % 8),					 html_context.put_chars_f, part);				html++;				continue;			} else if (*html == ASCII_CR || *html == ASCII_LF) {				put_chrs(base_pos, html - base_pos, html_context.put_chars_f, part);				if (html - base_pos == 0 && html_context.line_breax > 0)					html_context.line_breax--;next_break:				if (*html == ASCII_CR && html < eof - 1				    && html[1] == ASCII_LF)					html++;				ln_break(1, html_context.line_break_f, part);				html++;				if (*html == ASCII_CR || *html == ASCII_LF) {					html_context.line_breax = 0;					goto next_break;				}				continue;			} else if (html + 5 < eof && *html == '&') {				/* Really nasty hack to make &#13; handling in				 * <pre>-tags lynx-compatible. It works around				 * the entity handling done in the renderer,				 * since checking #13 value there would require				 * something along the lines of NBSP_CHAR or				 * checking for '\n's in AT_PREFORMATTED text. */				/* See bug 52 and 387 for more info. */				int length = html - base_pos;				int newlines = 0;				while (html + 5 < eof && !memcmp(html, "&#13;", 5)) {					newlines++;					html += 5;				}				if (newlines) {					put_chrs(base_pos, length, html_context.put_chars_f, part);					ln_break(newlines, html_context.line_break_f, part);					continue;				}			}		}		while (*html < ' ') {			if (html - base_pos)				put_chrs(base_pos, html - base_pos, html_context.put_chars_f, part);			dotcounter++;			base_pos = ++html;			if (*html >= ' ' || isspace(*html) || html >= eof) {				unsigned char *dots = fmem_alloc(dotcounter);				if (dots) {					memset(dots, '.', dotcounter);					put_chrs(dots, dotcounter, html_context.put_chars_f, part);					fmem_free(dots);				}				goto main_loop;			}		}		if (html + 2 <= eof && html[0] == '<' && (html[1] == '!' || html[1] == '?')		    && !html_context.was_xmp) {			put_chrs(base_pos, html - base_pos, html_context.put_chars_f, part);			html = skip_comment(html, eof);			continue;		}		if (*html != '<' || parse_element(html, eof, &name, &namelen, &attr, &end)) {			html++;			noupdate = 1;			continue;		}element:		endingtag = *name == '/'; name += endingtag; namelen -= endingtag;		if (!endingtag && html_context.putsp == 1 && !html_top.invisible)			put_chrs(" ", 1, html_context.put_chars_f, part);		put_chrs(base_pos, html - base_pos, html_context.put_chars_f, part);		if (!html_is_preformatted() && !endingtag && !html_context.putsp) {			unsigned char *ee = end;			unsigned char *nm;			while (!parse_element(ee, eof, &nm, NULL, NULL, &ee))				if (*nm == '/')					goto ng;			if (ee < eof && isspace(*ee)) {				put_chrs(" ", 1, html_context.put_chars_f, part);			}ng:;		}		prev_html = html;		html = process_element(name, namelen, endingtag, end, prev_html, eof, attr, part);	}	if (noupdate) put_chrs(base_pos, html - base_pos, html_context.put_chars_f, part);	ln_break(1, html_context.line_break_f, part);	/* Restore the part in case the html_context was trashed in the last	 * iteration so that when destroying the stack in the caller we still	 * get the right part pointer. */	html_context.part = part;	html_context.putsp = -1;	html_context.position = 0;	html_context.was_br = 0;}static unsigned char *start_element(struct element_info *ei,              unsigned char *name, int namelen, int endingtag,              unsigned char *html, unsigned char *prev_html,              unsigned char *eof, unsigned char *attr, struct part *part){	unsigned char *a;	struct par_attrib old_format;	int restore_format;#ifdef CONFIG_CSS	struct css_selector *selector = NULL;#endif	if (html_context.was_xmp) {		put_chrs("<", 1, html_context.put_chars_f, part);		html = prev_html + 1;		return html;	}	ln_break(ei->linebreak, html_context.line_break_f, part);	a = get_attr_val(attr, "id");	if (a) {		html_context.special_f(part, SP_TAG, a);		mem_free(a);	}	if (html_top.type == ELEMENT_WEAK) {		kill_html_stack_item(&html_top);	}	/* We try to process nested <script> if we didn't process the parent	 * one. */	if (html_top.invisible	    && (ei->func != html_script || html_top.invisible < 2)) {		return html;	}	restore_format = html_is_preformatted();	old_format = par_format;	if (ei->func == html_table && global_doc_opts->tables	    && html_context.table_level < HTML_MAX_TABLE_LEVEL) {		format_table(attr, html, eof, &html, part);		ln_break(2, html_context.line_break_f, part);		return html;	}	if (ei->func == html_select) {		if (!do_html_select(attr, html, eof, &html, part))			return html;	}	if (ei->func == html_textarea) {		do_html_textarea(attr, html, eof, &html, part);		return html;	}#ifdef CONFIG_CSS	if (ei->func == html_style && global_doc_opts->css_enable) {		css_parse_stylesheet(&html_context.css_styles,				     html_context.base_href, html, eof);	}#endif	if (ei->nopair == 2 || ei->nopair == 3) {		struct html_element *e;		if (ei->nopair == 2) {			foreach (e, html_context.stack) {				if (e->type < ELEMENT_KILLABLE) break;				if (e->linebreak || !ei->linebreak) break;			}		} else foreach (e, html_context.stack) {			if (e->linebreak && !ei->linebreak) break;			if (e->type < ELEMENT_KILLABLE) break;			if (!strlcasecmp(e->name, e->namelen, name, namelen)) break;		}		if (!strlcasecmp(e->name, e->namelen, name, namelen)) {			while (e->prev != (void *) &html_context.stack)				kill_html_stack_item(e->prev);			if (e->type > ELEMENT_IMMORTAL)				kill_html_stack_item(e);		}	}	if (ei->nopair != 1) {		html_stack_dup(ELEMENT_KILLABLE);		html_top.name = name;		html_top.namelen = namelen;		html_top.options = attr;		html_top.linebreak = ei->linebreak;#ifdef CONFIG_ECMASCRIPT		if (has_attr(attr, "onClick")) {			/* XXX: Put something better to format.link. --pasky */			mem_free_set(&format.link, stracpy("javascript:void(0);"));			mem_free_set(&format.target, stracpy(html_context.base_target));			format.style.fg = format.clink;			html_top.pseudo_class = ELEMENT_LINK;			mem_free_set(&format.title, stracpy("onClick placeholder"));			/* Er. I know. Well, double html_focusable()s shouldn't			 * really hurt. */			html_focusable(attr);		}#endif	}#ifdef CONFIG_ECMASCRIPT	/* We need to have own element in the stack, that's why we waited for	 * so long. */	if (ei->func == html_script) {		if (!do_html_script(attr, html, eof, &html, part))			return html;	}#endif#ifdef CONFIG_CSS	if (html_top.options && global_doc_opts->css_enable) {		/* XXX: We should apply CSS otherwise as well, but that'll need		 * some deeper changes in order to have options filled etc.		 * Probably just applying CSS from more places, since we		 * usually have nopair set when we either (1) rescan on your		 * own from somewhere else (2) html_stack_dup() in our own way.		 * --pasky */		/* Call it now to gain some of the stuff which might affect		 * formatting of some elements. */		/* FIXME: The caching of the CSS selector is broken, since t can		 * lead to wrong styles being applied to following elements, so		 * disabled for now. */		selector = get_css_selector_for_element(&html_top,							&html_context.css_styles,							&html_context.stack);		if (selector) {			apply_css_selector_style(&html_top, selector);			done_css_selector(selector);		}	}#endif	if (ei->func) ei->func(attr);#ifdef CONFIG_CSS	if (selector && html_top.options) {		/* Call it now to override default colors of the elements. */		selector = get_css_selector_for_element(&html_top,							&html_context.css_styles,							&html_context.stack);		if (selector) {			apply_css_selector_style(&html_top, selector);			done_css_selector(selector);		}	}#endif	if (ei->func != html_br) html_context.was_br = 0;	if (restore_format) par_format = old_format;	return html;}static unsigned char *end_element(struct element_info *ei,            unsigned char *name, int namelen, int endingtag,            unsigned char *html, unsigned char *prev_html,            unsigned char *eof, unsigned char *attr, struct part *part){	struct html_element *e, *elt;	int lnb = 0;	int kill = 0;	if (html_context.was_xmp) {		if (ei->func != html_xmp)			return html;		html_context.was_xmp = 0;	}	html_context.was_br = 0;	if (ei->nopair == 1 || ei->nopair == 3)		return html;	/* dump_html_stack(); */	foreach (e, html_context.stack) {		if (e->linebreak && !ei->linebreak) kill = 1;		if (strlcasecmp(e->name, e->namelen, name, namelen)) {			if (e->type < ELEMENT_KILLABLE)				break;			else				continue;		}		if (kill) {			kill_html_stack_item(e);			break;		}		for (elt = e;		     elt != (void *) &html_context.stack;		     elt = elt->prev)			if (elt->linebreak > lnb)				lnb = elt->linebreak;		/* This hack forces a line break after a list end. It is needed		 * when ending a list with the last <li> having no text the		 * line_breax is 2 so the ending list's linebreak will be		 * ignored when calling ln_break(). */		if (html_context.was_li)			html_context.line_breax = 0;		ln_break(lnb, html_context.line_break_f, part);		while (e->prev != (void *) &html_context.stack)			kill_html_stack_item(e->prev);		kill_html_stack_item(e);		break;	}	/* dump_html_stack(); */	return html;}static unsigned char *process_element(unsigned char *name, int namelen, int endingtag,                unsigned char *html, unsigned char *prev_html,                unsigned char *eof, unsigned char *attr, struct part *part){	struct element_info *ei;#ifndef USE_FASTFIND	{		struct element_info elem;		unsigned char tmp;		tmp = name[namelen];		name[namelen] = '\0';		elem.name = name;		ei = bsearch(&elem, elements, NUMBER_OF_TAGS, sizeof(elem), compar);		name[namelen] = tmp;	}#else	ei = (struct element_info *) fastfind_search(&ff_tags_index, name, namelen);#endif	if (!ei) return html;	if (!endingtag) {		return start_element(ei, name, namelen, endingtag, html, prev_html, eof, attr, part);	} else {		return end_element(ei, name, namelen, endingtag, html, prev_html, eof, attr, part);	}}voidscan_http_equiv(unsigned char *s, unsigned char *eof, struct string *head,		struct string *title){	unsigned char *name, *attr, *he, *c;	int namelen;	if (title && !init_string(title)) return;	add_char_to_string(head, '\n');se:	while (s < eof && *s != '<') {sp:		s++;	}	if (s >= eof) return;	if (s + 2 <= eof && (s[1] == '!' || s[1] == '?')) {		s = skip_comment(s, eof);		goto se;	}	if (parse_element(s, eof, &name, &namelen, &attr, &s)) goto sp;ps:	if (!strlcasecmp(name, namelen, "HEAD", 4)) goto se;	if (!strlcasecmp(name, namelen, "/HEAD", 5)) return;	if (!strlcasecmp(name, namelen, "BODY", 4)) return;	if (title && !title->length && !strlcasecmp(name, namelen, "TITLE", 5)) {		unsigned char *s1;xse:		s1 = s;		while (s < eof && *s != '<') {xsp:			s++;		}		if (s - s1)			add_bytes_to_string(title, s1, s - s1);		if (s >= eof) goto se;		if (s + 2 <= eof && (s[1] == '!' || s[1] == '?')) {			s = skip_comment(s, eof);			goto xse;		}		if (parse_element(s, eof, &name, &namelen, &attr, &s)) {			s1 = s;			goto xsp;		}		clr_spaces(title->source);		goto ps;	}	if (strlcasecmp(name, namelen, "META", 4)) goto se;	he = get_attr_val(attr, "charset");	if (he) {		add_to_string(head, "Charset: ");		add_to_string(head, he);		mem_free(he);	}	he = get_attr_val(attr, "http-equiv");	if (!he) goto se;	add_to_string(head, he);	mem_free(he);	c = get_attr_val(attr, "content");	if (c) {		add_to_string(head, ": ");		add_to_string(head, c);	        mem_free(c);	}	add_crlf_to_string(head);	goto se;}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -