parser.c

来自「elinks下lynx是最重要的二个文本浏览器, 在linux下非常实用, el」· C语言代码 · 共 530 行
530 行
/* CSS main parser */#ifdef HAVE_CONFIG_H#include "config.h"#endif#include <stdlib.h>#include <string.h>#include "elinks.h"#include "document/css/parser.h"#include "document/css/property.h"#include "document/css/scanner.h"#include "document/css/stylesheet.h"#include "document/css/value.h"#include "document/html/parser.h"#include "util/color.h"#include "util/lists.h"#include "util/error.h"#include "util/memory.h"#include "util/string.h"/* #define DEBUG_CSS */voidcss_parse_properties(struct list_head *props, struct scanner *scanner){	assert(props && scanner);	while (scanner_has_tokens(scanner)) {		struct css_property_info *property_info = NULL;		struct css_property *prop;		struct scanner_token *token = get_scanner_token(scanner);		int i;		if (!token || token->type == '}') break;		/* Extract property name. */		if (token->type != CSS_TOKEN_IDENT		    || !check_next_scanner_token(scanner, ':')) {			/* Some use style="{ properties }" so we have to be			 * check what to skip to. */			if (token->type == '{') {				skip_scanner_token(scanner);			} else {				skip_css_tokens(scanner, ';');			}			continue;		}		for (i = 0; css_property_info[i].name; i++) {			struct css_property_info *info = &css_property_info[i];			if (scanner_token_strlcasecmp(token, info->name, -1)) {				property_info = info;				break;			}		}		/* Skip property name and separator and check for expression */		if (!skip_css_tokens(scanner, ':')) {			assert(!scanner_has_tokens(scanner));			break;		}		if (!property_info) { 			/* Unknown property, check the next one. */ 			goto ride_on; 		}		/* We might be on track of something, cook up the struct. */		prop = mem_calloc(1, sizeof(*prop));		if (!prop) {			goto ride_on;		}		prop->type = property_info->type;		prop->value_type = property_info->value_type;		if (!css_parse_value(property_info, &prop->value, scanner)) {			mem_free(prop);			goto ride_on;		}		add_to_list(*props, prop);		/* Maybe we have something else to go yet? */ride_on:		skip_css_tokens(scanner, ';');	}}/* TODO: We should handle support for skipping blocks better like "{ { } }" * will be handled correctly. --jonas */#define skip_css_block(scanner) \	if (skip_css_tokens(scanner, '{')) skip_css_tokens(scanner, '}');/* Atrules grammer: * * media_types: *	  <empty> *	| <ident> *	| media_types ',' <ident> * * atrule: * 	  '@charset' <string> ';' *	| '@import' <string> media_types ';' *	| '@import' <uri> media_types ';' *	| '@media' media_types '{' ruleset* '}' *	| '@page' <ident>? [':' <ident>]? '{' properties '}' *	| '@font-face' '{' properties '}' */static voidcss_parse_atrule(struct css_stylesheet *css, struct scanner *scanner,		 struct uri *base_uri){	struct scanner_token *token = get_scanner_token(scanner);	/* Skip skip skip that code */	switch (token->type) {		case CSS_TOKEN_AT_IMPORT:			token = get_next_scanner_token(scanner);			if (!token) break;			if (token->type == CSS_TOKEN_STRING			    || token->type == CSS_TOKEN_URL) {				assert(css->import);				css->import(css, base_uri, token->string, token->length);			}			skip_css_tokens(scanner, ';');			break;		case CSS_TOKEN_AT_CHARSET:			skip_css_tokens(scanner, ';');			break;		case CSS_TOKEN_AT_FONT_FACE:		case CSS_TOKEN_AT_MEDIA:		case CSS_TOKEN_AT_PAGE:			skip_css_block(scanner);			break;		case CSS_TOKEN_AT_KEYWORD:			/* TODO: Unkown @-rule so either skip til ';' or next block. */			while (scanner_has_tokens(scanner)) {				token = get_next_scanner_token(scanner);				if (!token) break;				if (token->type == ';') {					skip_scanner_token(scanner);					break;				} else if (token->type == '{') {					skip_css_block(scanner);					break;				}			}			break;		default:			INTERNAL("@-rule parser called without atrule.");	}}struct selector_pkg {	LIST_HEAD(struct selector_pkg);	struct css_selector *selector;};struct css_selector *reparent_selector(struct list_head *sels, struct css_selector *selector,                  struct css_selector **watch){	struct css_selector *twin = find_css_selector(sels, selector->type,	                                              selector->relation,	                                              selector->name, -1);	if (twin) {		merge_css_selectors(twin, selector);		/* Reparent leaves. */		while (selector->leaves.next != &selector->leaves) {			struct css_selector *leaf = selector->leaves.next;			reparent_selector(&twin->leaves, leaf, watch);		}		if (*watch == selector)			*watch = twin;		done_css_selector(selector);	} else {		if (selector->next) del_from_list(selector);		add_to_list(*sels, selector);	}	return twin ? twin : selector;}/* Our selector grammar: * * selector: *	  element_name? ('#' id)? ('.' class)? (':' pseudo_class)? \ *		  ((' ' | '>') selector)? * */static voidcss_parse_selector(struct css_stylesheet *css, struct scanner *scanner,		   struct list_head *selectors){	/* Shell for the last selector (the whole selector chain, that is). */	struct selector_pkg *pkg = NULL;	/* In 'p#x.y i.z', it's NULL for 'p', 'p' for '#x', '.y' and 'i', and	 * 'i' for '.z'. */	struct css_selector *prev_element_selector = NULL;	/* In 'p#x.y:q i', it's NULL for 'p' and '#x', '#x' for '.y', and '.y'	 * for ':q', and again NULL for 'i'. */	struct css_selector *prev_specific_selector = NULL;	/* In 'p#x.y div.z:a' it is NULL for 'p#x.y' and 'div', and 'p' for	 * '.z' and ':a'. So the difference from @prev_element_selector is that	 * it is changed after the current selector fragment is finished, not	 * right after the base selector is loaded. So it is set differently	 * for the '#x.y' and '.z:a' parts of selector. */	struct css_selector *last_chained_selector = NULL;	/* In 'p#x.y div.z:a, i.b {}', it's set for ':a' and '.b'. */	int last_fragment = 0;	/* In 'p#x .y', it's set for 'p' and '.y'. Note that it is always set in	 * the previous iteration so it's valid for the current token only	 * before "saving" the token. */	int selector_start = 1;	/* FIXME: element can be even '*' --pasky */	while (scanner_has_tokens(scanner)) {		struct scanner_token *token = get_scanner_token(scanner);		struct scanner_token last_token;		struct css_selector *selector;		enum css_selector_relation reltype = CSR_ROOT;		enum css_selector_type seltype = CST_ELEMENT;		assert(token);		assert(!last_fragment);		if (token->type == '{'		    || token->type == '}'		    || token->type == ';')			break;		/* Examine the selector fragment */		if (token->type != CSS_TOKEN_IDENT) {			switch (token->type) {			case CSS_TOKEN_HASH:			case CSS_TOKEN_HEX_COLOR:				seltype = CST_ID;				reltype = selector_start ? CSR_ANCESTOR : CSR_SPECIFITY;				break;			case '.':				seltype = CST_CLASS;				reltype = selector_start ? CSR_ANCESTOR : CSR_SPECIFITY;				break;			case ':':				seltype = CST_PSEUDO;				reltype = selector_start ? CSR_ANCESTOR : CSR_SPECIFITY;				break;			case '>':				seltype = CST_ELEMENT;				reltype = CSR_PARENT;				break;			default:				/* FIXME: Temporary fix for this weird CSS				 * precedence thing. ')' has higher than ','				 * and it can cause problems when skipping				 * here. The reason is for the function()				 * parsing. Hmm... --jonas */				if (!skip_css_tokens(scanner, ','))					skip_scanner_token(scanner);				seltype = CST_INVALID;				break;			}			if (seltype == CST_INVALID)				continue;			/* Hexcolor and hash already contains the ident			 * inside. */			if (token->type != CSS_TOKEN_HEX_COLOR			    && token->type != CSS_TOKEN_HASH) {				token = get_next_scanner_token(scanner);				if (!token) break;				if (token->type != CSS_TOKEN_IDENT) /* wtf */					continue;			} else {				/* Skip the leading '#'. */				token->string++, token->length--;			}		} else {			if (pkg) reltype = CSR_ANCESTOR;		}		/* Look ahead at what's coming next */		copy_struct(&last_token, token);		/* Detect whether upcoming tokens are separated by		 * whitespace or not (that's important for determining		 * whether it's a combinator or specificitier). */		if (last_token.string + last_token.length < scanner->end) {			selector_start = last_token.string[last_token.length];			selector_start = (selector_start != '#'			                  && selector_start != '.'			                  && selector_start != ':');		} /* else it doesn't matter as we are gonna bail out anyway. */		token = get_next_scanner_token(scanner);		if (!token) break;		last_fragment = (token->type == ',' || token->type == '{');		/* Register the selector */		if (!pkg) {			selector = get_css_base_selector(			                last_fragment ? css : NULL, seltype,					CSR_ROOT,					last_token.string, last_token.length);			if (!selector) continue;			pkg = mem_calloc(1, sizeof(*pkg));			if (!pkg) continue;			add_to_list(*selectors, pkg);			pkg->selector = selector;		} else if (reltype == CSR_SPECIFITY) {			/* We append under the last fragment. */			struct css_selector *base_sel = prev_specific_selector;			if (!base_sel) base_sel = prev_element_selector;			assert(base_sel);			selector = get_css_selector(&base_sel->leaves,			                            seltype, reltype,						    last_token.string,						    last_token.length);			if (!selector) continue;			if (last_chained_selector) {				/* The situation is like: 'div p#x', now it was				 * 'p -> div', but we need to redo that as				 * '(p ->) #x -> div'. */				del_from_list(last_chained_selector);				add_to_list(selector->leaves,				            last_chained_selector);			}			if (pkg->selector == base_sel) {				/* This is still just specificitying offspring				 * of the previous pkg->selector. */				pkg->selector = selector;			}			if (last_fragment) {				/* This is the last fragment of the selector				 * chain, that means the last base fragment				 * wasn't marked so and thus wasn't bound to				 * the stylesheet. Let's do that now. */				assert(prev_element_selector);				prev_element_selector->relation = CSR_ROOT;				prev_element_selector =					reparent_selector(&css->selectors,					                 prev_element_selector,							 &pkg->selector);			}		} else /* CSR_PARENT || CSR_ANCESTOR */ {			/* We - in the perlish speak - unshift in front			 * of the previous selector fragment and reparent			 * it to the upcoming one. */			selector = get_css_base_selector(			                last_fragment ? css : NULL, seltype,					CSR_ROOT,					last_token.string, last_token.length);			if (!selector) continue;			assert(prev_element_selector);			add_to_list(selector->leaves, prev_element_selector);			last_chained_selector = prev_element_selector;			prev_element_selector->relation = reltype;		}		/* Record the selector fragment for future generations */		if (reltype == CSR_SPECIFITY) {			prev_specific_selector = selector;		} else {			prev_element_selector = selector;			prev_specific_selector = NULL;		}		/* What to do next */		if (last_fragment) {			/* Next selector coming, clean up. */			pkg = NULL; last_fragment = 0; selector_start = 1;			prev_element_selector = NULL;			prev_specific_selector = NULL;			last_chained_selector = NULL;		}		if (token->type == ',') {			/* Another selector hooked to these properties. */			skip_scanner_token(scanner);		} else if (token->type == '{') {			/* End of selector list. */			break;		} /* else Another selector fragment probably coming up. */	}	/* Wipe the selector we were currently composing, if any. */	if (pkg) {		if (prev_element_selector)			done_css_selector(prev_element_selector);		del_from_list(pkg);		mem_free(pkg);	}}/* Ruleset grammar: * * ruleset: *	  selector [ ',' selector ]* '{' properties '}' */static voidcss_parse_ruleset(struct css_stylesheet *css, struct scanner *scanner){	INIT_LIST_HEAD(selectors);	INIT_LIST_HEAD(properties);	struct selector_pkg *pkg;	css_parse_selector(css, scanner, &selectors);	if (list_empty(selectors)	    || !skip_css_tokens(scanner, '{')) {		if (!list_empty(selectors)) free_list(selectors);		skip_css_tokens(scanner, '}');		return;	}	/* We don't handle the case where a property has already been added to	 * a selector. That doesn't matter though, because the best one will be	 * always the last one (FIXME: 'important!'), therefore the applier	 * will take it last and it will have the "final" effect.	 *	 * So it's only a little waste and no real harm. The thing is, what do	 * you do when you have 'background: #fff' and then 'background:	 * x-repeat'? It would require yet another logic to handle merging of	 * these etc and the induced overhead would in most cases mean more	 * waste that having the property multiple times in a selector, I	 * believe. --pasky */	pkg = selectors.next;	css_parse_properties(&properties, scanner);	skip_css_tokens(scanner, '}');	/* Mirror the properties to all the selectors. */	foreach (pkg, selectors) {#ifdef DEBUG_CSS		DBG("Binding properties (!!%d) to selector %s (type %d, relation %d, children %d)",			!list_empty(properties),			pkg->selector->name, pkg->selector->type,			pkg->selector->relation,			!list_empty(pkg->selector->leaves));#endif		add_selector_properties(pkg->selector, &properties);	}	free_list(selectors);	free_list(properties);}voidcss_parse_stylesheet(struct css_stylesheet *css, struct uri *base_uri,		     unsigned char *string, unsigned char *end){	struct scanner scanner;	init_scanner(&scanner, &css_scanner_info, string, end);	while (scanner_has_tokens(&scanner)) {		struct scanner_token *token = get_scanner_token(&scanner);		assert(token);		switch (token->type) {		case CSS_TOKEN_AT_KEYWORD:		case CSS_TOKEN_AT_CHARSET:		case CSS_TOKEN_AT_FONT_FACE:		case CSS_TOKEN_AT_IMPORT:		case CSS_TOKEN_AT_MEDIA:		case CSS_TOKEN_AT_PAGE:			css_parse_atrule(css, &scanner, base_uri);			break;		default:			/* And WHAT ELSE could it be?! */			css_parse_ruleset(css, &scanner);		}	}#ifdef DEBUG_CSS	dump_css_selector_tree(&css->selectors);	WDBG("That's it.");#endif}
parser.c - 源码说明

本页面展示了「elinks下lynx是最重要的二个文本浏览器, 在linux下非常实用, elinks也是gentoo安装过程中默认使用的浏览器, 这是elinks源代码」中的 parser.c 源码文件，采用 C语言编程语言编写，共 530 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与elinks相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?