scanner.c

来自「一个很有名的浏览器」· C语言代码 · 共 361 行
361 行
/* CSS token scanner utilities *//* $Id: scanner.c,v 1.132 2004/09/21 20:54:34 pasky Exp $ */#ifdef HAVE_CONFIG_H#include "config.h"#endif#include <stdio.h>#include <string.h>#include "elinks.h"#include "document/css/scanner.h"#include "util/error.h"#include "util/scanner.h"#include "util/string.h"/* Bitmap entries for the CSS character groups used in the scanner table */enum css_char_group {	CSS_CHAR_ALPHA		= (1 << 0),	CSS_CHAR_DIGIT		= (1 << 1),	CSS_CHAR_HEX_DIGIT	= (1 << 2),	CSS_CHAR_IDENT		= (1 << 3),	CSS_CHAR_IDENT_START	= (1 << 4),	CSS_CHAR_NEWLINE	= (1 << 5),	CSS_CHAR_NON_ASCII	= (1 << 6),	CSS_CHAR_SGML_MARKUP	= (1 << 7),	CSS_CHAR_TOKEN		= (1 << 8),	CSS_CHAR_TOKEN_START	= (1 << 9),	CSS_CHAR_WHITESPACE	= (1 << 10),};static const struct scan_table_info css_scan_table_info[] = {	SCAN_TABLE_RANGE("0", '9', CSS_CHAR_DIGIT | CSS_CHAR_HEX_DIGIT | CSS_CHAR_IDENT),	SCAN_TABLE_RANGE("A", 'F', CSS_CHAR_HEX_DIGIT),	SCAN_TABLE_RANGE("A", 'Z', CSS_CHAR_ALPHA | CSS_CHAR_IDENT | CSS_CHAR_IDENT_START),	SCAN_TABLE_RANGE("a", 'f', CSS_CHAR_HEX_DIGIT),	SCAN_TABLE_RANGE("a", 'z', CSS_CHAR_ALPHA | CSS_CHAR_IDENT | CSS_CHAR_IDENT_START),	/* For the octal number impared (me including) \241 is 161 --jonas */	SCAN_TABLE_RANGE("\241", 255, CSS_CHAR_NON_ASCII | CSS_CHAR_IDENT | CSS_CHAR_IDENT_START),	SCAN_TABLE_STRING(" \f\n\r\t\v\000", CSS_CHAR_WHITESPACE),	SCAN_TABLE_STRING("\f\n\r",	 CSS_CHAR_NEWLINE),	SCAN_TABLE_STRING("-",		 CSS_CHAR_IDENT),	SCAN_TABLE_STRING(".#@!\"'<-/",	 CSS_CHAR_TOKEN_START),	/* Unicode escape (that we do not handle yet) + other special chars */	SCAN_TABLE_STRING("\\_*",	 CSS_CHAR_IDENT | CSS_CHAR_IDENT_START),	/* This should contain mostly used char tokens like ':' and maybe a few	 * garbage chars that people might put in their CSS code */	SCAN_TABLE_STRING("({});:,.>",	 CSS_CHAR_TOKEN),	SCAN_TABLE_STRING("<!->",	 CSS_CHAR_SGML_MARKUP),	SCAN_TABLE_END,};static const struct scanner_string_mapping css_string_mappings[] = {	{ "Hz",		CSS_TOKEN_FREQUENCY,	CSS_TOKEN_DIMENSION },	{ "cm",		CSS_TOKEN_LENGTH,	CSS_TOKEN_DIMENSION },	{ "deg",	CSS_TOKEN_ANGLE,	CSS_TOKEN_DIMENSION },	{ "em",		CSS_TOKEN_EM,		CSS_TOKEN_DIMENSION },	{ "ex",		CSS_TOKEN_EX,		CSS_TOKEN_DIMENSION },	{ "grad",	CSS_TOKEN_ANGLE,	CSS_TOKEN_DIMENSION },	{ "in",		CSS_TOKEN_LENGTH,	CSS_TOKEN_DIMENSION },	{ "kHz",	CSS_TOKEN_FREQUENCY,	CSS_TOKEN_DIMENSION },	{ "mm",		CSS_TOKEN_LENGTH,	CSS_TOKEN_DIMENSION },	{ "ms",		CSS_TOKEN_TIME,		CSS_TOKEN_DIMENSION },	{ "pc",		CSS_TOKEN_LENGTH,	CSS_TOKEN_DIMENSION },	{ "pt",		CSS_TOKEN_LENGTH,	CSS_TOKEN_DIMENSION },	{ "px",		CSS_TOKEN_LENGTH,	CSS_TOKEN_DIMENSION },	{ "rad",	CSS_TOKEN_ANGLE,	CSS_TOKEN_DIMENSION },	{ "s",		CSS_TOKEN_TIME,		CSS_TOKEN_DIMENSION },	{ "rgb",	CSS_TOKEN_RGB,		CSS_TOKEN_FUNCTION },	{ "url",	CSS_TOKEN_URL,		CSS_TOKEN_FUNCTION },	{ "charset",	CSS_TOKEN_AT_CHARSET,	CSS_TOKEN_AT_KEYWORD },	{ "font-face",	CSS_TOKEN_AT_FONT_FACE,	CSS_TOKEN_AT_KEYWORD },	{ "import",	CSS_TOKEN_AT_IMPORT,	CSS_TOKEN_AT_KEYWORD },	{ "media",	CSS_TOKEN_AT_MEDIA,	CSS_TOKEN_AT_KEYWORD },	{ "page",	CSS_TOKEN_AT_PAGE,	CSS_TOKEN_AT_KEYWORD },	{ NULL, CSS_TOKEN_NONE, CSS_TOKEN_NONE },};static struct scanner_token *scan_css_tokens(struct scanner *scanner);struct scanner_info css_scanner_info = {	css_string_mappings,	css_scan_table_info,	scan_css_tokens,};#define	check_css_table(c, bit)	(css_scanner_info.scan_table[(c)] & (bit))#define	scan_css(scanner, s, bit)					\	while ((s) < (scanner)->end && check_css_table(*(s), bit)) (s)++;#define	scan_back_css(scanner, s, bit)					\	while ((s) >= (scanner)->string && check_css_table(*(s), bit)) (s)--;#define	is_css_ident_start(c)	check_css_table(c, CSS_CHAR_IDENT_START)#define	is_css_ident(c)		check_css_table(c, CSS_CHAR_IDENT)#define	is_css_digit(c)		check_css_table(c, CSS_CHAR_DIGIT)#define	is_css_hexdigit(c)	check_css_table(c, CSS_CHAR_HEX_DIGIT)#define	is_css_char_token(c)	check_css_table(c, CSS_CHAR_TOKEN)#define	is_css_token_start(c)	check_css_table(c, CSS_CHAR_TOKEN_START)#define	skip_css(scanner, s, skipto)					\	while (s < (scanner)->end					\	       && *(s) != (skipto)					\	       && check_css_precedence(*(s), skipto)) {			\		if (isquote(*(s))) {					\			int size = (scanner)->end - (s);		\			unsigned char *end = memchr(s + 1, *(s), size);	\									\			if (end) (s) = end;				\		}							\		(s)++;							\	}static inline voidscan_css_token(struct scanner *scanner, struct scanner_token *token){	unsigned char *string = scanner->position;	unsigned char first_char = *string;	enum css_token_type type = CSS_TOKEN_GARBAGE;	int real_length = -1;	assert(first_char);	token->string = string++;	if (is_css_char_token(first_char)) {		type = first_char;	} else if (is_css_digit(first_char) || first_char == '.') {		scan_css(scanner, string, CSS_CHAR_DIGIT);		/* First scan the full number token */		if (*string == '.') {			string++;			if (is_css_digit(*string)) {				type = CSS_TOKEN_NUMBER;				scan_css(scanner, string, CSS_CHAR_DIGIT);			}		}		/* Check what kind of number we have */		if (*string == '%') {			if (first_char != '.')				type = CSS_TOKEN_PERCENTAGE;			string++;		} else if (!is_css_ident_start(*string)) {			type = CSS_TOKEN_NUMBER;		} else {			unsigned char *ident = string;			scan_css(scanner, string, CSS_CHAR_IDENT);			type = map_scanner_string(scanner, ident, string,						  CSS_TOKEN_DIMENSION);		}	} else if (is_css_ident_start(first_char)) {		scan_css(scanner, string, CSS_CHAR_IDENT);		if (*string == '(') {			unsigned char *function_end = string + 1;			/* Make sure that we have an ending ')' */			skip_css(scanner, function_end, ')');			if (*function_end == ')') {				type = map_scanner_string(scanner, token->string,						string, CSS_TOKEN_FUNCTION);				/* If it is not a known function just skip the				 * how arg stuff so we don't end up generating				 * a lot of useless tokens. */				if (type == CSS_TOKEN_FUNCTION) {					string = function_end;				} else if (type == CSS_TOKEN_URL) {					/* Extracting the URL first removes any					 * leading or ending whitespace and					 * then see if the url is given in a					 * string. If that is the case the					 * string delimiters are also trimmed.					 * This is not totally correct because					 * we should of course handle escape					 * sequences .. but that will have to					 * be fixed later.  */					unsigned char *from = string + 1;					unsigned char *to = function_end - 1;					scan_css(scanner, from, CSS_CHAR_WHITESPACE);					scan_back_css(scanner, to, CSS_CHAR_WHITESPACE);					if (isquote(*from)) from++;					if (isquote(*to)) to--;					token->string = from;					real_length = to - from + 1;					assert(real_length >= 0);					string = function_end;				}				assert(type != CSS_TOKEN_RGB || *string == '(');				assert(type != CSS_TOKEN_URL || *string == ')');				assert(type != CSS_TOKEN_FUNCTION || *string == ')');			}			string++;		} else {			type = CSS_TOKEN_IDENT;		}	} else if (!is_css_token_start(first_char)) {		/* TODO: Better composing of error tokens. For now we just		 * split them down into char tokens */	} else if (first_char == '#') {		/* Check whether it is hexcolor or hash token */		if (is_css_hexdigit(*string)) {			int hexdigits;			scan_css(scanner, string, CSS_CHAR_HEX_DIGIT);			/* Check that the hexdigit sequence is either 3 or 6			 * chars and it isn't just start of some non-hex ident			 * string. */			hexdigits = string - token->string - 1;			if ((hexdigits == 3 || hexdigits == 6)			    && !is_css_ident(*string)) {				type = CSS_TOKEN_HEX_COLOR;			} else {				scan_css(scanner, string, CSS_CHAR_IDENT);				type = CSS_TOKEN_HASH;			}		} else if (is_css_ident(*string)) {			/* Not *_ident_start() because hashes are #<name>. */			scan_css(scanner, string, CSS_CHAR_IDENT);			type = CSS_TOKEN_HASH;		}	} else if (first_char == '@') {		/* Compose token containing @<ident> */		if (is_css_ident_start(*string)) {			unsigned char *ident = string;			/* Scan both ident start and ident */			scan_css(scanner, string, CSS_CHAR_IDENT);			type = map_scanner_string(scanner, ident, string,						  CSS_TOKEN_AT_KEYWORD);		}	} else if (first_char == '!') {		scan_css(scanner, string, CSS_CHAR_WHITESPACE);		if (!strncasecmp(string, "important", 9)) {			type = CSS_TOKEN_IMPORTANT;			string += 9;		}	} else if (isquote(first_char)) {		/* TODO: Escaped delimiters --jonas */		int size = scanner->end - string;		unsigned char *string_end = memchr(string, first_char, size);		if (string_end) {			/* We don't want the delimiters in the token */			token->string++;			real_length = string_end - token->string;			string = string_end + 1;			type = CSS_TOKEN_STRING;		}	} else if (first_char == '<' || first_char == '-') {		/* Try to navigate SGML tagsoup */		if (*string == '/') {			/* Some kind of SGML tag end ... better bail out screaming */			type = CSS_TOKEN_NONE;		} else {			unsigned char *sgml = string;			/* Skip anything looking like SGML "<!--" and "-->"			 * comments */			scan_css(scanner, sgml, CSS_CHAR_SGML_MARKUP);			if (sgml - string >= 2			    && ((first_char == '<' && *string == '!')				|| (first_char == '-' && sgml[-1] == '>'))) {				type = CSS_TOKEN_SKIP;				string = sgml;			}		}	} else if (first_char == '/') {		/* Comments */		if (*string == '*') {			type = CSS_TOKEN_SKIP;			for (string++; string < scanner->end; string++)				if (*string == '*' && string[1] == '/') {					string += 2;					break;				}		}	} else {		INTERNAL("Someone forgot to put code for recognizing tokens "			 "which start with '%c'.", first_char);	}	token->type = type;	token->length = real_length > 0 ? real_length : string - token->string;	token->precedence = get_css_precedence(type);	scanner->position = string;}static struct scanner_token *scan_css_tokens(struct scanner *scanner){	struct scanner_token *table_end = scanner->table + SCANNER_TOKENS;	struct scanner_token *current;	if (!begin_token_scanning(scanner))		return get_scanner_token(scanner);	/* Scan tokens until we fill the table */	for (current = scanner->table + scanner->tokens;	     current < table_end && scanner->position < scanner->end;	     current++) {		scan_css(scanner, scanner->position, CSS_CHAR_WHITESPACE);		if (scanner->position >= scanner->end) break;		scan_css_token(scanner, current);		/* Did some one scream for us to end the madness? */		if (current->type == CSS_TOKEN_NONE) {			scanner->position = NULL;			current--;			break;		}		/* Shall we scratch this token? */		if (current->type == CSS_TOKEN_SKIP) { 			current--;		}	}	return end_token_scanning(scanner, current);}
scanner.c - 源码说明

本页面展示了「一个很有名的浏览器」中的 scanner.c 源码文件，采用 C语言编程语言编写，共 361 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与浏览器相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?