⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 parser.c

📁 一个很有名的浏览器
💻 C
字号:
/* SGML node handling *//* $Id: parser.c,v 1.6.6.2 2005/04/05 21:08:41 jonas Exp $ */#ifdef HAVE_CONFIG_H#include "config.h"#endif#include <stdlib.h>#include <string.h>#include "elinks.h"#include "cache/cache.h"#include "document/document.h"#include "document/dom/navigator.h"#include "document/dom/node.h"#include "document/html/renderer.h" /* TODO: Move get_convert_table() */#include "document/sgml/html/html.h"#include "document/sgml/parser.h"#include "document/sgml/scanner.h"#include "document/sgml/sgml.h"#include "intl/charsets.h"#include "protocol/uri.h"#include "util/error.h"#include "util/lists.h"#include "util/memory.h"#include "util/string.h"/* Functions for adding new nodes to the DOM tree */static inline struct dom_node *add_sgml_document(struct dom_navigator *navigator, struct uri *uri){	unsigned char *string = struri(uri);	int length = strlen(string);	struct dom_node *node = init_dom_node(DOM_NODE_DOCUMENT, string, length);	return node ? push_dom_node(navigator, node) : node;}static inline struct dom_node *add_sgml_element(struct dom_navigator *navigator, struct scanner_token *token){	struct sgml_parser *parser = navigator->data;	struct dom_node *parent = get_dom_navigator_top(navigator)->node;	struct dom_navigator_state *state;	struct sgml_parser_state *pstate;	struct dom_node *node;	node = add_dom_element(parent, token->string, token->length);	if (!node || !push_dom_node(navigator, node))		return NULL;	state = get_dom_navigator_top(navigator);	assert(node == state->node && state->data);	pstate = state->data;	pstate->info = get_sgml_node_info(parser->info->elements, node);	node->data.element.type = pstate->info->type;	return node;}static inline voidadd_sgml_attribute(struct dom_navigator *navigator,		  struct scanner_token *token, struct scanner_token *valtoken){	struct sgml_parser *parser = navigator->data;	struct dom_node *parent = get_dom_navigator_top(navigator)->node;	unsigned char *value = valtoken ? valtoken->string : NULL;	uint16_t valuelen = valtoken ? valtoken->length : 0;	struct sgml_node_info *info;	struct dom_node *node;	node = add_dom_attribute(parent, token->string, token->length,				 value, valuelen);	if (!node || !push_dom_node(navigator, node))		return;	info = get_sgml_node_info(parser->info->attributes, node);	node->data.attribute.type      = info->type;	node->data.attribute.id	       = !!(info->flags & SGML_ATTRIBUTE_IDENTIFIER);	node->data.attribute.reference = !!(info->flags & SGML_ATTRIBUTE_REFERENCE);	if (valtoken && valtoken->type == SGML_TOKEN_STRING)		node->data.attribute.quoted = 1;	pop_dom_node(navigator);}static inline struct dom_node *add_sgml_proc_instruction(struct dom_navigator *navigator, struct scanner_token *token){	struct dom_node *parent = get_dom_navigator_top(navigator)->node;	struct dom_node *node;	/* Split the token in two if we can find a first space separator. */	unsigned char *separator = memchr(token->string, ' ', token->length);	/* Anything before the separator becomes the target name ... */	unsigned char *name = token->string;	int namelen = separator ? separator - token->string : token->length;	/* ... and everything after the instruction value. */	unsigned char *value = separator ? separator + 1 : NULL;	int valuelen = value ? token->length - namelen - 1 : 0;	node = add_dom_proc_instruction(parent, name, namelen, value, valuelen);	if (!node) return NULL;	switch (token->type) {	case SGML_TOKEN_PROCESS_XML:		node->data.proc_instruction.type = DOM_PROC_INSTRUCTION_XML;		break;	case SGML_TOKEN_PROCESS:	default:		node->data.proc_instruction.type = DOM_PROC_INSTRUCTION;	}	if (!push_dom_node(navigator, node))		return NULL;	if (token->type != SGML_TOKEN_PROCESS_XML)		pop_dom_node(navigator);	return node;}static inline voidadd_sgml_node(struct dom_navigator *navigator, enum dom_node_type type, struct scanner_token *token){	struct dom_node *parent = get_dom_navigator_top(navigator)->node;	struct dom_node *node = add_dom_node(parent, type, token->string, token->length);	if (!node) return;	if (token->type == SGML_TOKEN_SPACE)		node->data.text.only_space = 1;	if (push_dom_node(navigator, node))		pop_dom_node(navigator);}#define add_sgml_entityref(nav, t)	add_sgml_node(nav, DOM_NODE_ENTITY_REFERENCE, t)#define add_sgml_text(nav, t)		add_sgml_node(nav, DOM_NODE_TEXT, t)#define add_sgml_comment(nav, t)	add_sgml_node(nav, DOM_NODE_COMMENT, t)static inline voidparse_sgml_attributes(struct dom_navigator *navigator, struct scanner *scanner){	struct scanner_token name;	assert(scanner_has_tokens(scanner)	       && (get_scanner_token(scanner)->type == SGML_TOKEN_ELEMENT_BEGIN	       	   || get_scanner_token(scanner)->type == SGML_TOKEN_PROCESS_XML));	skip_scanner_token(scanner);	while (scanner_has_tokens(scanner)) {		struct scanner_token *token = get_scanner_token(scanner);		assert(token);		switch (token->type) {		case SGML_TOKEN_TAG_END:			skip_scanner_token(scanner);			/* and return */		case SGML_TOKEN_ELEMENT:		case SGML_TOKEN_ELEMENT_BEGIN:		case SGML_TOKEN_ELEMENT_END:		case SGML_TOKEN_ELEMENT_EMPTY_END:			return;		case SGML_TOKEN_IDENT:			copy_struct(&name, token);			/* Skip the attribute name token */			token = get_next_scanner_token(scanner);			if (token && token->type == '=') {				/* If the token is not a valid value token				 * ignore it. */				token = get_next_scanner_token(scanner);				if (token				    && token->type != SGML_TOKEN_IDENT				    && token->type != SGML_TOKEN_ATTRIBUTE				    && token->type != SGML_TOKEN_STRING)					token = NULL;			} else {				token = NULL;			}			add_sgml_attribute(navigator, &name, token);			/* Skip the value token */			if (token)				skip_scanner_token(scanner);			break;		default:			skip_scanner_token(scanner);		}	}}voidparse_sgml_document(struct dom_navigator *navigator, struct scanner *scanner){	while (scanner_has_tokens(scanner)) {		struct scanner_token *token = get_scanner_token(scanner);		switch (token->type) {		case SGML_TOKEN_ELEMENT:		case SGML_TOKEN_ELEMENT_BEGIN:			if (!add_sgml_element(navigator, token)) {				if (token->type == SGML_TOKEN_ELEMENT) {					skip_scanner_token(scanner);					break;				}				skip_sgml_tokens(scanner, SGML_TOKEN_TAG_END);				break;			}			if (token->type == SGML_TOKEN_ELEMENT_BEGIN) {				parse_sgml_attributes(navigator, scanner);			} else {				skip_scanner_token(scanner);			}			break;		case SGML_TOKEN_ELEMENT_EMPTY_END:			pop_dom_node(navigator);			skip_scanner_token(scanner);			break;		case SGML_TOKEN_ELEMENT_END:			if (!token->length) {				pop_dom_node(navigator);			} else {				pop_dom_nodes(navigator, DOM_NODE_ELEMENT,					      token->string, token->length);			}			skip_scanner_token(scanner);			break;		case SGML_TOKEN_NOTATION_COMMENT:			add_sgml_comment(navigator, token);			skip_scanner_token(scanner);			break;		case SGML_TOKEN_NOTATION_ATTLIST:		case SGML_TOKEN_NOTATION_DOCTYPE:		case SGML_TOKEN_NOTATION_ELEMENT:		case SGML_TOKEN_NOTATION_ENTITY:		case SGML_TOKEN_NOTATION:			skip_scanner_token(scanner);			break;		case SGML_TOKEN_PROCESS_XML:			if (!add_sgml_proc_instruction(navigator, token)) {				skip_sgml_tokens(scanner, SGML_TOKEN_TAG_END);				break;			}			parse_sgml_attributes(navigator, scanner);			pop_dom_node(navigator);			break;		case SGML_TOKEN_PROCESS:			add_sgml_proc_instruction(navigator, token);			skip_scanner_token(scanner);			break;		case SGML_TOKEN_ENTITY:			add_sgml_entityref(navigator, token);			skip_scanner_token(scanner);			break;		case SGML_TOKEN_SPACE:		case SGML_TOKEN_TEXT:		default:			add_sgml_text(navigator, token);			skip_scanner_token(scanner);		}	}}static inline voidinit_sgml_parser(struct sgml_parser *parser, struct document *document,		 struct cache_entry *cache_entry, struct sgml_info *info,		 struct string *buffer){	unsigned char *source = buffer->source;	unsigned char *end = source + buffer->length;	memset(parser, 0, sizeof(*parser));	init_scanner(&parser->scanner, &sgml_scanner_info, source, end);	parser->document    = document;	parser->cache_entry = cache_entry;	parser->info	    = info;	if (document->options.plain)		parser->flags |= SGML_PARSER_ADD_ELEMENT_ENDS;}struct dom_node *parse_sgml(struct cache_entry *cached, struct document *document,	   struct string *buffer){	struct dom_navigator navigator;	struct sgml_parser parser;	size_t obj_size = sizeof(struct sgml_parser_state);	init_sgml_parser(&parser, document, cached, &sgml_html_info, buffer);	init_dom_navigator(&navigator, &parser, parser.info->callbacks, obj_size);	parser.root = add_sgml_document(&navigator, document->uri);	if (parser.root) {		parse_sgml_document(&navigator, &parser.scanner);	}	done_dom_navigator(&navigator);	return parser.root;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -