📄 uri.c

📁 一个很有名的浏览器
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/* URL parser and translator; implementation of RFC 2396. *//* $Id: uri.c,v 1.304.2.4 2005/05/01 20:57:46 jonas Exp $ */#ifdef HAVE_CONFIG_H#include "config.h"#endif#include <ctype.h>#include <errno.h>#ifdef HAVE_IDNA_H#include <idna.h>#endif#include <stdio.h>#include <stdlib.h>#include <string.h>#include <sys/types.h>#ifdef HAVE_NETDB_H#include <netdb.h> /* OS/2 needs this after sys/types.h */#endif#ifdef HAVE_SYS_SOCKET_H#include <sys/socket.h>#endif#ifdef HAVE_NETINET_IN_H#include <netinet/in.h>#endif#ifdef HAVE_ARPA_INET_H#include <arpa/inet.h>#endif#include "elinks.h"#include "protocol/protocol.h"#include "protocol/uri.h"#include "util/conv.h"#include "util/error.h"#include "util/file.h"#include "util/hash.h"#include "util/memory.h"#include "util/object.h"#include "util/string.h"static inline intend_of_dir(unsigned char c){	return c == POST_CHAR || c == '#' || c == ';' || c == '?';}static inline intis_uri_dir_sep(struct uri *uri, unsigned char pos){	return (uri->protocol == PROTOCOL_FILE ? dir_sep(pos) : pos == '/');}intis_ip_address(unsigned char *address, int addresslen){	/* The @address has well defined limits so it would be a shame to	 * allocate it. */	unsigned char buffer[IP_ADDRESS_BUFFER_SIZE];	if (addresslen >= sizeof(buffer))		return 0;	safe_strncpy(buffer, address, addresslen + 1);#ifdef HAVE_INET_PTON#ifdef CONFIG_IPV6	{		struct sockaddr_in6 addr6;		if (inet_pton(AF_INET6, buffer, &addr6.sin6_addr) > 0)			return 1;	}#endif /* CONFIG_IPV6 */	{		struct in_addr addr4;		if (inet_pton(AF_INET, buffer, &addr4) > 0)			return 1;	}	return 0;#else	/* FIXME: Is this ever the case? */	return 0;#endif /* HAVE_INET_PTON */}intend_with_known_tld(unsigned char *s, int slen){	int i;	static const unsigned char *tld[] =	{ "com", "edu", "net",	  "org", "gov", "mil",	  "int", "biz", "arpa",	  "aero", "coop",	  "info", "museum",	  "name", "pro", NULL };	if (!slen) return -1;	if (slen < 0) slen = strlen(s);	for (i = 0; tld[i]; i++) {		int tldlen = strlen(tld[i]);		int pos = slen - tldlen;		if (pos >= 0 && !strncasecmp(&s[pos], tld[i], tldlen))			return pos;	}	return -1;}static intcheck_uri_file(unsigned char *name){	/* Check POST_CHAR etc ... */	static const unsigned char chars[] = POST_CHAR_S "#?";	int i;	if (file_exists(name))		return strlen(name);	for (i = 0; i < sizeof(chars) - 1; i++) {		unsigned char *pos = strchr(name, chars[i]);		int namelen = -1;		if (!pos) continue;		*pos = 0;		if (file_exists(name))			namelen = strlen(name);		*pos = chars[i];		if (namelen >= 0) return namelen;	}	return -1;}/* Encodes URIs without encoding stuff like fragments and query separators. */static voidencode_file_uri_string(struct string *string, unsigned char *uristring){	int filenamelen = check_uri_file(uristring);	unsigned char saved = 0;	if (filenamelen >= 0) {		saved = uristring[filenamelen];		uristring[filenamelen] = 0;	}	encode_uri_string(string, uristring, 0);	if (filenamelen >= 0) {		uristring[filenamelen] = saved;		add_to_string(string, &uristring[filenamelen]);	}}static inline intget_protocol_length(const unsigned char *url){	unsigned char *end = (unsigned char *) url;	/* Seek the end of the protocol name if any. */	/* RFC1738:	 * scheme  = 1*[ lowalpha | digit | "+" | "-" | "." ]	 * (but per its recommendations we accept "upalpha" too) */	while (isalnum(*end) || *end == '+' || *end == '-' || *end == '.')		end++;	/* Now we make something to support our "IP version in protocol scheme	 * name" hack and silently chop off the last digit if it's there. The	 * IETF's not gonna notice I hope or it'd be going after us hard. */	if (end != url && isdigit(end[-1]))		end--;	/* Also return 0 if there's no protocol name (@end == @url). */	return (*end == ':' || isdigit(*end)) ? end - url : 0;}enum uri_errnoparse_uri(struct uri *uri, unsigned char *uristring){	unsigned char *prefix_end, *host_end;#ifdef CONFIG_IPV6	unsigned char *lbracket, *rbracket;#endif	assertm(uristring, "No uri to parse.");	memset(uri, 0, sizeof(*uri));	/* Nothing to do for an empty url. */	if_assert_failed return 0;	if (!*uristring) return URI_ERRNO_EMPTY;	uri->string = uristring;	uri->protocollen = get_protocol_length(uristring);	/* Invalid */	if (!uri->protocollen) return URI_ERRNO_INVALID_PROTOCOL;	/* Figure out whether the protocol is known */	uri->protocol = get_protocol(struri(uri), uri->protocollen);	prefix_end = uristring + uri->protocollen; /* ':' */	/* Check if there's a digit after the protocol name. */	if (isdigit(*prefix_end)) {		uri->ip_family = uristring[uri->protocollen] - '0';		prefix_end++;	}	if (*prefix_end != ':')		return URI_ERRNO_INVALID_PROTOCOL;	prefix_end++;	/* Skip slashes */	if (prefix_end[0] == '/' && prefix_end[1] == '/') {		if (prefix_end[2] == '/'		    && get_protocol_need_slash_after_host(uri->protocol))			return URI_ERRNO_TOO_MANY_SLASHES;		prefix_end += 2;	} else if (get_protocol_need_slashes(uri->protocol)) {		return URI_ERRNO_NO_SLASHES;	}	if (get_protocol_free_syntax(uri->protocol)) {		uri->data = prefix_end;		uri->datalen = strlen(prefix_end);		return URI_ERRNO_OK;	} else if (uri->protocol == PROTOCOL_FILE) {		int datalen = check_uri_file(prefix_end);		/* Extract the fragment part. */		if (datalen >= 0 && prefix_end[datalen] == '#') {			uri->fragment = prefix_end + datalen + 1;			uri->fragmentlen = strlen(uri->fragment);		} else {			datalen = strlen(prefix_end);		}		uri->data = prefix_end;		uri->datalen = datalen;		return URI_ERRNO_OK;	}	/* Isolate host */#ifdef CONFIG_IPV6	/* Get brackets enclosing IPv6 address */	lbracket = strchr(prefix_end, '[');	if (lbracket) {		rbracket = strchr(lbracket, ']');		/* [address] is handled only inside of hostname part (surprisingly). */		if (rbracket && rbracket < prefix_end + strcspn(prefix_end, "/"))			uri->ipv6 = 1;		else			lbracket = rbracket = NULL;	} else {		rbracket = NULL;	}#endif	/* Possibly skip auth part */	host_end = prefix_end + strcspn(prefix_end, "@");	if (prefix_end + strcspn(prefix_end, "/") > host_end	    && *host_end) { /* we have auth info here */		unsigned char *user_end;		/* Allow '@' in the password component */		while (strcspn(host_end + 1, "@") < strcspn(host_end + 1, "/?"))			host_end = host_end + 1 + strcspn(host_end + 1, "@");		user_end = strchr(prefix_end, ':');		if (!user_end || user_end > host_end) {			uri->user = prefix_end;			uri->userlen = host_end - prefix_end;		} else {			uri->user = prefix_end;			uri->userlen = user_end - prefix_end;			uri->password = user_end + 1;			uri->passwordlen = host_end - user_end - 1;		}		prefix_end = host_end + 1;	}#ifdef CONFIG_IPV6	if (uri->ipv6)		host_end = rbracket + strcspn(rbracket, ":/?");	else#endif		host_end = prefix_end + strcspn(prefix_end, ":/?");#ifdef CONFIG_IPV6	if (uri->ipv6) {		int addrlen = rbracket - lbracket - 1;		/* Check for valid length.		 * addrlen >= sizeof(hostbuf) is theorically impossible		 * but i keep the test in case of... Safer, imho --Zas */		assertm(addrlen >= 0 && addrlen < NI_MAXHOST,			"parse_uri(): addrlen value is bad (%d) for URL '%s'. "			"Problems are likely to be encountered. Please report "			"this, it is a security bug!", addrlen, uristring);		if_assert_failed return URI_ERRNO_IPV6_SECURITY;		uri->host = lbracket + 1;		uri->hostlen = addrlen;	} else#endif	{		uri->host = prefix_end;		uri->hostlen = host_end - prefix_end;		/* Trim trailing '.'s */		if (uri->hostlen && uri->host[uri->hostlen - 1] == '.')			return URI_ERRNO_TRAILING_DOTS;	}	if (*host_end == ':') { /* we have port here */		unsigned char *port_end = host_end + 1 + strcspn(host_end + 1, "/");		host_end++;		uri->port = host_end;		uri->portlen = port_end - host_end;		if (uri->portlen == 0)			return URI_ERRNO_NO_PORT_COLON;		/* We only use 8 bits for portlen so better check */		if (uri->portlen != port_end - host_end)			return URI_ERRNO_INVALID_PORT;		/* test if port is number */		/* TODO: possibly lookup for the service otherwise? --pasky */		for (; host_end < port_end; host_end++)			if (!isdigit(*host_end))				return URI_ERRNO_INVALID_PORT;		/* Check valid port value, and let show an error message		 * about invalid url syntax. */		if (uri->port && uri->portlen) {			int n;			errno = 0;			n = strtol(uri->port, NULL, 10);			if (errno || !uri_port_is_valid(n))				return URI_ERRNO_INVALID_PORT;		}	}	if (*host_end == '/') {		host_end++;	} else if (get_protocol_need_slash_after_host(uri->protocol)) {		/* The need for slash after the host component depends on the		 * need for a host component. -- The dangerous mind of Jonah */		if (!uri->hostlen)			return URI_ERRNO_NO_HOST;		return URI_ERRNO_NO_HOST_SLASH;	}	/* Look for #fragment or POST_CHAR */	prefix_end = host_end + strcspn(host_end, "#" POST_CHAR_S);	uri->data = host_end;	uri->datalen = prefix_end - host_end;	if (*prefix_end == '#') {		uri->fragment = prefix_end + 1;		uri->fragmentlen = strcspn(uri->fragment, POST_CHAR_S);		prefix_end = uri->fragment + uri->fragmentlen;	}	if (*prefix_end == POST_CHAR) {		uri->post = prefix_end + 1;	}	return URI_ERRNO_OK;}intget_uri_port(struct uri *uri){	if (uri->port && uri->portlen) {		unsigned char *end = uri->port;		int port = strtol(uri->port, (char **) &end, 10);		if (end != uri->port) {			assert(uri_port_is_valid(port));			return port;		}	}	return get_protocol_port(uri->protocol);}#define can_compare_uri_components(comp) !(((comp) & (URI_SPECIAL | URI_IDN)))static inline intcompare_component(unsigned char *a, int alen, unsigned char *b, int blen){	/* Check that the length and the strings are both set or unset */	if (alen != blen || !!a != !!b) return 0;	/* Both are unset so that will make a perfect match */	if (!a || !alen) return 1;	/* Let the higher forces decide */	return !memcmp(a, b, blen);}#define wants(x) (components & (x))intcompare_uri(struct uri *a, struct uri *b, enum uri_component components){	if (a == b) return 1;	if (!components) return 0;	assertm(can_compare_uri_components(components),		"compare_uri() is a work in progress. Component unsupported");	return (!wants(URI_PROTOCOL) || a->protocol == b->protocol)		&& (!wants(URI_IP_FAMILY) || a->ip_family == b->ip_family)		&& (!wants(URI_USER)		    || compare_component(a->user, a->userlen, b->user, b->userlen))		&& (!wants(URI_PASSWORD)		    || compare_component(a->password, a->passwordlen, b->password, b->passwordlen))		&& (!wants(URI_HOST)		    || compare_component(a->host, a->hostlen, b->host, b->hostlen))		&& (!wants(URI_PORT)		    || compare_component(a->port, a->portlen, b->port, b->portlen))		&& (!wants(URI_DATA)		    || compare_component(a->data, a->datalen, b->data, b->datalen))		&& (!wants(URI_FRAGMENT)		    || compare_component(a->fragment, a->fragmentlen, b->fragment, b->fragmentlen))		&& (!wants(URI_POST)		    || compare_component(a->post, a->post ? strlen(a->post) : 0, b->post, b->post ? strlen(b->post) : 0));}/* We might need something more intelligent than this Swiss army knife. */struct string *add_uri_to_string(struct string *string, struct uri *uri,		  enum uri_component components){	/* Custom or unknown keep the URI untouched. */	if (uri->protocol == PROTOCOL_UNKNOWN)		return add_to_string(string, struri(uri)); 	if (wants(URI_PROTOCOL)) {		add_bytes_to_string(string, uri->string, uri->protocollen);		if (wants(URI_IP_FAMILY) && uri->ip_family)			add_long_to_string(string, uri->ip_family);		add_char_to_string(string, ':'); 		if (get_protocol_need_slashes(uri->protocol))			add_to_string(string, "//"); 	} 	if (wants(URI_USER) && uri->userlen) {		add_bytes_to_string(string, uri->user, uri->userlen); 		if (wants(URI_PASSWORD) && uri->passwordlen) {			add_char_to_string(string, ':');			add_bytes_to_string(string, uri->password,						    uri->passwordlen); 		}		add_char_to_string(string, '@'); 	} 	if (wants(URI_HOST) && uri->hostlen) {		int add_host = 1;#ifdef CONFIG_IPV6		/* Rationale for wants(URI_PORT): The [notation] was invented		 * so that you can have an IPv6 addy and a port together. So		 * we want to use it when that happens, otherwise we need not		 * bother (that happens only when we want it for DNS anyway).		 * I insist on an implied elegancy of this way, but YMMV. ;-)		 * --pasky */		if (uri->ipv6 && wants(URI_PORT)) add_char_to_string(string, '[');#endif#ifdef CONFIG_IDN		/* Support for the GNU International Domain Name library.		 *		 * http://www.gnu.org/software/libidn/manual/html_node/IDNA-Functions.html		 *		 * Now it is probably not perfect because idna_to_ascii_lz()		 * will be using a ``zero terminated input string encoded in		 * the current locale's character set''. Anyway I don't know		 * how to convert anything to UTF-8 or Unicode. --jonas */		if (wants(URI_IDN)) {			unsigned char *host = memacpy(uri->host, uri->hostlen);			if (host) {				char *idname;				int code = idna_to_ascii_lz(host, &idname, 0);				/* FIXME: Return NULL if it coughed? --jonas */				if (code == IDNA_SUCCESS) {					add_to_string(string, idname);					free(idname);					add_host = 0;
12 3 下一页
💿 文件大小 3632 K
👤 上传用户 zp380747972
📂 所属分类网络
🏷️ 相关标签

#浏览器
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -