📄 uri.c

📁 一个很有名的浏览器
💻 C
📖 第 1 页 / 共 3 页
字号:
				}
				mem_free(host);
			}
		}
#endif
		if (add_host)
			add_bytes_to_string(string, uri->host, uri->hostlen);
#ifdef CONFIG_IPV6
		if (uri->ipv6 && wants(URI_PORT)) add_char_to_string(string, ']');
#endif
	}

	if (wants(URI_PORT) || wants(URI_DEFAULT_PORT)) {
		if (uri->portlen) {
			add_char_to_string(string, ':');
			add_bytes_to_string(string, uri->port, uri->portlen);

		} else if (wants(URI_DEFAULT_PORT)
			   && uri->protocol != PROTOCOL_USER) {
			/* For user protocols we don't know a default port.
			 * Should user protocols ports be configurable? */
			int port = get_protocol_port(uri->protocol);

			add_char_to_string(string, ':');
			add_long_to_string(string, port);
		}
	}

	/* Only add slash if we need to separate */
	if ((wants(URI_DATA) || wants(URI_POST))
	    && wants(~(URI_DATA | URI_PORT))
	    && get_protocol_need_slash_after_host(uri->protocol))
		add_char_to_string(string, '/');

	if (wants(URI_DATA) && uri->datalen)
		add_bytes_to_string(string, uri->data, uri->datalen);

	/* We can not test uri->datalen here since we need to always
	 * add '/'. */
	if (wants(URI_PATH) || wants(URI_FILENAME)) {
		unsigned char *filename = uri->data;
		unsigned char *pos;

		assertm(!wants(URI_FILENAME) || components == URI_FILENAME,
			"URI_FILENAME should be used alone %d", components);

		if (wants(URI_PATH) && !is_uri_dir_sep(uri, *filename)) {
			/* FIXME: Add correct separator */
			add_char_to_string(string, '/');
		}

		if (!uri->datalen) return string;

		/* Walk up to the end of the directory part. When only the
		 * file name is wanted, @filename is moved past every dir
		 * separator seen, so it ends up after the last one. */
		for (pos = filename; *pos && !end_of_dir(*pos); pos++)
			if (wants(URI_FILENAME) && is_uri_dir_sep(uri, *pos))
				filename = pos + 1;

		return add_bytes_to_string(string, filename, pos - filename);
	}

	if (wants(URI_QUERY) && uri->datalen) {
		unsigned char *query = memchr(uri->data, '?', uri->datalen);

		assertm(URI_QUERY == components,
			"URI_QUERY should be used alone %d", components);

		if (!query) return string;

		/* Skip the '?' itself. */
		query++;
		/* Check fragment and POST_CHAR */
		return add_bytes_to_string(string, query, strcspn(query, "#" POST_CHAR_S));
	}

	if (wants(URI_FRAGMENT) && uri->fragmentlen) {
		add_char_to_string(string, '#');
		add_bytes_to_string(string, uri->fragment, uri->fragmentlen);
	}

	if (wants(URI_POST) && uri->post) {
		add_char_to_string(string, POST_CHAR);
		add_to_string(string, uri->post);

	} else if (wants(URI_POST_INFO) && uri->post) {
		/* Only a short human readable tag is appended, chosen from
		 * the prefix of the post data. */
		if (!strncmp(uri->post, "text/plain", 10)) {
			add_to_string(string, " (PLAIN TEXT DATA)");

		} else if (!strncmp(uri->post, "multipart/form-data;", 20)) {
			add_to_string(string, " (MULTIPART FORM DATA)");

		} else {
			add_to_string(string, " (POST DATA)");
		}
	}

	return string;
}

#undef wants

/* Returns the requested @components of @uri as a string buffer.
 *
 * A local struct string is initialized and filled by add_uri_to_string(); on
 * success its source buffer is returned. If either step fails the string is
 * released with done_string() and NULL is returned.
 * NOTE(review): the caller presumably has to free the returned buffer --
 * confirm against the string API. */
unsigned char *
get_uri_string(struct uri *uri, enum uri_component components)
{
	struct string string;

	if (init_string(&string)
	    && add_uri_to_string(&string, uri, components))
		return string.source;

	done_string(&string);
	return NULL;
}

/* Parses @uristring and appends the requested @components to @string.
 *
 * Returns NULL when parse_uri() does not report URI_ERRNO_OK; otherwise
 * returns whatever add_uri_to_string() returns. The parsed struct uri is a
 * local variable, so it is only used for the duration of the call. */
struct string *
add_string_uri_to_string(struct string *string, unsigned char *uristring,
			 enum uri_component components)
{
	struct uri uri;

	if (parse_uri(&uri, uristring) != URI_ERRNO_OK)
		return NULL;

	return add_uri_to_string(string, &uri, components);
}

/* Convenience wrappers around normalize_uri(): the first passes a NULL uri so
 * that the string is parsed inside normalize_uri(), the second hands over an
 * existing struct uri together with its string (struri()). */
#define normalize_uri_reparse(str)	normalize_uri(NULL, str)
#define normalize_uri_noparse(uri)	normalize_uri(uri, struri(uri))

/* Cleans up the path part of @uristring in place and returns @uristring.
 *
 * @uri        Parsed form of @uristring, or NULL to have it parsed here into
 *             a local struct uri.
 * @uristring  The string that gets modified; it is also the return value on
 *             every path, including the early exits.
 *
 * The protocol and host parts are lowercased, proxied URIs are re-parsed
 * until a non-proxy protocol is found, and then "/./", "/../" and repeated
 * dir separators are collapsed in the path. Nothing is collapsed when parsing
 * fails or when get_protocol_free_syntax() is true for the protocol. */
static unsigned char *
normalize_uri(struct uri *uri, unsigned char *uristring)
{
	unsigned char *parse_string = uristring;
	unsigned char *src, *dest, *path;
	int need_slash = 0;
	int parse = (uri == NULL);
	struct uri uri_struct;

	if (!uri) uri = &uri_struct;

	/* We need to get the real (proxied) URI but lowercase relevant URI
	 * parts along the way. */
	do {
		if (parse && parse_uri(uri, parse_string) != URI_ERRNO_OK)
			return uristring;

		assert(uri->data);

		/* This is maybe not the right place but both join_urls() and
		 * get_translated_uri() through translate_url() call this
		 * function and then it already works on and modifies an
		 * allocated copy. */
		convert_to_lowercase(uri->string, uri->protocollen);
		if (uri->hostlen) convert_to_lowercase(uri->host, uri->hostlen);

		/* Any further iteration handles the URI embedded in the data
		 * part, which always has to be parsed. */
		parse = 1;
		parse_string = uri->data;
	} while (uri->protocol == PROTOCOL_PROXY);

	if (get_protocol_free_syntax(uri->protocol))
		return uristring;

	if (uri->protocol != PROTOCOL_UNKNOWN)
		need_slash = get_protocol_need_slash_after_host(uri->protocol);

	/* We want to start at the first slash to also reduce URIs like
	 * http://host//index.html to http://host/index.html */
	path = uri->data - need_slash;
	/* @src reads ahead and @dest writes behind it in the same buffer. */
	dest = src = path;

	/* This loop mangles the URI string by removing directory elevators and
	 * other cruft. Example: /.././etc////..//usr/ -> /usr/ */
	while (*dest) {
		/* If the following pieces are the LAST parts of URL, we remove
		 * them as well. See RFC 1808 for details. */

		if (end_of_dir(src[0])) {
			/* URL data contains no more path. */
			/* Copy the remainder, including the terminating NUL. */
			memmove(dest, src, strlen(src) + 1);
			break;
		}

		if (!is_uri_dir_sep(uri, src[0])) {
			/* This is to reduce indentation */

		} else if (src[1] == '.') {
			if (!src[2]) {
				/* /. - skip the dot */
				*dest++ = *src;
				*dest = 0;
				break;

			} else if (is_uri_dir_sep(uri, src[2])) {
				/* /./ - strip that.. */
				src += 2;
				continue;

			} else if (src[2] == '.'
				   && (is_uri_dir_sep(uri, src[3]) || !src[3])) {
				/* /../ or /.. - skip it and preceding element. */

				/* First back out the last incrementation of
				 * @dest (dest++) to get the position that was
				 * last assigned to. */
				if (dest > path) dest--;

				/* @dest might be pointing to a dir separator
				 * so we decrement before any testing. */
				while (dest > path) {
					dest--;
					if (is_uri_dir_sep(uri, *dest)) break;
				}

				if (!src[3]) {
					/* /.. - add ending slash and stop */
					*dest++ = *src;
					*dest = 0;
					break;
				}

				src += 3;
				continue;
			}

		} else if (is_uri_dir_sep(uri, src[1])) {
			/* // - ignore first '/'. */
			src += 1;
			continue;
		}

		/* We don't want to access memory past the NUL char. */
		*dest = *src++;
		if (*dest) dest++;
	}

	return uristring;
}

/* The 'file' scheme URI comes in and bastardized URI comes out which consists
 * of just the complete path to file/directory, which the dumb 'file' protocol
 * backend can understand. No host parts etc, that is what this function is
 * supposed to chew.
 *
 * @uri  Parsed file URI; its string is modified in place.
 * @cwd  Current working directory, inserted (URI-encoded) when the part after
 *       "file://" is empty or starts with '.'.
 *
 * Returns @uri on success. Returns NULL when the temporary string cannot be
 * initialized, or (with DOS_FS defined) when the data starts with a letter,
 * a colon and a dir separator.
 *
 * NOTE(review): @cwd is handed to encode_uri_string() without a NULL check,
 * while join_urls() calls translate_url() with a NULL cwd -- confirm that a
 * NULL cwd can not reach this function or that encode_uri_string() copes. */
static struct uri *
transform_file_url(struct uri *uri, unsigned char *cwd)
{
	unsigned char *path = uri->data;

	assert(uri->protocol == PROTOCOL_FILE && uri->data);

	/* Sort out the host part. We currently support only host "localhost"
	 * (plus empty host part will be assumed to be "localhost" as well).
	 * As our extensions, '.' will reference to the cwd on localhost
	 * (originally, when the first thing after file:// wasn't "localhost/",
	 * we assumed the cwd as well, and pretended that there's no host part
	 * at all) and '..' to the directory parent to cwd. Another extension
	 * is that if this is a DOS-like system, the first char in two-char
	 * host part is uppercase letter and the second char is a colon, it is
	 * assumed to be a local disk specification. */
	/* TODO: Use FTP for non-localhost hosts. --pasky */

	/* For URL "file://", we open the current directory. Some other
	 * browsers instead open root directory, but AFAIK the standard does
	 * not specify that and this was the original behaviour and it is more
	 * consistent with our file://./ notation. */

	/* Who would name their file/dir '...' ? */
	if (*path == '.' || !*path) {
		struct string dir;

		if (!init_string(&dir))
			return NULL;

		encode_uri_string(&dir, cwd, 0);

		/* Either we will end up with '//' and translate_directories()
		 * will shorten it or the '/' will mark the inserted cwd as a
		 * directory. */
		if (*path == '.') *path = '/';

		/* Insert the current working directory. */
		/* The offset is 7 == sizeof("file://") - 1. */
		insert_in_string(&struri(uri), 7, dir.source, dir.length);

		done_string(&dir);
		return uri;
	}

#ifdef DOS_FS
	if (isasciialpha(path[0]) && path[1] == ':' && dir_sep(path[2]))
		return NULL;
#endif

	/* Skip everything up to the first dir separator (the host part). */
	for (; *path && !dir_sep(*path); path++);

	/* FIXME: We will in fact assume localhost even for non-local hosts,
	 * until we will support the FTP transformation. --pasky */

	/* Drop the skipped part by moving the rest (with its NUL) down. */
	memmove(uri->data, path, strlen(path) + 1);

	return uri;
}

static unsigned char *translate_url(unsigned char *url, unsigned char *cwd);

/* Joins the relative reference @rel onto the parsed @base URI.
 *
 * Handled cases, in order:
 *  - @rel starting with '#', '?' or "//": a prefix of the base string is
 *    copied and @rel is appended to it;
 *  - @rel starting with a protocol name: handed to translate_url(), except
 *    for unknown and proxy protocols (and when translate_url() returns NULL),
 *    which continue with the next case;
 *  - anything else: @rel replaces either the whole base path (when it starts
 *    with a dir separator) or the part after the last dir separator.
 *
 * Returns the resulting string, or NULL on allocation failure, on a failed
 * assertion, or when @rel starts with "//" but the base protocol does not
 * use slashes. In the "//" case the result is whatever translate_url()
 * returns. */
unsigned char *
join_urls(struct uri *base, unsigned char *rel)
{
	unsigned char *uristring, *path;
	int add_slash = 0;
	int translate = 0;
	int length = 0;

	/* See RFC 1808 */
	/* TODO: Support for ';' ? (see the RFC) --pasky */

	/* For '#', '?' and '//' we could use get_uri_string() but it might be
	 * too expensive since it uses granular allocation scheme. I wouldn't
	 * personally mind tho' because it would be cleaner. --jonas */
	if (rel[0] == '#') {
		/* Strip fragment and post part from the base URI and append
		 * the fragment string in @rel. */
		/* The -1 also drops the character right before the fragment. */
		length  = base->fragment
			? base->fragment - struri(base) - 1
			: get_real_uri_length(base);

	} else if (rel[0] == '?') {
		/* Strip query, fragment and post part from the base URI and
		 * append the query string in @rel. */
		length  = base->fragment ? base->fragment - struri(base) - 1
					 : get_real_uri_length(base);

		/* An existing query in the base cuts the prefix even shorter. */
		uristring = memchr(base->data, '?', base->datalen);
		if (uristring) length = uristring - struri(base);

	} else if (rel[0] == '/' && rel[1] == '/') {
		if (!get_protocol_need_slashes(base->protocol))
			return NULL;

		/* Get `<protocol>:' from the base URI and append the `//' part
		 * from @rel. */
		length = base->protocollen + 1;

		/* We need to sanitize the relative part and add stuff like
		 * host slash. */
		translate = 1;
	}

	/* If one of the tests above set @length to something useful */
	if (length) {
		uristring = memacpy(struri(base), length);
		if (!uristring) return NULL;

		add_to_strn(&uristring, rel);

		if (translate) {
			unsigned char *translated;

			/* The joined string is only an intermediate here; it
			 * is freed once translate_url() has produced its
			 * result. */
			translated = translate_url(uristring, NULL);
			mem_free(uristring);
			return translated;
		}

		return normalize_uri_reparse(uristring);
	}

	/* Check if there is some protocol name to go for */
	length = get_protocol_length(rel);
	if (length) {
		switch (get_protocol(rel, length)) {
		case PROTOCOL_UNKNOWN:
		case PROTOCOL_PROXY:
			/* Mysteriously proxy URIs are breaking here ... */
			break;

		case PROTOCOL_FILE:
			/* FIXME: Use get_uri_string(base, URI_PATH) as cwd arg
			 * to translate_url(). */
			/* Falls through to the default handling. */
		default:
			uristring = translate_url(rel, NULL);
			if (uristring) return uristring;
		}
	}

	assertm(base->data, "bad base url");
	if_assert_failed return NULL;

	path = base->data;

	/* Either is path blank, but we've slash char before, or path is not
	 * blank, but doesn't start by a slash (if we'd just stay along with
	 * is_uri_dir_sep(&uri, path[-1]) w/o all the surrounding crap, it
	 * should be enough, but I'm not sure and I don't want to break
	 * anything --pasky). */
	/* We skip first char of URL ('/') in parse_url() (ARGH). This
	 * is reason of all this bug-bearing magic.. */
	if (*path) {
		if (!is_uri_dir_sep(base, *path)) path--;
	} else {
		if (is_uri_dir_sep(base, path[-1])) path--;
	}

	if (!is_uri_dir_sep(base, rel[0])) {
		unsigned char *path_end;

		/* The URL is relative. */

		if (!*path) {
			/* There's no path in the URL, but we're going to add
			 * something there, and the something doesn't start by
			 * a slash. So we need to insert a slash after the base
			 * URL. Clever, eh? ;) */
			add_slash = 1;
		}

		for (path_end = path; *path_end; path_end++) {
			if (end_of_dir(*path_end)) break;
			/* Modify the path pointer, so that it'll always point
			 * above the last '/' in the URL; later, we'll copy the
			 * URL only _TO_ this point, and anything after last
			 * slash will be substituted by 'rel'. */
			if (is_uri_dir_sep(base, *path_end))
				path = path_end + 1;
		}
	}

	/* Result layout: base prefix, optional '/', @rel, terminating NUL. */
	length = path - struri(base);
	uristring = mem_alloc(length + strlen(rel) + add_slash + 1);
	if (!uristring) return NULL;

	memcpy(uristring, struri(base), length);
	if (add_slash) uristring[length] = '/';
	strcpy(uristring + length + add_slash, rel);

	return normalize_uri_reparse(uristring);
}


/* Tries to figure out what protocol @newurl might be specifying by checking if
 * it exists as a file locally or by checking parts of the host name.
 *
 * Returns one of PROTOCOL_FILE, PROTOCOL_FTP or PROTOCOL_HTTP; PROTOCOL_FILE
 * is also the fallback when no rule matches. */
static inline enum protocol
find_uri_protocol(unsigned char *newurl)
{
	unsigned char *ch;

	/* First see if it is a file so filenames that look like hostnames
	 * won't confuse us below. */
	if (check_uri_file(newurl) >= 0) return PROTOCOL_FILE;

	/* Yes, it would be simpler to make test for IPv6 address first,
	 * but it would result in confusing mix of ifdefs ;-). */

	/* FIXME: Ideas for improve protocol detection
	 *
	 * - Handle common hostnames. It could be part of the protocol backend
	 *   structure. [ www -> http, irc -> irc, news -> nntp, ... ]
	 *
	 * - Resolve using port number. [ 119 -> nntp, 443 -> https, ... ]
	 */

	/* @ch points at the first of '.', ':', '/' or '@', or at the
	 * terminating NUL when none of them occurs. */
	ch = newurl + strcspn(newurl, ".:/@");
	if (*ch == '@' || (*ch == ':' && *newurl != '[')
		|| !strncasecmp(newurl, "ftp.", 4)) {
		/* Contains user/password/ftp-hostname */
		return PROTOCOL_FTP;

#ifdef CONFIG_IPV6
	} else if (*newurl == '[' && *ch == ':') {
		/* Candidate for IPv6 address */
		unsigned char *bracket2, *colon2;

		ch++;
		bracket2 = strchr(ch, ']');
		colon2 = strchr(ch, ':');
		/* Requires a second colon located before the closing
		 * bracket. */
		if (bracket2 && colon2 && bracket2 > colon2)
			return PROTOCOL_HTTP;
#endif

	} else if (*newurl != '.' && *ch == '.') {
		/* Contains domain name? */
		unsigned char *host_end, *domain;
		unsigned char *ipscan;

		/* Process the hostname */
		/* Leaves @domain at the start of the last dot-separated label
		 * and @host_end just past it. */
		for (domain = ch + 1;
			*(host_end = domain + strcspn(domain, ".:/?")) == '.';
			domain = host_end + 1);

		/* It's IP? */
		/* NOTE(review): the scan starts at the first dot, so the
		 * characters before it are not checked for being digits --
		 * confirm that this is intended. */
		for (ipscan = ch; isdigit(*ipscan) || *ipscan == '.';
			ipscan++);

		if (!*ipscan || *ipscan == ':' || *ipscan == '/')
			return PROTOCOL_HTTP;

		/* It's two-letter or known TLD? */
		if (host_end - domain == 2
		    || end_with_known_tld(domain, host_end - domain) >= 0)
			return PROTOCOL_HTTP;
	}

	/* We default to file:// even though we already tested if the file
	 * existed since it will give a "No such file or directory" error.
	 * which might better hint the user that there was problem figuring out
	 * the URI. */
	return PROTOCOL_FILE;
}


#define MAX_TRANSLATION_ATTEMPTS	32

/* Returns an URI string that can be used internally. Adding protocol prefix,
 * missing slashes etc. */
static unsigned char *
translate_url(unsigned char *url, unsigned char *cwd)
{
	unsigned char *newurl;
	struct uri uri;
	enum uri_errno uri_errno, prev_errno = URI_ERRNO_EMPTY;
💿 文件大小 3632 K
👤 上传用户 zp380747972
📂 所属分类网络
🏷️ 相关标签

#浏览器
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -