📄 uri.c
字号:
} mem_free(host); } }#endif if (add_host) add_bytes_to_string(string, uri->host, uri->hostlen);#ifdef CONFIG_IPV6 if (uri->ipv6 && wants(URI_PORT)) add_char_to_string(string, ']');#endif } if (wants(URI_PORT) || wants(URI_DEFAULT_PORT)) { if (uri->portlen) { add_char_to_string(string, ':'); add_bytes_to_string(string, uri->port, uri->portlen); } else if (wants(URI_DEFAULT_PORT) && uri->protocol != PROTOCOL_USER) { /* For user protocols we don't know a default port. * Should user protocols ports be configurable? */ int port = get_protocol_port(uri->protocol); add_char_to_string(string, ':'); add_long_to_string(string, port); } } /* Only add slash if we need to separate */ if ((wants(URI_DATA) || wants(URI_POST)) && wants(~(URI_DATA | URI_PORT)) && get_protocol_need_slash_after_host(uri->protocol)) add_char_to_string(string, '/'); if (wants(URI_DATA) && uri->datalen) add_bytes_to_string(string, uri->data, uri->datalen); /* We can not test uri->datalen here since we need to always * add '/'. */ if (wants(URI_PATH) || wants(URI_FILENAME)) { unsigned char *filename = uri->data; unsigned char *pos; assertm(!wants(URI_FILENAME) || components == URI_FILENAME, "URI_FILENAME should be used alone %d", components); if (wants(URI_PATH) && !is_uri_dir_sep(uri, *filename)) { /* FIXME: Add correct separator */ add_char_to_string(string, '/'); } if (!uri->datalen) return string; for (pos = filename; *pos && !end_of_dir(*pos); pos++) if (wants(URI_FILENAME) && is_uri_dir_sep(uri, *pos)) filename = pos + 1; return add_bytes_to_string(string, filename, pos - filename); } if (wants(URI_QUERY) && uri->datalen) { unsigned char *query = memchr(uri->data, '?', uri->datalen); assertm(URI_QUERY == components, "URI_QUERY should be used alone %d", components); if (!query) return string; query++; /* Check fragment and POST_CHAR */ return add_bytes_to_string(string, query, strcspn(query, "#" POST_CHAR_S)); } if (wants(URI_FRAGMENT) && uri->fragmentlen) { add_char_to_string(string, '#'); add_bytes_to_string(string, uri->fragment, uri->fragmentlen); } if (wants(URI_POST) && uri->post) { add_char_to_string(string, POST_CHAR); add_to_string(string, uri->post); } else if (wants(URI_POST_INFO) && uri->post) { if (!strncmp(uri->post, "text/plain", 10)) { add_to_string(string, " (PLAIN TEXT DATA)"); } else if (!strncmp(uri->post, "multipart/form-data;", 20)) { add_to_string(string, " (MULTIPART FORM DATA)"); } else { add_to_string(string, " (POST DATA)"); } } return string;}#undef wantsunsigned char *get_uri_string(struct uri *uri, enum uri_component components){ struct string string; if (init_string(&string) && add_uri_to_string(&string, uri, components)) return string.source; done_string(&string); return NULL;}struct string *add_string_uri_to_string(struct string *string, unsigned char *uristring, enum uri_component components){ struct uri uri; if (parse_uri(&uri, uristring) != URI_ERRNO_OK) return NULL; return add_uri_to_string(string, &uri, components);}#define normalize_uri_reparse(str) normalize_uri(NULL, str)#define normalize_uri_noparse(uri) normalize_uri(uri, struri(uri))static unsigned char *normalize_uri(struct uri *uri, unsigned char *uristring){ unsigned char *parse_string = uristring; unsigned char *src, *dest, *path; int need_slash = 0; int parse = (uri == NULL); struct uri uri_struct; if (!uri) uri = &uri_struct; /* We need to get the real (proxied) URI but lowercase relevant URI * parts along the way. */ do { if (parse && parse_uri(uri, parse_string) != URI_ERRNO_OK) return uristring; assert(uri->data); /* This is a maybe not the right place but both join_urls() and * get_translated_uri() through translate_url() calls this * function and then it already works on and modifies an * allocated copy. */ convert_to_lowercase(uri->string, uri->protocollen); if (uri->hostlen) convert_to_lowercase(uri->host, uri->hostlen); parse = 1; parse_string = uri->data; } while (uri->protocol == PROTOCOL_PROXY); if (get_protocol_free_syntax(uri->protocol)) return uristring; if (uri->protocol != PROTOCOL_UNKNOWN) need_slash = get_protocol_need_slash_after_host(uri->protocol); /* We want to start at the first slash to also reduce URIs like * http://host//index.html to http://host/index.html */ path = uri->data - need_slash; dest = src = path; /* This loop mangles the URI string by removing directory elevators and * other cruft. Example: /.././etc////..//usr/ -> /usr/ */ while (*dest) { /* If the following pieces are the LAST parts of URL, we remove * them as well. See RFC 1808 for details. */ if (end_of_dir(src[0])) { /* URL data contains no more path. */ memmove(dest, src, strlen(src) + 1); break; } if (!is_uri_dir_sep(uri, src[0])) { /* This is to reduce indentation */ } else if (src[1] == '.') { if (!src[2]) { /* /. - skip the dot */ *dest++ = *src; *dest = 0; break; } else if (is_uri_dir_sep(uri, src[2])) { /* /./ - strip that.. */ src += 2; continue; } else if (src[2] == '.' && (is_uri_dir_sep(uri, src[3]) || !src[3])) { /* /../ or /.. - skip it and preceding element. */ /* First back out the last incrementation of * @dest (dest++) to get the position that was * last asigned to. */ if (dest > path) dest--; /* @dest might be pointing to a dir separator * so we decrement before any testing. */ while (dest > path) { dest--; if (is_uri_dir_sep(uri, *dest)) break; } if (!src[3]) { /* /.. - add ending slash and stop */ *dest++ = *src; *dest = 0; break; } src += 3; continue; } } else if (is_uri_dir_sep(uri, src[1])) { /* // - ignore first '/'. */ src += 1; continue; } /* We don't want to access memory past the NUL char. */ *dest = *src++; if (*dest) dest++; } return uristring;}/* The 'file' scheme URI comes in and bastardized URI comes out which consists * of just the complete path to file/directory, which the dumb 'file' protocol * backend can understand. No host parts etc, that is what this function is * supposed to chew. */static struct uri *transform_file_url(struct uri *uri, unsigned char *cwd){ unsigned char *path = uri->data; assert(uri->protocol == PROTOCOL_FILE && uri->data); /* Sort out the host part. We currently support only host "localhost" * (plus empty host part will be assumed to be "localhost" as well). * As our extensions, '.' will reference to the cwd on localhost * (originally, when the first thing after file:// wasn't "localhost/", * we assumed the cwd as well, and pretended that there's no host part * at all) and '..' to the directory parent to cwd. Another extension * is that if this is a DOS-like system, the first char in two-char * host part is uppercase letter and the second char is a colon, it is * assumed to be a local disk specification. */ /* TODO: Use FTP for non-localhost hosts. --pasky */ /* For URL "file://", we open the current directory. Some other * browsers instead open root directory, but AFAIK the standard does * not specify that and this was the original behaviour and it is more * consistent with our file://./ notation. */ /* Who would name their file/dir '...' ? */ if (*path == '.' || !*path) { struct string dir; if (!init_string(&dir)) return NULL; encode_uri_string(&dir, cwd, 0); /* Either we will end up with '//' and translate_directories() * will shorten it or the '/' will mark the inserted cwd as a * directory. */ if (*path == '.') *path = '/'; /* Insert the current working directory. */ /* The offset is 7 == sizeof("file://") - 1. */ insert_in_string(&struri(uri), 7, dir.source, dir.length); done_string(&dir); return uri; }#ifdef DOS_FS if (isasciialpha(path[0]) && path[1] == ':' && dir_sep(path[2])) return NULL;#endif for (; *path && !dir_sep(*path); path++); /* FIXME: We will in fact assume localhost even for non-local hosts, * until we will support the FTP transformation. --pasky */ memmove(uri->data, path, strlen(path) + 1); return uri;}static unsigned char *translate_url(unsigned char *url, unsigned char *cwd);unsigned char *join_urls(struct uri *base, unsigned char *rel){ unsigned char *uristring, *path; int add_slash = 0; int translate = 0; int length = 0; /* See RFC 1808 */ /* TODO: Support for ';' ? (see the RFC) --pasky */ /* For '#', '?' and '//' we could use get_uri_string() but it might be * too expensive since it uses granular allocation scheme. I wouldn't * personally mind tho' because it would be cleaner. --jonas */ if (rel[0] == '#') { /* Strip fragment and post part from the base URI and append * the fragment string in @rel. */ length = base->fragment ? base->fragment - struri(base) - 1 : get_real_uri_length(base); } else if (rel[0] == '?') { /* Strip query, fragment and post part from the base URI and * append the query string in @rel. */ length = base->fragment ? base->fragment - struri(base) - 1 : get_real_uri_length(base); uristring = memchr(base->data, '?', base->datalen); if (uristring) length = uristring - struri(base); } else if (rel[0] == '/' && rel[1] == '/') { if (!get_protocol_need_slashes(base->protocol)) return NULL; /* Get `<protocol>:' from the base URI and append the `//' part * from @rel. */ length = base->protocollen + 1; /* We need to sanitize the relative part and add stuff like * host slash. */ translate = 1; } /* If one of the tests above set @length to something useful */ if (length) { uristring = memacpy(struri(base), length); if (!uristring) return NULL; add_to_strn(&uristring, rel); if (translate) { unsigned char *translated; translated = translate_url(uristring, NULL); mem_free(uristring); return translated; } return normalize_uri_reparse(uristring); } /* Check if there is some protocol name to go for */ length = get_protocol_length(rel); if (length) { switch (get_protocol(rel, length)) { case PROTOCOL_UNKNOWN: case PROTOCOL_PROXY: /* Mysteriously proxy URIs are breaking here ... */ break; case PROTOCOL_FILE: /* FIXME: Use get_uri_string(base, URI_PATH) as cwd arg * to translate_url(). */ default: uristring = translate_url(rel, NULL); if (uristring) return uristring; } } assertm(base->data, "bad base url"); if_assert_failed return NULL; path = base->data; /* Either is path blank, but we've slash char before, or path is not * blank, but doesn't start by a slash (if we'd just stay along with * is_uri_dir_sep(&uri, path[-1]) w/o all the surrounding crap, it * should be enough, but I'm not sure and I don't want to break * anything --pasky). */ /* We skip first char of URL ('/') in parse_url() (ARGH). This * is reason of all this bug-bearing magic.. */ if (*path) { if (!is_uri_dir_sep(base, *path)) path--; } else { if (is_uri_dir_sep(base, path[-1])) path--; } if (!is_uri_dir_sep(base, rel[0])) { unsigned char *path_end; /* The URL is relative. */ if (!*path) { /* There's no path in the URL, but we're going to add * something there, and the something doesn't start by * a slash. So we need to insert a slash after the base * URL. Clever, eh? ;) */ add_slash = 1; } for (path_end = path; *path_end; path_end++) { if (end_of_dir(*path_end)) break; /* Modify the path pointer, so that it'll always point * above the last '/' in the URL; later, we'll copy the * URL only _TO_ this point, and anything after last * slash will be substituted by 'rel'. */ if (is_uri_dir_sep(base, *path_end)) path = path_end + 1; } } length = path - struri(base); uristring = mem_alloc(length + strlen(rel) + add_slash + 1); if (!uristring) return NULL; memcpy(uristring, struri(base), length); if (add_slash) uristring[length] = '/'; strcpy(uristring + length + add_slash, rel); return normalize_uri_reparse(uristring);}/* Tries to figure out what protocol @newurl might be specifying by checking if * it exists as a file locally or by checking parts of the host name. */static inline enum protocolfind_uri_protocol(unsigned char *newurl){ unsigned char *ch; /* First see if it is a file so filenames that look like hostnames * won't confuse us below. */ if (check_uri_file(newurl) >= 0) return PROTOCOL_FILE; /* Yes, it would be simpler to make test for IPv6 address first, * but it would result in confusing mix of ifdefs ;-). */ /* FIXME: Ideas for improve protocol detection * * - Handle common hostnames. It could be part of the protocol backend * structure. [ www -> http, irc -> irc, news -> nntp, ... ] * * - Resolve using port number. [ 119 -> nntp, 443 -> https, ... ] */ ch = newurl + strcspn(newurl, ".:/@"); if (*ch == '@' || (*ch == ':' && *newurl != '[') || !strncasecmp(newurl, "ftp.", 4)) { /* Contains user/password/ftp-hostname */ return PROTOCOL_FTP;#ifdef CONFIG_IPV6 } else if (*newurl == '[' && *ch == ':') { /* Candidate for IPv6 address */ unsigned char *bracket2, *colon2; ch++; bracket2 = strchr(ch, ']'); colon2 = strchr(ch, ':'); if (bracket2 && colon2 && bracket2 > colon2) return PROTOCOL_HTTP;#endif } else if (*newurl != '.' && *ch == '.') { /* Contains domain name? */ unsigned char *host_end, *domain; unsigned char *ipscan; /* Process the hostname */ for (domain = ch + 1; *(host_end = domain + strcspn(domain, ".:/?")) == '.'; domain = host_end + 1); /* It's IP? */ for (ipscan = ch; isdigit(*ipscan) || *ipscan == '.'; ipscan++); if (!*ipscan || *ipscan == ':' || *ipscan == '/') return PROTOCOL_HTTP; /* It's two-letter or known TLD? */ if (host_end - domain == 2 || end_with_known_tld(domain, host_end - domain) >= 0) return PROTOCOL_HTTP; } /* We default to file:// even though we already tested if the file * existed since it will give a "No such file or directory" error. * which might better hint the user that there was problem figuring out * the URI. */ return PROTOCOL_FILE;}#define MAX_TRANSLATION_ATTEMPTS 32/* Returns an URI string that can be used internally. Adding protocol prefix, * missing slashes etc. */static unsigned char *translate_url(unsigned char *url, unsigned char *cwd){ unsigned char *newurl; struct uri uri; enum uri_errno uri_errno, prev_errno = URI_ERRNO_EMPTY;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -