📄 url.c
字号:
/* Resolve "." and ".." elements of PATH by destructively modifying PATH and return non-zero if PATH has been modified, zero otherwise. The algorithm is in spirit similar to the one described in rfc1808, although implemented differently, in one pass. To recap, path elements containing only "." are removed, and ".." is taken to mean "back up one element". Single leading and trailing slashes are preserved. For example, "a/b/c/./../d/.." will yield "a/b/". More exhaustive test examples are provided below. If you change anything in this function, run test_path_simplify to make sure you haven't broken a test case. */static intpath_simplify (char *path){ char *h = path; /* hare */ char *t = path; /* tortoise */ char *beg = path; /* boundary for backing the tortoise */ char *end = path + strlen (path); while (h < end) { /* Hare should be at the beginning of a path element. */ if (h[0] == '.' && (h[1] == '/' || h[1] == '\0')) { /* Ignore "./". */ h += 2; } else if (h[0] == '.' && h[1] == '.' && (h[2] == '/' || h[2] == '\0')) { /* Handle "../" by retreating the tortoise by one path element -- but not past beggining. */ if (t > beg) { /* Move backwards until T hits the beginning of the previous path element or the beginning of path. */ for (--t; t > beg && t[-1] != '/'; t--) ; } else { /* If we're at the beginning, copy the "../" literally move the beginning so a later ".." doesn't remove it. */ beg = t + 3; goto regular; } h += 3; } else { regular: /* A regular path element. If H hasn't advanced past T, simply skip to the next path element. Otherwise, copy the path element until the next slash. */ if (t == h) { /* Skip the path element, including the slash. */ while (h < end && *h != '/') t++, h++; if (h < end) t++, h++; } else { /* Copy the path element, including the final slash. */ while (h < end && *h != '/') *t++ = *h++; if (h < end) *t++ = *h++; } } } if (t != h) *t = '\0'; return t != h;}/* Return the length of URL's path. Path is considered to be terminated by one of '?', ';', '#', or by the end of the string. */static intpath_length (const char *url){ const char *q = strpbrk_or_eos (url, "?;#"); return q - url;}/* Find the last occurrence of character C in the range [b, e), or NULL, if none are present. We might want to use memrchr (a GNU extension) under GNU libc. */static const char *find_last_char (const char *b, const char *e, char c){ for (; e > b; e--) if (*e == c) return e; return NULL;}/* Merge BASE with LINK and return the resulting URI. Either of the URIs may be absolute or relative, complete with the host name, or path only. This tries to reasonably handle all foreseeable cases. It only employs minimal URL parsing, without knowledge of the specifics of schemes. I briefly considered making this function call path_simplify after the merging process, as rfc1738 seems to suggest. This is a bad idea for several reasons: 1) it complexifies the code, and 2) url_parse has to simplify path anyway, so it's wasteful to boot. */char *uri_merge (const char *base, const char *link){ int linklength; const char *end; char *merge; if (url_has_scheme (link)) return xstrdup (link); /* We may not examine BASE past END. */ end = base + path_length (base); linklength = strlen (link); if (!*link) { /* Empty LINK points back to BASE, query string and all. */ return xstrdup (base); } else if (*link == '?') { /* LINK points to the same location, but changes the query string. Examples: */ /* uri_merge("path", "?new") -> "path?new" */ /* uri_merge("path?foo", "?new") -> "path?new" */ /* uri_merge("path?foo#bar", "?new") -> "path?new" */ /* uri_merge("path#foo", "?new") -> "path?new" */ int baselength = end - base; merge = xmalloc (baselength + linklength + 1); memcpy (merge, base, baselength); memcpy (merge + baselength, link, linklength); merge[baselength + linklength] = '\0'; } else if (*link == '#') { /* uri_merge("path", "#new") -> "path#new" */ /* uri_merge("path#foo", "#new") -> "path#new" */ /* uri_merge("path?foo", "#new") -> "path?foo#new" */ /* uri_merge("path?foo#bar", "#new") -> "path?foo#new" */ int baselength; const char *end1 = strchr (base, '#'); if (!end1) end1 = base + strlen (base); baselength = end1 - base; merge = xmalloc (baselength + linklength + 1); memcpy (merge, base, baselength); memcpy (merge + baselength, link, linklength); merge[baselength + linklength] = '\0'; } else if (*link == '/' && *(link + 1) == '/') { /* LINK begins with "//" and so is a net path: we need to replace everything after (and including) the double slash with LINK. */ /* uri_merge("foo", "//new/bar") -> "//new/bar" */ /* uri_merge("//old/foo", "//new/bar") -> "//new/bar" */ /* uri_merge("http://old/foo", "//new/bar") -> "http://new/bar" */ int span; const char *slash; const char *start_insert; /* Look for first slash. */ slash = memchr (base, '/', end - base); /* If found slash and it is a double slash, then replace from this point, else default to replacing from the beginning. */ if (slash && *(slash + 1) == '/') start_insert = slash; else start_insert = base; span = start_insert - base; merge = (char *)xmalloc (span + linklength + 1); if (span) memcpy (merge, base, span); memcpy (merge + span, link, linklength); merge[span + linklength] = '\0'; } else if (*link == '/') { /* LINK is an absolute path: we need to replace everything after (and including) the FIRST slash with LINK. So, if BASE is "http://host/whatever/foo/bar", and LINK is "/qux/xyzzy", our result should be "http://host/qux/xyzzy". */ int span; const char *slash; const char *start_insert = NULL; /* for gcc to shut up. */ const char *pos = base; int seen_slash_slash = 0; /* We're looking for the first slash, but want to ignore double slash. */ again: slash = memchr (pos, '/', end - pos); if (slash && !seen_slash_slash) if (*(slash + 1) == '/') { pos = slash + 2; seen_slash_slash = 1; goto again; } /* At this point, SLASH is the location of the first / after "//", or the first slash altogether. START_INSERT is the pointer to the location where LINK will be inserted. When examining the last two examples, keep in mind that LINK begins with '/'. */ if (!slash && !seen_slash_slash) /* example: "foo" */ /* ^ */ start_insert = base; else if (!slash && seen_slash_slash) /* example: "http://foo" */ /* ^ */ start_insert = end; else if (slash && !seen_slash_slash) /* example: "foo/bar" */ /* ^ */ start_insert = base; else if (slash && seen_slash_slash) /* example: "http://something/" */ /* ^ */ start_insert = slash; span = start_insert - base; merge = (char *)xmalloc (span + linklength + 1); if (span) memcpy (merge, base, span); memcpy (merge + span, link, linklength); merge[span + linklength] = '\0'; } else { /* LINK is a relative URL: we need to replace everything after last slash (possibly empty) with LINK. So, if BASE is "whatever/foo/bar", and LINK is "qux/xyzzy", our result should be "whatever/foo/qux/xyzzy". */ int need_explicit_slash = 0; int span; const char *start_insert; const char *last_slash = find_last_char (base, end, '/'); if (!last_slash) { /* No slash found at all. Replace what we have with LINK. */ start_insert = base; } else if (last_slash && last_slash >= base + 2 && last_slash[-2] == ':' && last_slash[-1] == '/') { /* example: http://host" */ /* ^ */ start_insert = end + 1; need_explicit_slash = 1; } else { /* example: "whatever/foo/bar" */ /* ^ */ start_insert = last_slash + 1; } span = start_insert - base; merge = (char *)xmalloc (span + linklength + 1); if (span) memcpy (merge, base, span); if (need_explicit_slash) merge[span - 1] = '/'; memcpy (merge + span, link, linklength); merge[span + linklength] = '\0'; } return merge;}#define APPEND(p, s) do { \ int len = strlen (s); \ memcpy (p, s, len); \ p += len; \} while (0)/* Use this instead of password when the actual password is supposed to be hidden. We intentionally use a generic string without giving away the number of characters in the password, like previous versions did. */#define HIDDEN_PASSWORD "*password*"/* Recreate the URL string from the data in URL. If HIDE is non-zero (as it is when we're calling this on a URL we plan to print, but not when calling it to canonicalize a URL for use within the program), password will be hidden. Unsafe characters in the URL will be quoted. */char *url_string (const struct url *url, int hide_password){ int size; char *result, *p; char *quoted_host, *quoted_user = NULL, *quoted_passwd = NULL; int scheme_port = supported_schemes[url->scheme].default_port; const char *scheme_str = supported_schemes[url->scheme].leading_string; int fplen = full_path_length (url); int brackets_around_host; assert (scheme_str != NULL); /* Make sure the user name and password are quoted. */ if (url->user) { quoted_user = url_escape_allow_passthrough (url->user); if (url->passwd) { if (hide_password) quoted_passwd = HIDDEN_PASSWORD; else quoted_passwd = url_escape_allow_passthrough (url->passwd); } } /* In the unlikely event that the host name contains non-printable characters, quote it for displaying to the user. */ quoted_host = url_escape_allow_passthrough (url->host); /* Undo the quoting of colons that URL escaping performs. IPv6 addresses may legally contain colons, and in that case must be placed in square brackets. */ if (quoted_host != url->host) unescape_single_char (quoted_host, ':'); brackets_around_host = strchr (quoted_host, ':') != NULL; size = (strlen (scheme_str) + strlen (quoted_host) + (brackets_around_host ? 2 : 0) + fplen + 1); if (url->port != scheme_port) size += 1 + numdigit (url->port); if (quoted_user) { size += 1 + strlen (quoted_user); if (quoted_passwd) size += 1 + strlen (quoted_passwd); } p = result = xmalloc (size); APPEND (p, scheme_str); if (quoted_user) { APPEND (p, quoted_user); if (quoted_passwd) { *p++ = ':'; APPEND (p, quoted_passwd); } *p++ = '@'; } if (brackets_around_host) *p++ = '['; APPEND (p, quoted_host); if (brackets_around_host) *p++ = ']'; if (url->port != scheme_port) { *p++ = ':'; p = number_to_string (p, url->port); } full_path_write (url, p); p += fplen; *p++ = '\0'; assert (p - result == size); if (quoted_user && quoted_user != url->user) xfree (quoted_user); if (quoted_passwd && !hide_password && quoted_passwd != url->passwd) xfree (quoted_passwd); if (quoted_host != url->host) xfree (quoted_host); return result;}/* Return non-zero if scheme a is similar to scheme b. Schemes are similar if they are equal. If SSL is supported, schemes are also similar if one is http (SCHEME_HTTP) and the other is https (SCHEME_HTTPS). */intschemes_are_similar_p (enum url_scheme a, enum url_scheme b){ if (a == b) return 1;#ifdef HAVE_SSL if ((a == SCHEME_HTTP && b == SCHEME_HTTPS) || (a == SCHEME_HTTPS && b == SCHEME_HTTP)) return 1;#endif return 0;}#if 0/* Debugging and testing support for path_simplify. *//* Debug: run path_simplify on PATH and return the result in a new string. Useful for calling from the debugger. */static char *ps (char *path){ char *copy = xstrdup (path); path_simplify (copy); return copy;}static voidrun_test (char *test, char *expected_result, int expected_change){ char *test_copy = xstrdup (test); int modified = path_simplify (test_copy); if (0 != strcmp (test_copy, expected_result)) { printf ("Failed path_simplify(\"%s\"): expected \"%s\", got \"%s\".\n", test, expected_result, test_copy); } if (modified != expected_change) { if (expected_change == 1) printf ("Expected modification with path_simplify(\"%s\").\n", test); else printf ("Expected no modification with path_simplify(\"%s\").\n", test); } xfree (test_copy);}static voidtest_path_simplify (void){ static struct { char *test, *result; int should_modify; } tests[] = { { "", "", 0 }, { ".", "", 1 }, { "./", "", 1 }, { "..", "..", 0 }, { "../", "../", 0 }, { "foo", "foo", 0 }, { "foo/bar", "foo/bar", 0 }, { "foo///bar", "foo///bar", 0 }, { "foo/.", "foo/", 1 }, { "foo/./", "foo/", 1 }, { "foo./", "foo./", 0 }, { "foo/../bar", "bar", 1 }, { "foo/../bar/", "bar/", 1 }, { "foo/bar/..", "foo/", 1 }, { "foo/bar/../x", "foo/x", 1 }, { "foo/bar/../x/", "foo/x/", 1 }, { "foo/..", "", 1 }, { "foo/../..", "..", 1 }, { "foo/../../..", "../..", 1 }, { "foo/../../bar/../../baz", "../../baz", 1 }, { "a/b/../../c", "c", 1 }, { "./a/../b", "b", 1 } }; int i; for (i = 0; i < countof (tests); i++) { char *test = tests[i].test; char *expected_result = tests[i].result; int expected_change = tests[i].should_modify; run_test (test, expected_result, expected_change); }}#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -