📄 url.c
字号:
if (*p == ':') { const char *pp; char *res; /* If the characters after the colon and before the next slash or end of string are all digits, it's HTTP. */ int digits = 0; for (pp = p + 1; ISDIGIT (*pp); pp++) ++digits; if (digits > 0 && (*pp == '/' || *pp == '\0')) goto http; /* Prepend "ftp://" to the entire URL... */ res = xmalloc (6 + strlen (url) + 1); sprintf (res, "ftp://%s", url); /* ...and replace ':' with '/'. */ res[6 + (p - url)] = '/'; return res; } else { char *res; http: /* Just prepend "http://" to what we have. */ res = xmalloc (7 + strlen (url) + 1); sprintf (res, "http://%s", url); return res; }}static void split_path PARAMS ((const char *, char **, char **));/* Like strpbrk, with the exception that it returns the pointer to the terminating zero (end-of-string aka "eos") if no matching character is found. Although I normally balk at Gcc-specific optimizations, it probably makes sense here: glibc has optimizations that detect strpbrk being called with literal string as ACCEPT and inline the search. That optimization is defeated if strpbrk is hidden within the call to another function. (And no, making strpbrk_or_eos inline doesn't help because the check for literal accept is in the preprocessor.) */#ifdef __GNUC__#define strpbrk_or_eos(s, accept) ({ \ char *SOE_p = strpbrk (s, accept); \ if (!SOE_p) \ SOE_p = (char *)s + strlen (s); \ SOE_p; \})#else /* not __GNUC__ */static char *strpbrk_or_eos (const char *s, const char *accept){ char *p = strpbrk (s, accept); if (!p) p = (char *)s + strlen (s); return p;}#endif/* Turn STR into lowercase; return non-zero if a character was actually changed. */static intlowercase_str (char *str){ int change = 0; for (; *str; str++) if (ISUPPER (*str)) { change = 1; *str = TOLOWER (*str); } return change;}static char *parse_errors[] = {#define PE_NO_ERROR 0 N_("No error"),#define PE_UNSUPPORTED_SCHEME 1 N_("Unsupported scheme"),#define PE_EMPTY_HOST 2 N_("Empty host"),#define PE_BAD_PORT_NUMBER 3 N_("Bad port number"),#define PE_INVALID_USER_NAME 4 N_("Invalid user name"),#define PE_UNTERMINATED_IPV6_ADDRESS 5 N_("Unterminated IPv6 numeric address"),#define PE_IPV6_NOT_SUPPORTED 6 N_("IPv6 addresses not supported"),#define PE_INVALID_IPV6_ADDRESS 7 N_("Invalid IPv6 numeric address")};#ifdef ENABLE_IPV6/* The following two functions were adapted from glibc. */static intis_valid_ipv4_address (const char *str, const char *end){ int saw_digit, octets; int val; saw_digit = 0; octets = 0; val = 0; while (str < end) { int ch = *str++; if (ch >= '0' && ch <= '9') { val = val * 10 + (ch - '0'); if (val > 255) return 0; if (saw_digit == 0) { if (++octets > 4) return 0; saw_digit = 1; } } else if (ch == '.' && saw_digit == 1) { if (octets == 4) return 0; val = 0; saw_digit = 0; } else return 0; } if (octets < 4) return 0; return 1;}static const int NS_INADDRSZ = 4;static const int NS_IN6ADDRSZ = 16;static const int NS_INT16SZ = 2;static intis_valid_ipv6_address (const char *str, const char *end){ static const char xdigits[] = "0123456789abcdef"; const char *curtok; int tp; const char *colonp; int saw_xdigit; unsigned int val; tp = 0; colonp = NULL; if (str == end) return 0; /* Leading :: requires some special handling. */ if (*str == ':') { ++str; if (str == end || *str != ':') return 0; } curtok = str; saw_xdigit = 0; val = 0; while (str < end) { int ch = *str++; const char *pch; /* if ch is a number, add it to val. */ pch = strchr(xdigits, ch); if (pch != NULL) { val <<= 4; val |= (pch - xdigits); if (val > 0xffff) return 0; saw_xdigit = 1; continue; } /* if ch is a colon ... */ if (ch == ':') { curtok = str; if (saw_xdigit == 0) { if (colonp != NULL) return 0; colonp = str + tp; continue; } else if (str == end) { return 0; } if (tp > NS_IN6ADDRSZ - NS_INT16SZ) return 0; tp += NS_INT16SZ; saw_xdigit = 0; val = 0; continue; } /* if ch is a dot ... */ if (ch == '.' && (tp <= NS_IN6ADDRSZ - NS_INADDRSZ) && is_valid_ipv4_address(curtok, end) == 1) { tp += NS_INADDRSZ; saw_xdigit = 0; break; } return 0; } if (saw_xdigit == 1) { if (tp > NS_IN6ADDRSZ - NS_INT16SZ) return 0; tp += NS_INT16SZ; } if (colonp != NULL) { if (tp == NS_IN6ADDRSZ) return 0; tp = NS_IN6ADDRSZ; } if (tp != NS_IN6ADDRSZ) return 0; return 1;}#endif/* Parse a URL. Return a new struct url if successful, NULL on error. In case of error, and if ERROR is not NULL, also set *ERROR to the appropriate error code. */struct url *url_parse (const char *url, int *error){ struct url *u; const char *p; int path_modified, host_modified; enum url_scheme scheme; const char *uname_b, *uname_e; const char *host_b, *host_e; const char *path_b, *path_e; const char *params_b, *params_e; const char *query_b, *query_e; const char *fragment_b, *fragment_e; int port; char *user = NULL, *passwd = NULL; char *url_encoded = NULL; int error_code; scheme = url_scheme (url); if (scheme == SCHEME_INVALID) { error_code = PE_UNSUPPORTED_SCHEME; goto error; } url_encoded = reencode_escapes (url); p = url_encoded; p += strlen (supported_schemes[scheme].leading_string); uname_b = p; p += url_skip_credentials (p); uname_e = p; /* scheme://user:pass@host[:port]... */ /* ^ */ /* We attempt to break down the URL into the components path, params, query, and fragment. They are ordered like this: scheme://host[:port][/path][;params][?query][#fragment] */ params_b = params_e = NULL; query_b = query_e = NULL; fragment_b = fragment_e = NULL; host_b = p; if (*p == '[') { /* Handle IPv6 address inside square brackets. Ideally we'd just look for the terminating ']', but rfc2732 mandates rejecting invalid IPv6 addresses. */ /* The address begins after '['. */ host_b = p + 1; host_e = strchr (host_b, ']'); if (!host_e) { error_code = PE_UNTERMINATED_IPV6_ADDRESS; goto error; }#ifdef ENABLE_IPV6 /* Check if the IPv6 address is valid. */ if (!is_valid_ipv6_address(host_b, host_e)) { error_code = PE_INVALID_IPV6_ADDRESS; goto error; } /* Continue parsing after the closing ']'. */ p = host_e + 1;#else error_code = PE_IPV6_NOT_SUPPORTED; goto error;#endif } else { p = strpbrk_or_eos (p, ":/;?#"); host_e = p; } if (host_b == host_e) { error_code = PE_EMPTY_HOST; goto error; } port = scheme_default_port (scheme); if (*p == ':') { const char *port_b, *port_e, *pp; /* scheme://host:port/tralala */ /* ^ */ ++p; port_b = p; p = strpbrk_or_eos (p, "/;?#"); port_e = p; if (port_b == port_e) { /* http://host:/whatever */ /* ^ */ error_code = PE_BAD_PORT_NUMBER; goto error; } for (port = 0, pp = port_b; pp < port_e; pp++) { if (!ISDIGIT (*pp)) { /* http://host:12randomgarbage/blah */ /* ^ */ error_code = PE_BAD_PORT_NUMBER; goto error; } port = 10 * port + (*pp - '0'); } } if (*p == '/') { ++p; path_b = p; p = strpbrk_or_eos (p, ";?#"); path_e = p; } else { /* Path is not allowed not to exist. */ path_b = path_e = p; } if (*p == ';') { ++p; params_b = p; p = strpbrk_or_eos (p, "?#"); params_e = p; } if (*p == '?') { ++p; query_b = p; p = strpbrk_or_eos (p, "#"); query_e = p; /* Hack that allows users to use '?' (a wildcard character) in FTP URLs without it being interpreted as a query string delimiter. */ if (scheme == SCHEME_FTP) { query_b = query_e = NULL; path_e = p; } } if (*p == '#') { ++p; fragment_b = p; p += strlen (p); fragment_e = p; } assert (*p == 0); if (uname_b != uname_e) { /* http://user:pass@host */ /* ^ ^ */ /* uname_b uname_e */ if (!parse_credentials (uname_b, uname_e - 1, &user, &passwd)) { error_code = PE_INVALID_USER_NAME; goto error; } } u = (struct url *)xmalloc (sizeof (struct url)); memset (u, 0, sizeof (*u)); u->scheme = scheme; u->host = strdupdelim (host_b, host_e); u->port = port; u->user = user; u->passwd = passwd; u->path = strdupdelim (path_b, path_e); path_modified = path_simplify (u->path); split_path (u->path, &u->dir, &u->file); host_modified = lowercase_str (u->host); if (params_b) u->params = strdupdelim (params_b, params_e); if (query_b) u->query = strdupdelim (query_b, query_e); if (fragment_b) u->fragment = strdupdelim (fragment_b, fragment_e); if (path_modified || u->fragment || host_modified || path_b == path_e) { /* If we suspect that a transformation has rendered what url_string might return different from URL_ENCODED, rebuild u->url using url_string. */ u->url = url_string (u, 0); if (url_encoded != url) xfree ((char *) url_encoded); } else { if (url_encoded == url) u->url = xstrdup (url); else u->url = url_encoded; } url_encoded = NULL; return u; error: /* Cleanup in case of error: */ if (url_encoded && url_encoded != url) xfree (url_encoded); /* Transmit the error code to the caller, if the caller wants to know. */ if (error) *error = error_code; return NULL;}/* Return the error message string from ERROR_CODE, which should have been retrieved from url_parse. The error message is translated. */const char *url_error (int error_code){ assert (error_code >= 0 && error_code < countof (parse_errors)); return _(parse_errors[error_code]);}/* Split PATH into DIR and FILE. PATH comes from the URL and is expected to be URL-escaped. The path is split into directory (the part up to the last slash) and file (the part after the last slash), which are subsequently unescaped. Examples: PATH DIR FILE "foo/bar/baz" "foo/bar" "baz" "foo/bar/" "foo/bar" "" "foo" "" "foo" "foo/bar/baz%2fqux" "foo/bar" "baz/qux" (!) DIR and FILE are freshly allocated. */static voidsplit_path (const char *path, char **dir, char **file){ char *last_slash = strrchr (path, '/'); if (!last_slash) { *dir = xstrdup (""); *file = xstrdup (path); } else { *dir = strdupdelim (path, last_slash); *file = xstrdup (last_slash + 1); } url_unescape (*dir); url_unescape (*file);}/* Note: URL's "full path" is the path with the query string and params appended. The "fragment" (#foo) is intentionally ignored, but that might be changed. For example, if the original URL was "http://host:port/foo/bar/baz;bullshit?querystring#uselessfragment", the full path will be "/foo/bar/baz;bullshit?querystring". *//* Return the length of the full path, without the terminating zero. */static int
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -