📄 url.c
字号:
return SCHEME_INVALID;}#define SCHEME_CHAR(ch) (ISALNUM (ch) || (ch) == '-' || (ch) == '+')/* Return 1 if the URL begins with any "scheme", 0 otherwise. As currently implemented, it returns true if URL begins with [-+a-zA-Z0-9]+: . */boolurl_has_scheme (const char *url){ const char *p = url; /* The first char must be a scheme char. */ if (!*p || !SCHEME_CHAR (*p)) return false; ++p; /* Followed by 0 or more scheme chars. */ while (*p && SCHEME_CHAR (*p)) ++p; /* Terminated by ':'. */ return *p == ':';}intscheme_default_port (enum url_scheme scheme){ return supported_schemes[scheme].default_port;}voidscheme_disable (enum url_scheme scheme){ supported_schemes[scheme].flags |= scm_disabled;}/* Skip the username and password, if present in the URL. The function should *not* be called with the complete URL, but with the portion after the scheme. If no username and password are found, return URL. */static const char *url_skip_credentials (const char *url){ /* Look for '@' that comes before terminators, such as '/', '?', '#', or ';'. */ const char *p = (const char *)strpbrk (url, "@/?#;"); if (!p || *p != '@') return url; return p + 1;}/* Parse credentials contained in [BEG, END). The region is expected to have come from a URL and is unescaped. */static boolparse_credentials (const char *beg, const char *end, char **user, char **passwd){ char *colon; const char *userend; if (beg == end) return false; /* empty user name */ colon = memchr (beg, ':', end - beg); if (colon == beg) return false; /* again empty user name */ if (colon) { *passwd = strdupdelim (colon + 1, end); userend = colon; url_unescape (*passwd); } else { *passwd = NULL; userend = end; } *user = strdupdelim (beg, userend); url_unescape (*user); return true;}/* Used by main.c: detect URLs written using the "shorthand" URL forms originally popularized by Netscape and NcFTP. HTTP shorthands look like this: www.foo.com[:port]/dir/file -> http://www.foo.com[:port]/dir/file www.foo.com[:port] -> http://www.foo.com[:port] FTP shorthands look like this: foo.bar.com:dir/file -> ftp://foo.bar.com/dir/file foo.bar.com:/absdir/file -> ftp://foo.bar.com//absdir/file If the URL needs not or cannot be rewritten, return NULL. */char *rewrite_shorthand_url (const char *url){ const char *p; char *ret; if (url_scheme (url) != SCHEME_INVALID) return NULL; /* Look for a ':' or '/'. The former signifies NcFTP syntax, the latter Netscape. */ p = strpbrk (url, ":/"); if (p == url) return NULL; /* If we're looking at "://", it means the URL uses a scheme we don't support, which may include "https" when compiled without SSL support. Don't bogusly rewrite such URLs. */ if (p && p[0] == ':' && p[1] == '/' && p[2] == '/') return NULL; if (p && *p == ':') { /* Colon indicates ftp, as in foo.bar.com:path. Check for special case of http port number ("localhost:10000"). */ int digits = strspn (p + 1, "0123456789"); if (digits && (p[1 + digits] == '/' || p[1 + digits] == '\0')) goto http; /* Turn "foo.bar.com:path" to "ftp://foo.bar.com/path". */ ret = aprintf ("ftp://%s", url); ret[6 + (p - url)] = '/'; } else { http: /* Just prepend "http://" to URL. */ ret = aprintf ("http://%s", url); } return ret;}static void split_path (const char *, char **, char **);/* Like strpbrk, with the exception that it returns the pointer to the terminating zero (end-of-string aka "eos") if no matching character is found. */static inline char *strpbrk_or_eos (const char *s, const char *accept){ char *p = strpbrk (s, accept); if (!p) p = strchr (s, '\0'); return p;}/* Turn STR into lowercase; return true if a character was actually changed. */static boollowercase_str (char *str){ bool changed = false; for (; *str; str++) if (ISUPPER (*str)) { changed = true; *str = TOLOWER (*str); } return changed;}static const char *init_seps (enum url_scheme scheme){ static char seps[8] = ":/"; char *p = seps + 2; int flags = supported_schemes[scheme].flags; if (flags & scm_has_params) *p++ = ';'; if (flags & scm_has_query) *p++ = '?'; if (flags & scm_has_fragment) *p++ = '#'; *p++ = '\0'; return seps;}static const char *parse_errors[] = {#define PE_NO_ERROR 0 N_("No error"),#define PE_UNSUPPORTED_SCHEME 1 N_("Unsupported scheme"),#define PE_INVALID_HOST_NAME 2 N_("Invalid host name"),#define PE_BAD_PORT_NUMBER 3 N_("Bad port number"),#define PE_INVALID_USER_NAME 4 N_("Invalid user name"),#define PE_UNTERMINATED_IPV6_ADDRESS 5 N_("Unterminated IPv6 numeric address"),#define PE_IPV6_NOT_SUPPORTED 6 N_("IPv6 addresses not supported"),#define PE_INVALID_IPV6_ADDRESS 7 N_("Invalid IPv6 numeric address")};/* Parse a URL. Return a new struct url if successful, NULL on error. In case of error, and if ERROR is not NULL, also set *ERROR to the appropriate error code. */struct url *url_parse (const char *url, int *error){ struct url *u; const char *p; bool path_modified, host_modified; enum url_scheme scheme; const char *seps; const char *uname_b, *uname_e; const char *host_b, *host_e; const char *path_b, *path_e; const char *params_b, *params_e; const char *query_b, *query_e; const char *fragment_b, *fragment_e; int port; char *user = NULL, *passwd = NULL; char *url_encoded = NULL; int error_code; scheme = url_scheme (url); if (scheme == SCHEME_INVALID) { error_code = PE_UNSUPPORTED_SCHEME; goto error; } url_encoded = reencode_escapes (url); p = url_encoded; p += strlen (supported_schemes[scheme].leading_string); uname_b = p; p = url_skip_credentials (p); uname_e = p; /* scheme://user:pass@host[:port]... */ /* ^ */ /* We attempt to break down the URL into the components path, params, query, and fragment. They are ordered like this: scheme://host[:port][/path][;params][?query][#fragment] */ path_b = path_e = NULL; params_b = params_e = NULL; query_b = query_e = NULL; fragment_b = fragment_e = NULL; /* Initialize separators for optional parts of URL, depending on the scheme. For example, FTP has params, and HTTP and HTTPS have query string and fragment. */ seps = init_seps (scheme); host_b = p; if (*p == '[') { /* Handle IPv6 address inside square brackets. Ideally we'd just look for the terminating ']', but rfc2732 mandates rejecting invalid IPv6 addresses. */ /* The address begins after '['. */ host_b = p + 1; host_e = strchr (host_b, ']'); if (!host_e) { error_code = PE_UNTERMINATED_IPV6_ADDRESS; goto error; }#ifdef ENABLE_IPV6 /* Check if the IPv6 address is valid. */ if (!is_valid_ipv6_address(host_b, host_e)) { error_code = PE_INVALID_IPV6_ADDRESS; goto error; } /* Continue parsing after the closing ']'. */ p = host_e + 1;#else error_code = PE_IPV6_NOT_SUPPORTED; goto error;#endif /* The closing bracket must be followed by a separator or by the null char. */ /* http://[::1]... */ /* ^ */ if (!strchr (seps, *p)) { /* Trailing garbage after []-delimited IPv6 address. */ error_code = PE_INVALID_HOST_NAME; goto error; } } else { p = strpbrk_or_eos (p, seps); host_e = p; } ++seps; /* advance to '/' */ if (host_b == host_e) { error_code = PE_INVALID_HOST_NAME; goto error; } port = scheme_default_port (scheme); if (*p == ':') { const char *port_b, *port_e, *pp; /* scheme://host:port/tralala */ /* ^ */ ++p; port_b = p; p = strpbrk_or_eos (p, seps); port_e = p; /* Allow empty port, as per rfc2396. */ if (port_b != port_e) for (port = 0, pp = port_b; pp < port_e; pp++) { if (!ISDIGIT (*pp)) { /* http://host:12randomgarbage/blah */ /* ^ */ error_code = PE_BAD_PORT_NUMBER; goto error; } port = 10 * port + (*pp - '0'); /* Check for too large port numbers here, before we have a chance to overflow on bogus port values. */ if (port > 0xffff) { error_code = PE_BAD_PORT_NUMBER; goto error; } } } /* Advance to the first separator *after* '/' (either ';' or '?', depending on the scheme). */ ++seps; /* Get the optional parts of URL, each part being delimited by current location and the position of the next separator. */#define GET_URL_PART(sepchar, var) do { \ if (*p == sepchar) \ var##_b = ++p, var##_e = p = strpbrk_or_eos (p, seps); \ ++seps; \} while (0) GET_URL_PART ('/', path); if (supported_schemes[scheme].flags & scm_has_params) GET_URL_PART (';', params); if (supported_schemes[scheme].flags & scm_has_query) GET_URL_PART ('?', query); if (supported_schemes[scheme].flags & scm_has_fragment) GET_URL_PART ('#', fragment);#undef GET_URL_PART assert (*p == 0); if (uname_b != uname_e) { /* http://user:pass@host */ /* ^ ^ */ /* uname_b uname_e */ if (!parse_credentials (uname_b, uname_e - 1, &user, &passwd)) { error_code = PE_INVALID_USER_NAME; goto error; } } u = xnew0 (struct url); u->scheme = scheme; u->host = strdupdelim (host_b, host_e); u->port = port; u->user = user; u->passwd = passwd; u->path = strdupdelim (path_b, path_e); path_modified = path_simplify (u->path); split_path (u->path, &u->dir, &u->file); host_modified = lowercase_str (u->host); /* Decode %HH sequences in host name. This is important not so much to support %HH sequences in host names (which other browser don't), but to support binary characters (which will have been converted to %HH by reencode_escapes). */ if (strchr (u->host, '%')) { url_unescape (u->host); host_modified = true; } if (params_b) u->params = strdupdelim (params_b, params_e); if (query_b) u->query = strdupdelim (query_b, query_e); if (fragment_b) u->fragment = strdupdelim (fragment_b, fragment_e); if (path_modified || u->fragment || host_modified || path_b == path_e) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -