http.c
  assert (pc_last_host_ip == NULL);

  /* This lookup_host cannot fail, because it has the results in the
     cache.  */
  pc_last_host_ip = lookup_host (host, 1);
  assert (pc_last_host_ip != NULL);

  pc_last_port = port;
  pc_last_fd = fd;
  pc_active_p = 1;
#ifdef HAVE_SSL
  pc_last_ssl = ssl;
  pc_active_ssl = ssl ? 1 : 0;
#endif
  DEBUGP (("Registered fd %d for persistent reuse.\n", fd));
}

#ifdef HAVE_SSL
# define SHUTDOWN_SSL(ssl) do {                 \
  if (ssl)                                      \
    shutdown_ssl (ssl);                         \
} while (0)
#else
# define SHUTDOWN_SSL(ssl)
#endif

/* Return non-zero if a persistent connection is available for
   connecting to HOST:PORT.  */
#ifdef HAVE_SSL
static int
persistent_available_p (const char *host, unsigned short port, int ssl)
{
#else
static int
persistent_available_p (const char *host, unsigned short port)
{
#endif
  int success;
  struct address_list *this_host_ip;

  /* First, check whether a persistent connection is active at all.  */
  if (!pc_active_p)
    return 0;

  /* Second, check if the active connection pertains to the correct
     (HOST, PORT) ordered pair.  */
  if (port != pc_last_port)
    return 0;

#ifdef HAVE_SSL
  /* Second, a): check whether the current connection is (not) SSL,
     too.  This test is unlikely to fail because HTTP and HTTPS
     typically use different ports.  Yet it is possible, or so I
     [Christian Fraenkel] have been told, to run HTTPS and HTTP
     simultaneously on the same port.  */
  if (ssl != pc_active_ssl)
    return 0;
#endif /* HAVE_SSL */

  this_host_ip = lookup_host (host, 1);
  if (!this_host_ip)
    return 0;

  /* To equate the two host names for the purposes of persistent
     connections, they need to share all the IP addresses in the
     list.  */
  success = address_list_match_all (pc_last_host_ip, this_host_ip);
  address_list_release (this_host_ip);
  if (!success)
    return 0;

  /* Third: check whether the connection is still open.  This is
     important because most servers implement a liberal (short) timeout
     on persistent connections.  Wget can of course always reconnect if
     the connection doesn't work out, but it's nicer to know in
     advance.  This test is a logical follow-up of the first test, but
     is "expensive" and therefore placed at the end of the list.  */
  if (!test_socket_open (pc_last_fd))
    {
      /* Oops, the socket is no longer open.  Now that we know that,
         let's invalidate the persistent connection before returning
         0.  */
      CLOSE (pc_last_fd);
#ifdef HAVE_SSL
      SHUTDOWN_SSL (pc_last_ssl);
      pc_last_ssl = NULL;
#endif
      invalidate_persistent ();
      return 0;
    }
  return 1;
}
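/* Illustrative sketch -- not part of the original http.c.  It shows how
   the availability check above is meant to be combined with the
   registered globals, mirroring the reuse branch of gethttp () further
   down.  The helper name `try_reuse_persistent' is hypothetical; a real
   caller would open a fresh connection and call register_persistent ()
   when -1 is returned.  */
#ifdef HAVE_SSL
static int
try_reuse_persistent (const char *host, unsigned short port, int ssl)
{
  if (persistent_available_p (host, port, ssl))
    return pc_last_fd;          /* reuse the socket registered above */
  return -1;                    /* no usable persistent connection */
}
#else
static int
try_reuse_persistent (const char *host, unsigned short port)
{
  if (persistent_available_p (host, port))
    return pc_last_fd;          /* reuse the socket registered above */
  return -1;                    /* no usable persistent connection */
}
#endif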
/* The idea behind these two CLOSE macros is to distinguish between two
   cases: one when the job we've been doing is finished, and we want to
   close the connection and leave, and two when something is seriously
   wrong and we're closing the connection as part of cleanup.

   In case of keep_alive, CLOSE_FINISH should leave the connection
   open, while CLOSE_INVALIDATE should still close it.

   Note that the semantics of the flag `keep_alive' is "this connection
   *will* be reused (the server has promised not to close the
   connection once we're done)", while the semantics of `pc_active_p &&
   (fd) == pc_last_fd' is "we're *now* using an active, registered
   connection".  */

#define CLOSE_FINISH(fd) do {                           \
  if (!keep_alive)                                      \
    {                                                   \
      SHUTDOWN_SSL (ssl);                               \
      CLOSE (fd);                                       \
      if (pc_active_p && (fd) == pc_last_fd)            \
        invalidate_persistent ();                       \
    }                                                   \
} while (0)

#define CLOSE_INVALIDATE(fd) do {                       \
  SHUTDOWN_SSL (ssl);                                   \
  CLOSE (fd);                                           \
  if (pc_active_p && (fd) == pc_last_fd)                \
    invalidate_persistent ();                           \
} while (0)

struct http_stat
{
  long len;                     /* received length */
  long contlen;                 /* expected length */
  long restval;                 /* the restart value */
  int res;                      /* the result of last read */
  char *newloc;                 /* new location (redirection) */
  char *remote_time;            /* remote time-stamp string */
  char *error;                  /* textual HTTP error */
  int statcode;                 /* status code */
  double dltime;                /* time of the download in msecs */
  int no_truncate;              /* whether truncating the file is
                                   forbidden. */
  const char *referer;          /* value of the referer header. */
  char **local_file;            /* local file. */
};

static void
free_hstat (struct http_stat *hs)
{
  FREE_MAYBE (hs->newloc);
  FREE_MAYBE (hs->remote_time);
  FREE_MAYBE (hs->error);

  /* Guard against being called twice. */
  hs->newloc = NULL;
  hs->remote_time = NULL;
  hs->error = NULL;
}

static char *create_authorization_line PARAMS ((const char *, const char *,
                                                const char *, const char *,
                                                const char *));
static char *basic_authentication_encode PARAMS ((const char *, const char *,
                                                  const char *));
static int known_authentication_scheme_p PARAMS ((const char *));

time_t http_atotm PARAMS ((const char *));

#define BEGINS_WITH(line, string_constant)                              \
  (!strncasecmp (line, string_constant, sizeof (string_constant) - 1)   \
   && (ISSPACE (line[sizeof (string_constant) - 1])                     \
       || !line[sizeof (string_constant) - 1]))
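/* Illustrative sketch -- not part of the original file.  BEGINS_WITH is
   a case-insensitive *token* match: the prefix must be followed by
   whitespace or the end of the string, so it matches whole words such
   as an authentication-scheme name at the start of a challenge string.
   The function name `demo_begins_with' is hypothetical.  */
static void
demo_begins_with (void)
{
  assert (BEGINS_WITH ("Basic realm=\"example\"", "Basic"));  /* token + space */
  assert (BEGINS_WITH ("BASIC realm=\"example\"", "Basic"));  /* case-insensitive */
  assert (!BEGINS_WITH ("BasicFoo realm=\"x\"", "Basic"));    /* no token boundary */
}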
/* Retrieve a document through the HTTP protocol.  It recognizes the
   status code and correctly handles redirections.  It closes the
   network socket.  If it receives an error from the functions below
   it, it prints the error if there is enough information to do so
   (almost always), and returns the error to the caller
   (i.e. http_loop).

   Various HTTP parameters are stored in hs.

   Although it parses the response code correctly, the code is not used
   in a sane way; the caller can do that, though.

   If PROXY is non-NULL, the connection will be made to the proxy
   server, and u->url will be requested.  */
static uerr_t
gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
{
  char *request, *type, *command, *full_path;
  char *user, *passwd;
  char *pragma_h, *referer, *useragent, *range, *wwwauth;
  char *authenticate_h;
  char *proxyauth;
  char *all_headers;
  char *port_maybe;
  char *request_keep_alive;
  int sock, hcount, all_length, statcode;
  int write_error;
  long contlen, contrange;
  struct url *conn;
  FILE *fp;
  int auth_tried_already;
  struct rbuf rbuf;
#ifdef HAVE_SSL
  static SSL_CTX *ssl_ctx = NULL;
  SSL *ssl = NULL;
#endif
  char *cookies = NULL;

  /* Whether this connection will be kept alive after the HTTP request
     is done. */
  int keep_alive;

  /* Flags that detect the two ways of specifying an HTTP keep-alive
     response. */
  int http_keep_alive_1, http_keep_alive_2;

  /* Whether keep-alive should be inhibited. */
  int inhibit_keep_alive;

  /* Whether we need to print the host header with braces around host,
     e.g. "Host: [3ffe:8100:200:2::2]:1234"
     instead of the usual "Host: symbolic-name:1234". */
  int squares_around_host = 0;

  /* Headers sent when using POST. */
  char *post_content_type, *post_content_length;
  long post_data_size = 0;

#ifdef HAVE_SSL
  /* initialize ssl_ctx on first run */
  if (!ssl_ctx)
    {
      uerr_t err = init_ssl (&ssl_ctx);
      if (err != 0)
        {
          switch (err)
            {
            case SSLERRCTXCREATE:
              /* this is fatal */
              logprintf (LOG_NOTQUIET,
                         _("Failed to set up an SSL context\n"));
              ssl_printerrors ();
              return err;
            case SSLERRCERTFILE:
              /* try without certfile */
              logprintf (LOG_NOTQUIET,
                         _("Failed to load certificates from %s\n"),
                         opt.sslcertfile);
              ssl_printerrors ();
              logprintf (LOG_NOTQUIET,
                         _("Trying without the specified certificate\n"));
              break;
            case SSLERRCERTKEY:
              logprintf (LOG_NOTQUIET,
                         _("Failed to get certificate key from %s\n"),
                         opt.sslcertkey);
              ssl_printerrors ();
              logprintf (LOG_NOTQUIET,
                         _("Trying without the specified certificate\n"));
              break;
            default:
              break;
            }
        }
    }
#endif /* HAVE_SSL */

  if (!(*dt & HEAD_ONLY))
    /* If we're doing a GET on the URL, as opposed to just a HEAD, we
       need to know the local filename so we can save to it. */
    assert (*hs->local_file != NULL);

  authenticate_h = 0;
  auth_tried_already = 0;

  inhibit_keep_alive = !opt.http_keep_alive || proxy != NULL;

 again:
  /* We need to come back here when the initial attempt to retrieve
     without an authorization header fails.  (Expected to happen at
     least for the Digest authorization scheme.)  */
  keep_alive = 0;
  http_keep_alive_1 = http_keep_alive_2 = 0;

  post_content_type = NULL;
  post_content_length = NULL;

  /* Initialize certain elements of struct http_stat.  */
  hs->len = 0L;
  hs->contlen = -1;
  hs->res = -1;
  hs->newloc = NULL;
  hs->remote_time = NULL;
  hs->error = NULL;

  /* If we're using a proxy, we will be connecting to the proxy
     server.  */
  conn = proxy ? proxy : u;

  /* First: establish the connection.  */
  if (inhibit_keep_alive
      ||
#ifndef HAVE_SSL
      !persistent_available_p (conn->host, conn->port)
#else
      !persistent_available_p (conn->host, conn->port,
                               u->scheme == SCHEME_HTTPS)
#endif /* HAVE_SSL */
      )
    {
      struct address_list *al = lookup_host (conn->host, 0);
      if (!al)
        return HOSTERR;
      set_connection_host_name (conn->host);
      sock = connect_to_many (al, conn->port, 0);
      set_connection_host_name (NULL);
      address_list_release (al);

      if (sock < 0)
        return CONNECT_ERROR (errno);

#ifdef HAVE_SSL
      if (conn->scheme == SCHEME_HTTPS)
        if (connect_ssl (&ssl, ssl_ctx, sock) != 0)
          {
            logputs (LOG_VERBOSE, "\n");
            logprintf (LOG_NOTQUIET,
                       _("Unable to establish SSL connection.\n"));
            ssl_printerrors ();
            CLOSE (sock);
            return CONSSLERR;
          }
#endif /* HAVE_SSL */
    }
  else
    {
      logprintf (LOG_VERBOSE, _("Reusing connection to %s:%hu.\n"),
                 conn->host, conn->port);
      /* #### pc_last_fd should be accessed through an accessor
         function.  */
      sock = pc_last_fd;
#ifdef HAVE_SSL
      ssl = pc_last_ssl;
#endif /* HAVE_SSL */
      DEBUGP (("Reusing fd %d.\n", sock));
    }

  if (*dt & HEAD_ONLY)
    command = "HEAD";
  else if (opt.post_file_name || opt.post_data)
    command = "POST";
  else
    command = "GET";

  referer = NULL;
  if (hs->referer)
    {
      referer = (char *)alloca (9 + strlen (hs->referer) + 3);
      sprintf (referer, "Referer: %s\r\n", hs->referer);
    }

  if (*dt & SEND_NOCACHE)
    pragma_h = "Pragma: no-cache\r\n";
  else
    pragma_h = "";

  if (hs->restval)
    {
      range = (char *)alloca (13 + numdigit (hs->restval) + 4);
      /* Gag me!  Some servers (e.g. WebSitePro) have been known to
         respond to the following `Range' format by generating a
         multipart/x-byte-ranges MIME document!  This MIME type was
         present in an old draft of the byteranges specification.
         HTTP/1.1 specifies a multipart/byte-ranges MIME type, but only
         if multiple non-overlapping ranges are requested -- which Wget
         never does.  */
      sprintf (range, "Range: bytes=%ld-\r\n", hs->restval);
    }
  else
    range = NULL;
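  /* Illustrative note -- not part of the original source: for a
     download restarted at byte offset 15000, the header built above is
     literally "Range: bytes=15000-", i.e. an open-ended range from the
     restart offset to the end of the resource.  */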
  if (opt.useragent)
    STRDUP_ALLOCA (useragent, opt.useragent);
  else
    {
      useragent = (char *)alloca (10 + strlen (version_string));
      sprintf (useragent, "Wget/%s", version_string);
    }

  /* Construct the authentication, if userid is present.  */
  user = u->user;
  passwd = u->passwd;
  search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
  user = user ? user : opt.http_user;
  passwd = passwd ? passwd : opt.http_passwd;

  wwwauth = NULL;
  if (user && passwd)
    {
      if (!authenticate_h)
        {
          /* We have the username and the password, but haven't tried
             any authorization yet.  Let's see if the "Basic" method
             works.  If not, we'll come back here and construct a
             proper authorization method with the right challenges.

             If we didn't employ this kind of logic, every URL that
             requires authorization would have to be processed twice,
             which is very suboptimal and generates a bunch of false
             "unauthorized" errors in the server log.

             #### But this logic also has a serious problem when used
             with stronger authentications: we *first* transmit the
             username and the password in clear text, and *then*
             attempt a stronger authentication scheme.  That cannot be
             right!  We are only fortunate that almost everyone still
             uses the `Basic' scheme anyway.

             There should be an option to prevent this from happening,
             for those who use strong authentication schemes and value
             their passwords.  */
          wwwauth = basic_authentication_encode (user, passwd,
                                                 "Authorization");
        }
      else
        {
          /* Use the full path, i.e. one that includes the leading
             slash and the query string, but is independent of proxy
             setting.  */
          char *pth = url_full_path (u);
          wwwauth = create_authorization_line (authenticate_h, user,
                                               passwd, command, pth);
          xfree (pth);
        }
    }

  proxyauth = NULL;
  if (proxy)
    {
      char *proxy_user, *proxy_passwd;
      /* For normal username and password, URL components override
         command-line/wgetrc parameters.  With proxy authentication,
         it's the reverse, because proxy URLs are normally the
         "permanent" ones, so command-line args should take
         precedence.  */
      if (opt.proxy_user && opt.proxy_passwd)