📄 http.c
字号:
fd_close (pconn.socket); xfree (pconn.host); xzero (pconn);}/* Register FD, which should be a TCP/IP connection to HOST:PORT, as persistent. This will enable someone to use the same connection later. In the context of HTTP, this must be called only AFTER the response has been received and the server has promised that the connection will remain alive. If a previous connection was persistent, it is closed. */static voidregister_persistent (const char *host, int port, int fd, int ssl){ if (pconn_active) { if (pconn.socket == fd) { /* The connection FD is already registered. */ return; } else { /* The old persistent connection is still active; close it first. This situation arises whenever a persistent connection exists, but we then connect to a different host, and try to register a persistent connection to that one. */ invalidate_persistent (); } } pconn_active = 1; pconn.socket = fd; pconn.host = xstrdup (host); pconn.port = port; pconn.ssl = ssl; pconn.authorized = 0; DEBUGP (("Registered socket %d for persistent reuse.\n", fd));}/* Return non-zero if a persistent connection is available for connecting to HOST:PORT. */static intpersistent_available_p (const char *host, int port, int ssl, int *host_lookup_failed){ /* First, check whether a persistent connection is active at all. */ if (!pconn_active) return 0; /* If we want SSL and the last connection wasn't or vice versa, don't use it. Checking for host and port is not enough because HTTP and HTTPS can apparently coexist on the same port. */ if (ssl != pconn.ssl) return 0; /* If we're not connecting to the same port, we're not interested. */ if (port != pconn.port) return 0; /* If the host is the same, we're in business. If not, there is still hope -- read below. */ if (0 != strcasecmp (host, pconn.host)) { /* Check if pconn.socket is talking to HOST under another name. This happens often when both sites are virtual hosts distinguished only by name and served by the same network interface, and hence the same web server (possibly set up by the ISP and serving many different web sites). This admittedly unconventional optimization does not contradict HTTP and works well with popular server software. */ int found; ip_address ip; struct address_list *al; if (ssl) /* Don't try to talk to two different SSL sites over the same secure connection! (Besides, it's not clear that name-based virtual hosting is even possible with SSL.) */ return 0; /* If pconn.socket's peer is one of the IP addresses HOST resolves to, pconn.socket is for all intents and purposes already talking to HOST. */ if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER)) { /* Can't get the peer's address -- something must be very wrong with the connection. */ invalidate_persistent (); return 0; } al = lookup_host (host, 0); if (!al) { *host_lookup_failed = 1; return 0; } found = address_list_contains (al, &ip); address_list_release (al); if (!found) return 0; /* The persistent connection's peer address was found among the addresses HOST resolved to; therefore, pconn.sock is in fact already talking to HOST -- no need to reconnect. */ } /* Finally, check whether the connection is still open. This is important because most server implement a liberal (short) timeout on persistent connections. Wget can of course always reconnect if the connection doesn't work out, but it's nicer to know in advance. This test is a logical followup of the first test, but is "expensive" and therefore placed at the end of the list. */ if (!test_socket_open (pconn.socket)) { /* Oops, the socket is no longer open. Now that we know that, let's invalidate the persistent connection before returning 0. */ invalidate_persistent (); return 0; } return 1;}/* The idea behind these two CLOSE macros is to distinguish between two cases: one when the job we've been doing is finished, and we want to close the connection and leave, and two when something is seriously wrong and we're closing the connection as part of cleanup. In case of keep_alive, CLOSE_FINISH should leave the connection open, while CLOSE_INVALIDATE should still close it. Note that the semantics of the flag `keep_alive' is "this connection *will* be reused (the server has promised not to close the connection once we're done)", while the semantics of `pc_active_p && (fd) == pc_last_fd' is "we're *now* using an active, registered connection". */#define CLOSE_FINISH(fd) do { \ if (!keep_alive) \ { \ if (pconn_active && (fd) == pconn.socket) \ invalidate_persistent (); \ else \ { \ fd_close (fd); \ fd = -1; \ } \ } \} while (0)#define CLOSE_INVALIDATE(fd) do { \ if (pconn_active && (fd) == pconn.socket) \ invalidate_persistent (); \ else \ fd_close (fd); \ fd = -1; \} while (0)struct http_stat{ wgint len; /* received length */ wgint contlen; /* expected length */ wgint restval; /* the restart value */ int res; /* the result of last read */ char *newloc; /* new location (redirection) */ char *remote_time; /* remote time-stamp string */ char *error; /* textual HTTP error */ int statcode; /* status code */ wgint rd_size; /* amount of data read from socket */ double dltime; /* time it took to download the data */ const char *referer; /* value of the referer header. */ char **local_file; /* local file. */};static voidfree_hstat (struct http_stat *hs){ xfree_null (hs->newloc); xfree_null (hs->remote_time); xfree_null (hs->error); /* Guard against being called twice. */ hs->newloc = NULL; hs->remote_time = NULL; hs->error = NULL;}static char *create_authorization_line PARAMS ((const char *, const char *, const char *, const char *, const char *, int *));static char *basic_authentication_encode PARAMS ((const char *, const char *));static int known_authentication_scheme_p PARAMS ((const char *, const char *));time_t http_atotm PARAMS ((const char *));#define BEGINS_WITH(line, string_constant) \ (!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \ && (ISSPACE (line[sizeof (string_constant) - 1]) \ || !line[sizeof (string_constant) - 1]))#define SET_USER_AGENT(req) do { \ if (!opt.useragent) \ request_set_header (req, "User-Agent", \ aprintf ("Wget/%s", version_string), rel_value); \ else if (*opt.useragent) \ request_set_header (req, "User-Agent", opt.useragent, rel_none); \} while (0)/* The flags that allow clobbering the file (opening with "wb"). Defined here to avoid repetition later. #### This will require rework. */#define ALLOW_CLOBBER (opt.noclobber || opt.always_rest || opt.timestamping \ || opt.dirstruct || opt.output_document)/* Retrieve a document through HTTP protocol. It recognizes status code, and correctly handles redirections. It closes the network socket. If it receives an error from the functions below it, it will print it if there is enough information to do so (almost always), returning the error to the caller (i.e. http_loop). Various HTTP parameters are stored to hs. If PROXY is non-NULL, the connection will be made to the proxy server, and u->url will be requested. */static uerr_tgethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy){ struct request *req; char *type; char *user, *passwd; char *proxyauth; int statcode; int write_error; wgint contlen, contrange; struct url *conn; FILE *fp; int sock = -1; int flags; /* Set to 1 when the authorization has failed permanently and should not be tried again. */ int auth_finished = 0; /* Whether NTLM authentication is used for this request. */ int ntlm_seen = 0; /* Whether our connection to the remote host is through SSL. */ int using_ssl = 0; /* Whether a HEAD request will be issued (as opposed to GET or POST). */ int head_only = *dt & HEAD_ONLY; char *head; struct response *resp; char hdrval[256]; char *message; /* Whether this connection will be kept alive after the HTTP request is done. */ int keep_alive; /* Whether keep-alive should be inhibited. RFC 2068 requests that 1.0 clients not send keep-alive requests to proxies. This is because many 1.0 proxies do not interpret the Connection header and transfer it to the remote server, causing it to not close the connection and leave both the proxy and the client hanging. */ int inhibit_keep_alive = !opt.http_keep_alive || opt.ignore_length || proxy != NULL; /* Headers sent when using POST. */ wgint post_data_size = 0; int host_lookup_failed = 0;#ifdef HAVE_SSL if (u->scheme == SCHEME_HTTPS) { /* Initialize the SSL context. After this has once been done, it becomes a no-op. */ if (!ssl_init ()) { scheme_disable (SCHEME_HTTPS); logprintf (LOG_NOTQUIET, _("Disabling SSL due to encountered errors.\n")); return SSLINITFAILED; } }#endif /* HAVE_SSL */ if (!head_only) /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to know the local filename so we can save to it. */ assert (*hs->local_file != NULL); /* Initialize certain elements of struct http_stat. */ hs->len = 0; hs->contlen = -1; hs->res = -1; hs->newloc = NULL; hs->remote_time = NULL; hs->error = NULL; conn = u; /* Prepare the request to send. */ req = request_new (); { char *meth_arg; const char *meth = "GET"; if (head_only) meth = "HEAD"; else if (opt.post_file_name || opt.post_data) meth = "POST"; /* Use the full path, i.e. one that includes the leading slash and the query string. E.g. if u->path is "foo/bar" and u->query is "param=value", full_path will be "/foo/bar?param=value". */ if (proxy#ifdef HAVE_SSL /* When using SSL over proxy, CONNECT establishes a direct connection to the HTTPS server. Therefore use the same argument as when talking to the server directly. */ && u->scheme != SCHEME_HTTPS#endif ) meth_arg = xstrdup (u->url); else meth_arg = url_full_path (u); request_set_method (req, meth, meth_arg); } request_set_header (req, "Referer", (char *) hs->referer, rel_none); if (*dt & SEND_NOCACHE) request_set_header (req, "Pragma", "no-cache", rel_none); if (hs->restval) request_set_header (req, "Range", aprintf ("bytes=%s-", number_to_static_string (hs->restval)), rel_value); SET_USER_AGENT (req); request_set_header (req, "Accept", "*/*", rel_none); /* Find the username and password for authentication. */ user = u->user; passwd = u->passwd; search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0); user = user ? user : (opt.http_user ? opt.http_user : opt.user); passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd); if (user && passwd) { /* We have the username and the password, but haven't tried any authorization yet. Let's see if the "Basic" method works. If not, we'll come back here and construct a proper authorization method with the right challenges. If we didn't employ this kind of logic, every URL that requires authorization would have to be processed twice, which is very suboptimal and generates a bunch of false "unauthorized" errors in the server log. #### But this logic also has a serious problem when used with stronger authentications: we *first* transmit the username and the password in clear text, and *then* attempt a stronger authentication scheme. That cannot be right! We are only fortunate that almost everyone still uses the `Basic' scheme anyway. There should be an option to prevent this from happening, for those who use strong authentication schemes and value their passwords. */ request_set_header (req, "Authorization", basic_authentication_encode (user, passwd), rel_value); } proxyauth = NULL; if (proxy) { char *proxy_user, *proxy_passwd; /* For normal username and password, URL components override command-line/wgetrc parameters. With proxy authentication, it's the reverse, because proxy URLs are normally the "permanent" ones, so command-line args should take precedence. */ if (opt.proxy_user && opt.proxy_passwd) { proxy_user = opt.proxy_user; proxy_passwd = opt.proxy_passwd; } else { proxy_user = proxy->user; proxy_passwd = proxy->passwd; } /* #### This does not appear right. Can't the proxy request, say, `Digest' authentication? */ if (proxy_user && proxy_passwd) proxyauth = basic_authentication_encode (proxy_user, proxy_passwd); /* If we're using a proxy, we will be connecting to the proxy server. */ conn = proxy; /* Proxy authorization over SSL is handled below. */#ifdef HAVE_SSL if (u->scheme != SCHEME_HTTPS)#endif request_set_header (req, "Proxy-Authorization", proxyauth, rel_value); } { /* Whether we need to print the host header with braces around host, e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of the usual "Host: symbolic-name:1234". */ int squares = strchr (u->host, ':') != NULL; if (u->port == scheme_default_port (u->scheme)) request_set_header (req, "Host", aprintf (squares ? "[%s]" : "%s", u->host), rel_value); else request_set_header (req, "Host", aprintf (squares ? "[%s]:%d" : "%s:%d", u->host, u->port), rel_value); } if (!inhibit_keep_alive) request_set_header (req, "Connection", "Keep-Alive", rel_none); if (opt.cookies) request_set_header (req, "Cookie", cookie_header (wget_cookie_jar,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -