📄 http.c

📁 wget讓你可以在console介面下
💻 C
📖 第 1 页 / 共 5 页
字号:
  fd_close (pconn.socket);  xfree (pconn.host);  xzero (pconn);}/* Register FD, which should be a TCP/IP connection to HOST:PORT, as   persistent.  This will enable someone to use the same connection   later.  In the context of HTTP, this must be called only AFTER the   response has been received and the server has promised that the   connection will remain alive.   If a previous connection was persistent, it is closed. */static voidregister_persistent (const char *host, int port, int fd, int ssl){  if (pconn_active)    {      if (pconn.socket == fd)	{	  /* The connection FD is already registered. */	  return;	}      else	{	  /* The old persistent connection is still active; close it	     first.  This situation arises whenever a persistent	     connection exists, but we then connect to a different	     host, and try to register a persistent connection to that	     one.  */	  invalidate_persistent ();	}    }  pconn_active = 1;  pconn.socket = fd;  pconn.host = xstrdup (host);  pconn.port = port;  pconn.ssl = ssl;  pconn.authorized = 0;  DEBUGP (("Registered socket %d for persistent reuse.\n", fd));}/* Return non-zero if a persistent connection is available for   connecting to HOST:PORT.  */static intpersistent_available_p (const char *host, int port, int ssl,			int *host_lookup_failed){  /* First, check whether a persistent connection is active at all.  */  if (!pconn_active)    return 0;  /* If we want SSL and the last connection wasn't or vice versa,     don't use it.  Checking for host and port is not enough because     HTTP and HTTPS can apparently coexist on the same port.  */  if (ssl != pconn.ssl)    return 0;  /* If we're not connecting to the same port, we're not interested. */  if (port != pconn.port)    return 0;  /* If the host is the same, we're in business.  If not, there is     still hope -- read below.  */  if (0 != strcasecmp (host, pconn.host))    {      /* Check if pconn.socket is talking to HOST under another name.	 This happens often when both sites are virtual hosts	 distinguished only by name and served by the same network	 interface, and hence the same web server (possibly set up by	 the ISP and serving many different web sites).  This	 admittedly unconventional optimization does not contradict	 HTTP and works well with popular server software.  */      int found;      ip_address ip;      struct address_list *al;      if (ssl)	/* Don't try to talk to two different SSL sites over the same	   secure connection!  (Besides, it's not clear that	   name-based virtual hosting is even possible with SSL.)  */	return 0;      /* If pconn.socket's peer is one of the IP addresses HOST	 resolves to, pconn.socket is for all intents and purposes	 already talking to HOST.  */      if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER))	{	  /* Can't get the peer's address -- something must be very	     wrong with the connection.  */	  invalidate_persistent ();	  return 0;	}      al = lookup_host (host, 0);      if (!al)	{	  *host_lookup_failed = 1;	  return 0;	}      found = address_list_contains (al, &ip);      address_list_release (al);      if (!found)	return 0;      /* The persistent connection's peer address was found among the	 addresses HOST resolved to; therefore, pconn.sock is in fact	 already talking to HOST -- no need to reconnect.  */    }  /* Finally, check whether the connection is still open.  This is     important because most server implement a liberal (short) timeout     on persistent connections.  Wget can of course always reconnect     if the connection doesn't work out, but it's nicer to know in     advance.  This test is a logical followup of the first test, but     is "expensive" and therefore placed at the end of the list.  */  if (!test_socket_open (pconn.socket))    {      /* Oops, the socket is no longer open.  Now that we know that,         let's invalidate the persistent connection before returning         0.  */      invalidate_persistent ();      return 0;    }  return 1;}/* The idea behind these two CLOSE macros is to distinguish between   two cases: one when the job we've been doing is finished, and we   want to close the connection and leave, and two when something is   seriously wrong and we're closing the connection as part of   cleanup.   In case of keep_alive, CLOSE_FINISH should leave the connection   open, while CLOSE_INVALIDATE should still close it.   Note that the semantics of the flag `keep_alive' is "this   connection *will* be reused (the server has promised not to close   the connection once we're done)", while the semantics of   `pc_active_p && (fd) == pc_last_fd' is "we're *now* using an   active, registered connection".  */#define CLOSE_FINISH(fd) do {			\  if (!keep_alive)				\    {						\      if (pconn_active && (fd) == pconn.socket)	\	invalidate_persistent ();		\      else					\	{					\	  fd_close (fd);			\	  fd = -1;				\	}					\    }						\} while (0)#define CLOSE_INVALIDATE(fd) do {		\  if (pconn_active && (fd) == pconn.socket)	\    invalidate_persistent ();			\  else						\    fd_close (fd);				\  fd = -1;					\} while (0)struct http_stat{  wgint len;			/* received length */  wgint contlen;			/* expected length */  wgint restval;			/* the restart value */  int res;			/* the result of last read */  char *newloc;			/* new location (redirection) */  char *remote_time;		/* remote time-stamp string */  char *error;			/* textual HTTP error */  int statcode;			/* status code */  wgint rd_size;			/* amount of data read from socket */  double dltime;		/* time it took to download the data */  const char *referer;		/* value of the referer header. */  char **local_file;		/* local file. */};static voidfree_hstat (struct http_stat *hs){  xfree_null (hs->newloc);  xfree_null (hs->remote_time);  xfree_null (hs->error);  /* Guard against being called twice. */  hs->newloc = NULL;  hs->remote_time = NULL;  hs->error = NULL;}static char *create_authorization_line PARAMS ((const char *, const char *,						const char *, const char *,						const char *, int *));static char *basic_authentication_encode PARAMS ((const char *, const char *));static int known_authentication_scheme_p PARAMS ((const char *, const char *));time_t http_atotm PARAMS ((const char *));#define BEGINS_WITH(line, string_constant)				\  (!strncasecmp (line, string_constant, sizeof (string_constant) - 1)	\   && (ISSPACE (line[sizeof (string_constant) - 1])			\       || !line[sizeof (string_constant) - 1]))#define SET_USER_AGENT(req) do {					\  if (!opt.useragent)							\    request_set_header (req, "User-Agent",				\			aprintf ("Wget/%s", version_string), rel_value); \  else if (*opt.useragent)						\    request_set_header (req, "User-Agent", opt.useragent, rel_none);	\} while (0)/* The flags that allow clobbering the file (opening with "wb").   Defined here to avoid repetition later.  #### This will require   rework.  */#define ALLOW_CLOBBER (opt.noclobber || opt.always_rest || opt.timestamping \		       || opt.dirstruct || opt.output_document)/* Retrieve a document through HTTP protocol.  It recognizes status   code, and correctly handles redirections.  It closes the network   socket.  If it receives an error from the functions below it, it   will print it if there is enough information to do so (almost   always), returning the error to the caller (i.e. http_loop).   Various HTTP parameters are stored to hs.   If PROXY is non-NULL, the connection will be made to the proxy   server, and u->url will be requested.  */static uerr_tgethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy){  struct request *req;  char *type;  char *user, *passwd;  char *proxyauth;  int statcode;  int write_error;  wgint contlen, contrange;  struct url *conn;  FILE *fp;  int sock = -1;  int flags;  /* Set to 1 when the authorization has failed permanently and should     not be tried again. */  int auth_finished = 0;  /* Whether NTLM authentication is used for this request. */  int ntlm_seen = 0;  /* Whether our connection to the remote host is through SSL.  */  int using_ssl = 0;  /* Whether a HEAD request will be issued (as opposed to GET or     POST). */  int head_only = *dt & HEAD_ONLY;  char *head;  struct response *resp;  char hdrval[256];  char *message;  /* Whether this connection will be kept alive after the HTTP request     is done. */  int keep_alive;  /* Whether keep-alive should be inhibited.     RFC 2068 requests that 1.0 clients not send keep-alive requests     to proxies.  This is because many 1.0 proxies do not interpret     the Connection header and transfer it to the remote server,     causing it to not close the connection and leave both the proxy     and the client hanging.  */  int inhibit_keep_alive =    !opt.http_keep_alive || opt.ignore_length || proxy != NULL;  /* Headers sent when using POST. */  wgint post_data_size = 0;  int host_lookup_failed = 0;#ifdef HAVE_SSL  if (u->scheme == SCHEME_HTTPS)    {      /* Initialize the SSL context.  After this has once been done,	 it becomes a no-op.  */      if (!ssl_init ())	{	  scheme_disable (SCHEME_HTTPS);	  logprintf (LOG_NOTQUIET,		     _("Disabling SSL due to encountered errors.\n"));	  return SSLINITFAILED;	}    }#endif /* HAVE_SSL */  if (!head_only)    /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to       know the local filename so we can save to it. */    assert (*hs->local_file != NULL);  /* Initialize certain elements of struct http_stat.  */  hs->len = 0;  hs->contlen = -1;  hs->res = -1;  hs->newloc = NULL;  hs->remote_time = NULL;  hs->error = NULL;  conn = u;  /* Prepare the request to send. */  req = request_new ();  {    char *meth_arg;    const char *meth = "GET";    if (head_only)      meth = "HEAD";    else if (opt.post_file_name || opt.post_data)      meth = "POST";    /* Use the full path, i.e. one that includes the leading slash and       the query string.  E.g. if u->path is "foo/bar" and u->query is       "param=value", full_path will be "/foo/bar?param=value".  */    if (proxy#ifdef HAVE_SSL	/* When using SSL over proxy, CONNECT establishes a direct	   connection to the HTTPS server.  Therefore use the same	   argument as when talking to the server directly. */	&& u->scheme != SCHEME_HTTPS#endif	)      meth_arg = xstrdup (u->url);    else      meth_arg = url_full_path (u);    request_set_method (req, meth, meth_arg);  }  request_set_header (req, "Referer", (char *) hs->referer, rel_none);  if (*dt & SEND_NOCACHE)    request_set_header (req, "Pragma", "no-cache", rel_none);  if (hs->restval)    request_set_header (req, "Range",			aprintf ("bytes=%s-",				 number_to_static_string (hs->restval)),			rel_value);  SET_USER_AGENT (req);  request_set_header (req, "Accept", "*/*", rel_none);  /* Find the username and password for authentication. */  user = u->user;  passwd = u->passwd;  search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);  user = user ? user : (opt.http_user ? opt.http_user : opt.user);  passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd);  if (user && passwd)    {      /* We have the username and the password, but haven't tried	 any authorization yet.  Let's see if the "Basic" method	 works.  If not, we'll come back here and construct a	 proper authorization method with the right challenges.	 If we didn't employ this kind of logic, every URL that	 requires authorization would have to be processed twice,	 which is very suboptimal and generates a bunch of false	 "unauthorized" errors in the server log.	 #### But this logic also has a serious problem when used	 with stronger authentications: we *first* transmit the	 username and the password in clear text, and *then* attempt a	 stronger authentication scheme.  That cannot be right!  We	 are only fortunate that almost everyone still uses the	 `Basic' scheme anyway.	 There should be an option to prevent this from happening, for	 those who use strong authentication schemes and value their	 passwords.  */      request_set_header (req, "Authorization",			  basic_authentication_encode (user, passwd),			  rel_value);    }  proxyauth = NULL;  if (proxy)    {      char *proxy_user, *proxy_passwd;      /* For normal username and password, URL components override	 command-line/wgetrc parameters.  With proxy	 authentication, it's the reverse, because proxy URLs are	 normally the "permanent" ones, so command-line args	 should take precedence.  */      if (opt.proxy_user && opt.proxy_passwd)	{	  proxy_user = opt.proxy_user;	  proxy_passwd = opt.proxy_passwd;	}      else	{	  proxy_user = proxy->user;	  proxy_passwd = proxy->passwd;	}      /* #### This does not appear right.  Can't the proxy request,	 say, `Digest' authentication?  */      if (proxy_user && proxy_passwd)	proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);      /* If we're using a proxy, we will be connecting to the proxy	 server.  */      conn = proxy;      /* Proxy authorization over SSL is handled below. */#ifdef HAVE_SSL      if (u->scheme != SCHEME_HTTPS)#endif	request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);    }  {    /* Whether we need to print the host header with braces around       host, e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of the       usual "Host: symbolic-name:1234". */    int squares = strchr (u->host, ':') != NULL;    if (u->port == scheme_default_port (u->scheme))      request_set_header (req, "Host",			  aprintf (squares ? "[%s]" : "%s", u->host),			  rel_value);    else      request_set_header (req, "Host",			  aprintf (squares ? "[%s]:%d" : "%s:%d",				   u->host, u->port),			  rel_value);  }  if (!inhibit_keep_alive)    request_set_header (req, "Connection", "Keep-Alive", rel_none);  if (opt.cookies)    request_set_header (req, "Cookie",			cookie_header (wget_cookie_jar,
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -