📄 http.c

📁 一个从网络上自动下载文件的自由工具
💻 C
📖 第 1 页 / 共 5 页
字号:
      if (b < e && e[-1] == '\n')        --e;      if (b < e && e[-1] == '\r')        --e;      print_response_line(prefix, b, e);    }}/* Parse the `Content-Range' header and extract the information it   contains.  Returns true if successful, false otherwise.  */static boolparse_content_range (const char *hdr, wgint *first_byte_ptr,                     wgint *last_byte_ptr, wgint *entity_length_ptr){  wgint num;  /* Ancient versions of Netscape proxy server, presumably predating     rfc2068, sent out `Content-Range' without the "bytes"     specifier.  */  if (0 == strncasecmp (hdr, "bytes", 5))    {      hdr += 5;      /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the         HTTP spec. */      if (*hdr == ':')        ++hdr;      while (ISSPACE (*hdr))        ++hdr;      if (!*hdr)        return false;    }  if (!ISDIGIT (*hdr))    return false;  for (num = 0; ISDIGIT (*hdr); hdr++)    num = 10 * num + (*hdr - '0');  if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))    return false;  *first_byte_ptr = num;  ++hdr;  for (num = 0; ISDIGIT (*hdr); hdr++)    num = 10 * num + (*hdr - '0');  if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))    return false;  *last_byte_ptr = num;  ++hdr;  for (num = 0; ISDIGIT (*hdr); hdr++)    num = 10 * num + (*hdr - '0');  *entity_length_ptr = num;  return true;}/* Read the body of the request, but don't store it anywhere and don't   display a progress gauge.  This is useful for reading the bodies of   administrative responses to which we will soon issue another   request.  The response is not useful to the user, but reading it   allows us to continue using the same connection to the server.   If reading fails, false is returned, true otherwise.  In debug   mode, the body is displayed for debugging purposes.  
*/

static bool
skip_short_body (int fd, wgint contlen)
{
  enum {
    SKIP_SIZE = 512,            /* capacity of the scratch buffer */
    SKIP_THRESHOLD = 4096       /* bodies longer than this are not read */
  };
  char dlbuf[SKIP_SIZE + 1];
  int nread;

  /* The extra byte past the read area stays zero so that DEBUGP can
     print the buffer safely.  */
  dlbuf[SKIP_SIZE] = '\0';

  /* We shouldn't get here with unknown contlen.  (This will change
     with HTTP/1.1, which supports "chunked" transfer.)  */
  assert (contlen != -1);

  /* A body this large is not worth reading; report failure and leave
     the decision about the connection to the caller.  */
  if (contlen > SKIP_THRESHOLD)
    return false;

  DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));

  /* Read and discard the body one buffer at a time, never asking for
     more than is still outstanding.  */
  for (; contlen > 0; contlen -= nread)
    {
      nread = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
      if (nread <= 0)
        {
          /* Don't normally report the error since this is an
             optimization that should be invisible to the user.  */
          DEBUGP (("] aborting (%s).\n",
                   nread < 0 ? fd_errstr (fd) : "EOF received"));
          return false;
        }
      /* Safe even if %.*s bogusly expects terminating \0 because
         dlbuf was zero-terminated above.  */
      DEBUGP (("%.*s", nread, dlbuf));
    }

  DEBUGP (("] done.\n"));
  return true;
}

/* Extract a parameter from the string (typically an HTTP header) at
   **SOURCE and advance SOURCE to the next parameter.  Return false
   when there are no more parameters to extract.  The name of the
   parameter is returned in NAME, and the value in VALUE.  If the
   parameter has no value, the token's value is zeroed out.

   For example, if *SOURCE points to the string "attachment;
   filename=\"foo bar\"", the first call to this function will return
   the token named "attachment" and no value, and the second call will
   return the token named "filename" and value "foo bar".  The third
   call will return false, indicating no more valid tokens.
*/boolextract_param (const char **source, param_token *name, param_token *value,               char separator){  const char *p = *source;  while (ISSPACE (*p)) ++p;  if (!*p)    {      *source = p;      return false;             /* no error; nothing more to extract */    }  /* Extract name. */  name->b = p;  while (*p && !ISSPACE (*p) && *p != '=' && *p != separator) ++p;  name->e = p;  if (name->b == name->e)    return false;               /* empty name: error */  while (ISSPACE (*p)) ++p;  if (*p == separator || !*p)           /* no value */    {      xzero (*value);      if (*p == separator) ++p;      *source = p;      return true;    }  if (*p != '=')    return false;               /* error */  /* *p is '=', extract value */  ++p;  while (ISSPACE (*p)) ++p;  if (*p == '"')                /* quoted */    {      value->b = ++p;      while (*p && *p != '"') ++p;      if (!*p)        return false;      value->e = p++;      /* Currently at closing quote; find the end of param. */      while (ISSPACE (*p)) ++p;      while (*p && *p != separator) ++p;      if (*p == separator)        ++p;      else if (*p)        /* garbage after closed quote, e.g. foo="bar"baz */        return false;    }  else                          /* unquoted */    {      value->b = p;      while (*p && *p != separator) ++p;      value->e = p;      while (value->e != value->b && ISSPACE (value->e[-1]))        --value->e;      if (*p == separator) ++p;    }  *source = p;  return true;}#undef MAX#define MAX(p, q) ((p) > (q) ? (p) : (q))/* Parse the contents of the `Content-Disposition' header, extracting   the information useful to Wget.  Content-Disposition is a header   borrowed from MIME; when used in HTTP, it typically serves for   specifying the desired file name of the resource.  
For example:       Content-Disposition: attachment; filename="flora.jpg"   Wget will skip the tokens it doesn't care about, such as   "attachment" in the previous example; it will also skip other   unrecognized params.  If the header is syntactically correct and   contains a file name, a copy of the file name is stored in   *filename and true is returned.  Otherwise, the function returns   false.   The file name is stripped of directory components and must not be   empty.  */static boolparse_content_disposition (const char *hdr, char **filename){  param_token name, value;  while (extract_param (&hdr, &name, &value, ';'))    if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL)      {        /* Make the file name begin at the last slash or backslash. */        const char *last_slash = memrchr (value.b, '/', value.e - value.b);        const char *last_bs = memrchr (value.b, '\\', value.e - value.b);        if (last_slash && last_bs)          value.b = 1 + MAX (last_slash, last_bs);        else if (last_slash || last_bs)          value.b = 1 + (last_slash ? last_slash : last_bs);        if (value.b == value.e)          continue;        /* Start with the directory prefix, if specified. */        if (opt.dir_prefix)          {            int prefix_length = strlen (opt.dir_prefix);            bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/');            int total_length;            if (add_slash)               ++prefix_length;            total_length = prefix_length + (value.e - value.b);                        *filename = xmalloc (total_length + 1);            strcpy (*filename, opt.dir_prefix);            if (add_slash)               (*filename)[prefix_length - 1] = '/';            memcpy (*filename + prefix_length, value.b, (value.e - value.b));            (*filename)[total_length] = '\0';          }        else          *filename = strdupdelim (value.b, value.e);        return true;      }  return false;}/* Persistent connections.  
Currently, we cache the most recently used   connection as persistent, provided that the HTTP server agrees to   make it such.  The persistence data is stored in the variables   below.  Ideally, it should be possible to cache an arbitrary fixed   number of these connections.  *//* Whether a persistent connection is active. */static bool pconn_active;static struct {  /* The socket of the connection.  */  int socket;  /* Host and port of the currently active persistent connection. */  char *host;  int port;  /* Whether a ssl handshake has occoured on this connection.  */  bool ssl;  /* Whether the connection was authorized.  This is only done by     NTLM, which authorizes *connections* rather than individual     requests.  (That practice is peculiar for HTTP, but it is a     useful optimization.)  */  bool authorized;#ifdef ENABLE_NTLM  /* NTLM data of the current connection.  */  struct ntlmdata ntlm;#endif} pconn;/* Mark the persistent connection as invalid and free the resources it   uses.  This is used by the CLOSE_* macros after they forcefully   close a registered persistent connection.  */static voidinvalidate_persistent (void){  DEBUGP (("Disabling further reuse of socket %d.\n", pconn.socket));  pconn_active = false;  fd_close (pconn.socket);  xfree (pconn.host);  xzero (pconn);}/* Register FD, which should be a TCP/IP connection to HOST:PORT, as   persistent.  This will enable someone to use the same connection   later.  In the context of HTTP, this must be called only AFTER the   response has been received and the server has promised that the   connection will remain alive.   If a previous connection was persistent, it is closed. */static voidregister_persistent (const char *host, int port, int fd, bool ssl){  if (pconn_active)    {      if (pconn.socket == fd)        {          /* The connection FD is already registered. */          return;        }      else        {          /* The old persistent connection is still active; close it             first.  
This situation arises whenever a persistent             connection exists, but we then connect to a different             host, and try to register a persistent connection to that             one.  */          invalidate_persistent ();        }    }  pconn_active = true;  pconn.socket = fd;  pconn.host = xstrdup (host);  pconn.port = port;  pconn.ssl = ssl;  pconn.authorized = false;  DEBUGP (("Registered socket %d for persistent reuse.\n", fd));}/* Return true if a persistent connection is available for connecting   to HOST:PORT.  */static boolpersistent_available_p (const char *host, int port, bool ssl,                        bool *host_lookup_failed){  /* First, check whether a persistent connection is active at all.  */  if (!pconn_active)    return false;  /* If we want SSL and the last connection wasn't or vice versa,     don't use it.  Checking for host and port is not enough because     HTTP and HTTPS can apparently coexist on the same port.  */  if (ssl != pconn.ssl)    return false;  /* If we're not connecting to the same port, we're not interested. */  if (port != pconn.port)    return false;  /* If the host is the same, we're in business.  If not, there is     still hope -- read below.  */  if (0 != strcasecmp (host, pconn.host))    {      /* Check if pconn.socket is talking to HOST under another name.         This happens often when both sites are virtual hosts         distinguished only by name and served by the same network         interface, and hence the same web server (possibly set up by         the ISP and serving many different web sites).  This         admittedly unconventional optimization does not contradict         HTTP and works well with popular server software.  */      bool found;      ip_address ip;      struct address_list *al;      if (ssl)        /* Don't try to talk to two different SSL sites over the same           secure connection!  (Besides, it's not clear that           name-based virtual hosting is even possible with SSL.)  
*/        return false;      /* If pconn.socket's peer is one of the IP addresses HOST         resolves to, pconn.socket is for all intents and purposes         already talking to HOST.  */      if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER))        {          /* Can't get the peer's address -- something must be very             wrong with the connection.  */          invalidate_persistent ();          return false;        }      al = lookup_host (host, 0);      if (!al)        {          *host_lookup_failed = true;          return false;        }      found = address_list_contains (al, &ip);      address_list_release (al);      if (!found)        return false;      /* The persistent connection's peer address was found among the         addresses HOST resolved to; therefore, pconn.sock is in fact         already talking to HOST -- no need to reconnect.  */    }  /* Finally, check whether the connection is still open.  This is     important because most servers implement liberal (short) timeout     on persistent connections.  Wget can of course always reconnect     if the connection doesn't work out, but it's nicer to know in     advance.  This test is a logical followup of the first test, but     is "expensive" and therefore placed at the end of the list.     (Current implementation of test_socket_open has a nice side     effect that it treats sockets with pending data as "closed".     This is exactly what we want: if a broken server sends message     body in response to HEAD, or if it sends more than conent-length     data, we won't reuse the corrupted connection.)  */  if (!test_socket_open (pconn.socket))    {      /* Oops, the socket is no longer open.  Now that we know that,         let's invalidate the persistent connection before returning         0.  */
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -