http.c
hs->newloc ? _(" [following]") : ""); CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there might be more bytes in the body. */ FREE_MAYBE (type); FREE_MAYBE (all_headers); return NEWLOCATION; } } /* If content-type is not given, assume text/html. This is because of the multitude of broken CGI's that "forget" to generate the content-type. */ if (!type || 0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) || 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S))) *dt |= TEXTHTML; else *dt &= ~TEXTHTML; if (opt.html_extension && (*dt & TEXTHTML)) /* -E / --html-extension / html_extension = on was specified, and this is a text/html file. If some case-insensitive variation on ".htm[l]" isn't already the file's suffix, tack on ".html". */ { char* last_period_in_local_filename = strrchr(*hs->local_file, '.'); if (last_period_in_local_filename == NULL || !(strcasecmp(last_period_in_local_filename, ".htm") == EQ || strcasecmp(last_period_in_local_filename, ".html") == EQ)) { size_t local_filename_len = strlen(*hs->local_file); *hs->local_file = xrealloc(*hs->local_file, local_filename_len + sizeof(".html")); strcpy(*hs->local_file + local_filename_len, ".html"); *dt |= ADDED_HTML_EXTENSION; } } if (contrange == -1) { /* We did not get a content-range header. This means that the server did not honor our `Range' request. Normally, this means we should reset hs->restval and continue normally. */ /* However, if `-c' is used, we need to be a bit more careful: 1. If `-c' is specified and the file already existed when Wget was started, it would be a bad idea for us to start downloading it from scratch, effectively truncating it. I believe this cannot happen unless `-c' was specified. 2. If `-c' is used on a file that is already fully downloaded, we're requesting bytes after the end of file, which can result in server not honoring `Range'. If this is the case, `Content-Length' will be equal to the length of the file. */ if (opt.always_rest) { /* Check for condition #2. */ if (hs->restval > 0 /* restart was requested. */ && contlen != -1 /* we got content-length. */ && hs->restval >= contlen /* file fully downloaded or has shrunk. */ ) { logputs (LOG_VERBOSE, _("\\n The file is already fully retrieved; nothing to do.\n\n")); /* In case the caller inspects. */ hs->len = contlen; hs->res = 0; /* Mark as successfully retrieved. */ *dt |= RETROKF; FREE_MAYBE (type); FREE_MAYBE (all_headers); CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there might be more bytes in the body. */ return RETRUNNEEDED; } /* Check for condition #1. */ if (hs->no_truncate) { logprintf (LOG_NOTQUIET, _("\\n\Continued download failed on this file, which conflicts with `-c'.\n\Refusing to truncate existing file `%s'.\n\n"), *hs->local_file); FREE_MAYBE (type); FREE_MAYBE (all_headers); CLOSE_INVALIDATE (sock); return CONTNOTSUPPORTED; } /* Fallthrough */ } hs->restval = 0; } else if (contrange != hs->restval || (H_PARTIAL (statcode) && contrange == -1)) { /* This means the whole request was somehow misunderstood by the server. Bail out. */ FREE_MAYBE (type); FREE_MAYBE (all_headers); CLOSE_INVALIDATE (sock); return RANGEERR; } if (hs->restval) { if (contlen != -1) contlen += contrange; else contrange = -1; /* If conent-length was not sent, content-range will be ignored. */ } hs->contlen = contlen; if (opt.verbose) { if ((*dt & RETROKF) && !opt.server_response) { /* No need to print this output if the body won't be downloaded at all, or if the original server response is printed. 
  if (opt.verbose)
    {
      if ((*dt & RETROKF) && !opt.server_response)
        {
          /* No need to print this output if the body won't be
             downloaded at all, or if the original server response is
             printed.  */
          logputs (LOG_VERBOSE, _("Length: "));
          if (contlen != -1)
            {
              logputs (LOG_VERBOSE, legible (contlen));
              if (contrange != -1)
                logprintf (LOG_VERBOSE, _(" (%s to go)"),
                           legible (contlen - contrange));
            }
          else
            logputs (LOG_VERBOSE,
                     opt.ignore_length ? _("ignored") : _("unspecified"));
          if (type)
            logprintf (LOG_VERBOSE, " [%s]\n", type);
          else
            logputs (LOG_VERBOSE, "\n");
        }
    }
  FREE_MAYBE (type);
  type = NULL;                  /* We don't need it any more.  */

  /* Return if we have no intention of further downloading.  */
  if (!(*dt & RETROKF) || (*dt & HEAD_ONLY))
    {
      /* In case the caller cares to look...  */
      hs->len = 0L;
      hs->res = 0;
      FREE_MAYBE (type);
      FREE_MAYBE (all_headers);
      CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
                                  might be more bytes in the body. */
      return RETRFINISHED;
    }

  /* Open the local file.  */
  if (!opt.dfp)
    {
      mkalldirs (*hs->local_file);
      if (opt.backups)
        rotate_backups (*hs->local_file);
      fp = fopen (*hs->local_file, hs->restval ? "ab" : "wb");
      if (!fp)
        {
          logprintf (LOG_NOTQUIET, "%s: %s\n",
                     *hs->local_file, strerror (errno));
          CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
                                      might be more bytes in the body. */
          FREE_MAYBE (all_headers);
          return FOPENERR;
        }
    }
  else                          /* opt.dfp */
    {
      extern int global_download_count;
      fp = opt.dfp;
      /* To ensure that repeated "from scratch" downloads work for -O
         files, we rewind the file pointer, unless restval is non-zero.
         (This works only when -O is used on regular files, but it's
         still a valuable feature.)

         However, this loses when more than one URL is specified on the
         command line: the second rewind eradicates the contents of the
         first download.  Thus we disable the above trick for all the
         downloads except the very first one.

         #### A possible solution to this would be to remember the file
         position in the output document and to seek to that position,
         instead of rewinding.

         We don't truncate stdout, since that breaks
         "wget -O - [...] >> foo".  */
      if (!hs->restval && global_download_count == 0 && opt.dfp != stdout)
        {
          /* This will silently fail for streams that don't correspond
             to regular files, but that's OK.  */
          rewind (fp);
          /* ftruncate is needed because opt.dfp is opened in append
             mode if opt.always_rest is set.  */
          ftruncate (fileno (fp), 0);
          clearerr (fp);
        }
    }

  /* #### This confuses the code that checks for file size.  There
     should be some overhead information.  */
  if (opt.save_headers)
    fwrite (all_headers, 1, all_length, fp);

  /* Get the contents of the document.  */
  hs->res = get_contents (sock, fp, &hs->len, hs->restval,
                          (contlen != -1 ? contlen : 0),
                          &rbuf, keep_alive, &hs->dltime);

  if (hs->res >= 0)
    CLOSE_FINISH (sock);
  else
    CLOSE_INVALIDATE (sock);

  {
    /* Close or flush the file.  We have to be careful to check for
       error here.  Checking the result of fwrite() is not enough --
       errors could go unnoticed!  */
    int flush_res;
    if (!opt.dfp)
      flush_res = fclose (fp);
    else
      flush_res = fflush (fp);
    if (flush_res == EOF)
      hs->res = -2;
  }
  FREE_MAYBE (all_headers);
  if (hs->res == -2)
    return FWRITEERR;
  return RETRFINISHED;
}
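
/* Sketch (an illustrative assumption, not code from this file): the
   caller reacts to the uerr_t codes returned above roughly along
   these lines:

       switch (err)
         {
         case NEWLOCATION:      follow hs.newloc (a redirection);
         case RETRUNNEEDED:     nothing to do, file already complete;
         case CONTNOTSUPPORTED: server ignored `Range' with `-c' in
                                effect; refuse to truncate, give up;
         case FOPENERR:
         case FWRITEERR:        local file trouble; abort the download;
         case RETRFINISHED:     inspect hstat and decide about retries;
         }
*/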

/* The genuine HTTP loop!  This is the part where the retrieval is
   retried, and retried, and retried, and...  */
uerr_t
http_loop (struct url *u, char **newloc, char **local_file,
           const char *referer, int *dt, struct url *proxy)
{
  int count;
  int use_ts, got_head = 0;     /* time-stamping info */
  char *filename_plus_orig_suffix;
  char *local_filename = NULL;
  char *tms, *locf, *tmrate;
  uerr_t err;
  time_t tml = -1, tmr = -1;    /* local and remote time-stamps */
  long local_size = 0;          /* the size of the local file */
  size_t filename_len;
  struct http_stat hstat;       /* HTTP status */
  struct stat st;
  char *dummy = NULL;

  /* This used to be done in main(), but it's a better idea to do it
     here so that we don't go through the hoops if we're just using
     FTP or whatever.  */
  if (opt.cookies)
    {
      if (!wget_cookie_jar)
        wget_cookie_jar = cookie_jar_new ();
      if (opt.cookies_input && !cookies_loaded_p)
        {
          cookie_jar_load (wget_cookie_jar, opt.cookies_input);
          cookies_loaded_p = 1;
        }
    }

  *newloc = NULL;

  /* Warn on (likely bogus) wildcard usage in HTTP.  Don't use
     has_wildcards_p because it would also warn on `?', and we know
     that shows up in CGI paths a *lot*.  */
  if (strchr (u->url, '*'))
    logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));

  /* Determine the local filename.  */
  if (local_file && *local_file)
    hstat.local_file = local_file;
  else if (local_file)
    {
      *local_file = url_file_name (u);
      hstat.local_file = local_file;
    }
  else
    {
      dummy = url_file_name (u);
      hstat.local_file = &dummy;
    }

  if (!opt.output_document)
    locf = *hstat.local_file;
  else
    locf = opt.output_document;

  hstat.referer = referer;

  filename_len = strlen (*hstat.local_file);
  filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));

  if (opt.noclobber && file_exists_p (*hstat.local_file))
    {
      /* If opt.noclobber is turned on and file already exists, do not
         retrieve the file.  */
      logprintf (LOG_VERBOSE, _("\
File `%s' already there, will not retrieve.\n"), *hstat.local_file);
      /* If the file is there, we suppose it's retrieved OK.  */
      *dt |= RETROKF;

      /* #### Bogusness alert.  */
      /* If its suffix is "html" or "htm" or similar, assume
         text/html.  */
      if (has_html_suffix_p (*hstat.local_file))
        *dt |= TEXTHTML;

      FREE_MAYBE (dummy);
      return RETROK;
    }

  use_ts = 0;
  if (opt.timestamping)
    {
      boolean local_dot_orig_file_exists = FALSE;

      if (opt.backup_converted)
        /* If -K is specified, we'll act on the assumption that it was
           specified last time these files were downloaded as well, and
           instead of just comparing local file X against server file
           X, we'll compare local file X.orig (if extant, else X)
           against server file X.  If -K _wasn't_ specified last time,
           or the server contains files called *.orig, -N will be back
           to not operating correctly with -k.  */
        {
          /* Would a single s[n]printf() call be faster?  --dan

             Definitely not.  sprintf() is horribly slow.  It's a
             different question whether the difference between the two
             affects a program.  Usually I'd say "no", but at one point
             I profiled Wget, and found that a measurable and
             non-negligible amount of time was lost calling sprintf()
             in url.c.  Replacing sprintf with inline calls to strcpy()
             and long_to_string() made a difference.  --hniksic */
          memcpy (filename_plus_orig_suffix, *hstat.local_file,
                  filename_len);
          memcpy (filename_plus_orig_suffix + filename_len,
                  ".orig", sizeof (".orig"));

          /* Try to stat() the .orig file.  */
          if (stat (filename_plus_orig_suffix, &st) == 0)
            {
              local_dot_orig_file_exists = TRUE;
              local_filename = filename_plus_orig_suffix;
            }
        }
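
      /* Illustrative example (an assumption, not from the source):
         after `wget -N -k -K http://host/index.html', the
         link-converted page is saved as index.html while the pristine
         server copy is kept as index.html.orig.  Time-stamping must
         compare the server's file against index.html.orig, because
         the conversion step rewrote index.html and bumped its
         modification time.  */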
      if (!local_dot_orig_file_exists)
        /* Couldn't stat() <file>.orig, so try to stat() <file>.  */
        if (stat (*hstat.local_file, &st) == 0)
          local_filename = *hstat.local_file;

      if (local_filename != NULL)
        /* There was a local file, so we'll check later to see if the
           version the server has is the same version we already have,
           allowing us to skip a download.  */
        {
          use_ts = 1;
          tml = st.st_mtime;
#ifdef WINDOWS
          /* Modification time granularity is 2 seconds for Windows, so
             increase local time by 1 second for later comparison.  */
          tml++;
#endif
          local_size = st.st_size;
          got_head = 0;
        }
    }

  /* Reset the counter.  */
  count = 0;
  *dt = 0 | ACCEPTRANGES;

  /* THE loop */
  do
    {
      /* Increment the pass counter.  */
      ++count;
      sleep_between_retrievals (count);

      /* Get the current time string.  */
      tms = time_str (NULL);

      /* Print fetch message, if opt.verbose.  */
      if (opt.verbose)
        {
          char *hurl = url_string (u, 1);
          char tmp[15];
          strcpy (tmp, "        ");
          if (count > 1)
            sprintf (tmp, _("(try:%2d)"), count);
          logprintf (LOG_VERBOSE, "--%s--  %s\n  %s => `%s'\n",
                     tms, hurl, tmp, locf);
#ifdef WINDOWS
          ws_changetitle (hurl, 1);
#endif
          xfree (hurl);
        }

      /* Default document type is empty.  However, if spider mode is
         on or time-stamping is employed, the HEAD_ONLY command is
         encoded within *dt.  */
      if (opt.spider || (use_ts && !got_head))
        *dt |= HEAD_ONLY;
      else
        *dt &= ~HEAD_ONLY;

      /* Assume no restarting.  */
      hstat.restval = 0L;

      /* Decide whether or not to restart.  */
      if (((count > 1 && (*dt & ACCEPTRANGES)) || opt.always_rest)
          /* #### this calls access() and then stat(); could be
             optimized. */
          && file_exists_p (locf))
        if (stat (locf, &st) == 0 && S_ISREG (st.st_mode))
          hstat.restval = st.st_size;

      /* If `-c' is used and the file exists and is non-empty, refuse
         to truncate it if the server doesn't support continued
         downloads.  */
      hstat.no_truncate = 0;
      if (opt.always_rest && hstat.restval)
        hstat.no_truncate = 1;

      /* Decide whether to send the no-cache directive.  We send it in
         two cases:
           a) we're using a proxy, and we're past our first retrieval.
              Some proxies are notorious for caching incomplete data,
              so we require a fresh get.
           b) caching is explicitly inhibited.  */
      if ((proxy && count > 1)  /* a */