
retr.c

wget (command-line download utility) source code
Language: C
  if (!refurl)
    refurl = opt.referer;

 redirected:
  result = NOCONERROR;
  mynewloc = NULL;
  local_file = NULL;
  proxy_url = NULL;

  proxy = getproxy (u);
  if (proxy)
    {
      /* Parse the proxy URL.  */
      proxy_url = url_parse (proxy, &up_error_code);
      if (!proxy_url)
        {
          logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
                     proxy, url_error (up_error_code));
          xfree (url);
          RESTORE_POST_DATA;
          return PROXERR;
        }
      if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
        {
          logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
          url_free (proxy_url);
          xfree (url);
          RESTORE_POST_DATA;
          return PROXERR;
        }
    }

  if (u->scheme == SCHEME_HTTP
#ifdef HAVE_SSL
      || u->scheme == SCHEME_HTTPS
#endif
      || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
    {
      result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
    }
  else if (u->scheme == SCHEME_FTP)
    {
      /* If this is a redirection, we must not allow recursive FTP
         retrieval, so we save recursion to oldrec, and restore it
         later.  */
      int oldrec = opt.recursive;
      if (redirection_count)
        opt.recursive = 0;
      result = ftp_loop (u, dt, proxy_url);
      opt.recursive = oldrec;

      /* There is a possibility of having HTTP being redirected to
         FTP.  In these cases we must decide whether the text is HTML
         according to the suffix.  The HTML suffixes are `.html',
         `.htm' and a few others, case-insensitive.  */
      if (redirection_count && local_file && u->scheme == SCHEME_FTP)
        {
          if (has_html_suffix_p (local_file))
            *dt |= TEXTHTML;
        }
    }

  if (proxy_url)
    {
      url_free (proxy_url);
      proxy_url = NULL;
    }

  location_changed = (result == NEWLOCATION);
  if (location_changed)
    {
      char *construced_newloc;
      struct url *newloc_parsed;

      assert (mynewloc != NULL);

      if (local_file)
        xfree (local_file);

      /* The HTTP specs only allow absolute URLs to appear in
         redirects, but a ton of boneheaded webservers and CGIs out
         there break the rules and use relative URLs, and popular
         browsers are lenient about this, so wget should be too. */
      construced_newloc = uri_merge (url, mynewloc);
      xfree (mynewloc);
      mynewloc = construced_newloc;

      /* Now, see if this new location makes sense. */
      newloc_parsed = url_parse (mynewloc, &up_error_code);
      if (!newloc_parsed)
        {
          logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc,
                     url_error (up_error_code));
          url_free (u);
          xfree (url);
          xfree (mynewloc);
          RESTORE_POST_DATA;
          return result;
        }

      /* Now mynewloc will become newloc_parsed->url, because if the
         Location contained relative paths like .././something, we
         don't want that propagating as url.  */
      xfree (mynewloc);
      mynewloc = xstrdup (newloc_parsed->url);

      /* Check for max. number of redirections.  */
      if (++redirection_count > MAX_REDIRECTIONS)
        {
          logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
                     MAX_REDIRECTIONS);
          url_free (newloc_parsed);
          url_free (u);
          xfree (url);
          xfree (mynewloc);
          RESTORE_POST_DATA;
          return WRONGCODE;
        }

      xfree (url);
      url = mynewloc;
      url_free (u);
      u = newloc_parsed;

      /* If we're being redirected from POST, we don't want to POST
         again.  Many requests answer POST with a redirection to an
         index page; that redirection is clearly a GET.  We "suspend"
         POST data for the duration of the redirections, and restore
         it when we're done.  */
      if (!post_data_suspended)
        SUSPEND_POST_DATA;

      goto redirected;
    }

  if (local_file)
    {
      if (*dt & RETROKF)
        {
          register_download (u->url, local_file);
          if (redirection_count && 0 != strcmp (origurl, u->url))
            register_redirection (origurl, u->url);
          if (*dt & TEXTHTML)
            register_html (u->url, local_file);
        }
    }

  if (file)
    *file = local_file ? local_file : NULL;
  else
    FREE_MAYBE (local_file);

  url_free (u);

  if (redirection_count)
    {
      if (newloc)
        *newloc = url;
      else
        xfree (url);
    }
  else
    {
      if (newloc)
        *newloc = NULL;
      xfree (url);
    }

  ++global_download_count;

  RESTORE_POST_DATA;

  return result;
}

/* Find the URLs in the file and call retrieve_url() for each of
   them.  If HTML is non-zero, treat the file as HTML, and construct
   the URLs accordingly.

   If opt.recursive is set, call recursive_retrieve() for each file.  */
uerr_t
retrieve_from_file (const char *file, int html, int *count)
{
  uerr_t status;
  struct urlpos *url_list, *cur_url;

  url_list = (html ? get_urls_html (file, NULL, NULL)
              : get_urls_file (file));
  status = RETROK;             /* Suppose everything is OK.  */
  *count = 0;                  /* Reset the URL count.  */

  for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
    {
      char *filename = NULL, *new_file = NULL;
      int dt;

      if (cur_url->ignore_when_downloading)
        continue;

      if (opt.quota && total_downloaded_bytes > opt.quota)
        {
          status = QUOTEXC;
          break;
        }
      if (opt.recursive && cur_url->url->scheme != SCHEME_FTP)
        status = retrieve_tree (cur_url->url->url);
      else
        status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);

      if (filename && opt.delete_after && file_exists_p (filename))
        {
          DEBUGP (("Removing file due to --delete-after in"
                   " retrieve_from_file():\n"));
          logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
          if (unlink (filename))
            logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
          dt &= ~RETROKF;
        }

      FREE_MAYBE (new_file);
      FREE_MAYBE (filename);
    }

  /* Free the linked list of URL-s.  */
  free_urlpos (url_list);

  return status;
}

/* Print `giving up', or `retrying', depending on the impending
   action.  N1 and N2 are the attempt number and the attempt limit.  */
void
printwhat (int n1, int n2)
{
  logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
}

/* If opt.wait or opt.waitretry are specified, and if certain
   conditions are met, sleep the appropriate number of seconds.  See
   the documentation of --wait and --waitretry for more information.

   COUNT is the count of current retrieval, beginning with 1. */
void
sleep_between_retrievals (int count)
{
  static int first_retrieval = 1;

  if (first_retrieval)
    {
      /* Don't sleep before the very first retrieval. */
      first_retrieval = 0;
      return;
    }

  if (opt.waitretry && count > 1)
    {
      /* If opt.waitretry is specified and this is a retry, wait for
         COUNT-1 number of seconds, or for opt.waitretry seconds.  */
      if (count <= opt.waitretry)
        sleep (count - 1);
      else
        usleep (1000000L * opt.waitretry);
    }
  else if (opt.wait)
    {
      if (!opt.random_wait || count > 1)
        /* If random-wait is not specified, or if we are sleeping
           between retries of the same download, sleep the fixed
           interval.  */
        usleep (1000000L * opt.wait);
      else
        {
          /* Sleep a random amount of time averaging in opt.wait
             seconds.  The sleeping amount ranges from 0 to
             opt.wait*2, inclusive.  */
          double waitsecs = 2 * opt.wait * random_float ();
          DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
                   opt.wait, waitsecs));
          usleep (1000000L * waitsecs);
        }
    }
}

/* Free the linked list of urlpos.  */
void
free_urlpos (struct urlpos *l)
{
  while (l)
    {
      struct urlpos *next = l->next;
      if (l->url)
        url_free (l->url);
      FREE_MAYBE (l->local_name);
      xfree (l);
      l = next;
    }
}

/* Rotate FNAME opt.backups times */
void
rotate_backups (const char *fname)
{
  int maxlen = strlen (fname) + 1 + numdigit (opt.backups) + 1;
  char *from = (char *)alloca (maxlen);
  char *to = (char *)alloca (maxlen);
  struct stat sb;
  int i;

  if (stat (fname, &sb) == 0)
    if (S_ISREG (sb.st_mode) == 0)
      return;

  for (i = opt.backups; i > 1; i--)
    {
      sprintf (from, "%s.%d", fname, i - 1);
      sprintf (to, "%s.%d", fname, i);
      rename (from, to);
    }

  sprintf (to, "%s.%d", fname, 1);
  rename (fname, to);
}

static int no_proxy_match PARAMS ((const char *, const char **));

/* Return the URL of the proxy appropriate for url U.  */
static char *
getproxy (struct url *u)
{
  char *proxy = NULL;
  char *rewritten_url;
  static char rewritten_storage[1024];

  if (!opt.use_proxy)
    return NULL;
  if (!no_proxy_match (u->host, (const char **)opt.no_proxy))
    return NULL;

  switch (u->scheme)
    {
    case SCHEME_HTTP:
      proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
      break;
#ifdef HAVE_SSL
    case SCHEME_HTTPS:
      proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
      break;
#endif
    case SCHEME_FTP:
      proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
      break;
    case SCHEME_INVALID:
      break;
    }
  if (!proxy || !*proxy)
    return NULL;

  /* Handle shorthands.  `rewritten_storage' is a kludge to allow
     getproxy() to return static storage. */
  rewritten_url = rewrite_shorthand_url (proxy);
  if (rewritten_url)
    {
      strncpy (rewritten_storage, rewritten_url, sizeof (rewritten_storage));
      rewritten_storage[sizeof (rewritten_storage) - 1] = '\0';
      proxy = rewritten_storage;
    }

  return proxy;
}

/* Should a host be accessed through proxy, concerning no_proxy?  */
int
no_proxy_match (const char *host, const char **no_proxy)
{
  if (!no_proxy)
    return 1;
  else
    return !sufmatch (no_proxy, host);
}
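A note on the --random-wait arithmetic in sleep_between_retrievals above: the comment states that the sleep time ranges from 0 to opt.wait*2 and averages opt.wait seconds, which follows directly from multiplying a uniform random value by 2*opt.wait. The snippet below is a minimal standalone sketch of that calculation, not wget code; it assumes only that random_float() in the listing returns a uniform double in [0, 1), and substitutes rand()/(RAND_MAX + 1.0) for it so the program compiles on its own.

/* Standalone sketch of the --random-wait calculation (not wget code).
   random_float() is assumed uniform in [0, 1); rand() stands in for it.  */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

int
main (void)
{
  double wait = 2.0;            /* stand-in for opt.wait, in seconds */
  double sum = 0.0;
  int i, n = 100000;

  srand ((unsigned) time (NULL));
  for (i = 0; i < n; i++)
    {
      /* Uniform in [0, 2*wait), so the long-run average is wait.  */
      double waitsecs = 2 * wait * (rand () / (RAND_MAX + 1.0));
      sum += waitsecs;
    }
  printf ("average sleep over %d draws: %f (expected about %f)\n",
          n, sum / n, wait);
  return 0;
}

Running it prints an average close to the stand-in opt.wait value, matching the comment's claim about the range and mean of the randomized delay.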

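The renaming order in rotate_backups is the subtle part: the oldest backup slots must be shifted first, otherwise fname.1 would overwrite fname.2 before it is moved aside. The following is a hedged, standalone sketch that only prints the sequence of renames implied by that loop; the file name "wget-log" and the backups value of 3 are illustrative stand-ins, not wget defaults.

/* Standalone sketch of the rotate_backups renaming order (not wget code).
   "wget-log" and backups = 3 are made-up example values.  */
#include <stdio.h>

int
main (void)
{
  const char *fname = "wget-log";
  int backups = 3;
  int i;

  /* Oldest slots first: fname.2 -> fname.3, then fname.1 -> fname.2, ...  */
  for (i = backups; i > 1; i--)
    printf ("rename %s.%d -> %s.%d\n", fname, i - 1, fname, i);

  /* ...and finally the live file becomes fname.1.  */
  printf ("rename %s -> %s.1\n", fname, fname);
  return 0;
}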