retr.c
  if (!refurl)
    refurl = opt.referer;

 redirected:
  result = NOCONERROR;
  mynewloc = NULL;
  local_file = NULL;
  proxy_url = NULL;

  proxy = getproxy (u);
  if (proxy)
    {
      /* Parse the proxy URL.  */
      proxy_url = url_parse (proxy, &up_error_code);
      if (!proxy_url)
        {
          logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
                     proxy, url_error (up_error_code));
          xfree (url);
          RESTORE_POST_DATA;
          return PROXERR;
        }
      if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
        {
          logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"),
                     proxy);
          url_free (proxy_url);
          xfree (url);
          RESTORE_POST_DATA;
          return PROXERR;
        }
    }

  if (u->scheme == SCHEME_HTTP
#ifdef HAVE_SSL
      || u->scheme == SCHEME_HTTPS
#endif
      || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
    {
      result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
    }
  else if (u->scheme == SCHEME_FTP)
    {
      /* If this is a redirection, we must not allow recursive FTP
         retrieval, so we save recursion to oldrec, and restore it
         later.  */
      int oldrec = opt.recursive;
      if (redirection_count)
        opt.recursive = 0;
      result = ftp_loop (u, dt, proxy_url);
      opt.recursive = oldrec;

      /* There is a possibility of having HTTP being redirected to
         FTP.  In these cases we must decide whether the text is HTML
         according to the suffix.  The HTML suffixes are `.html',
         `.htm' and a few others, case-insensitive.  */
      if (redirection_count && local_file && u->scheme == SCHEME_FTP)
        {
          if (has_html_suffix_p (local_file))
            *dt |= TEXTHTML;
        }
    }

  if (proxy_url)
    {
      url_free (proxy_url);
      proxy_url = NULL;
    }

  location_changed = (result == NEWLOCATION);
  if (location_changed)
    {
      char *constructed_newloc;
      struct url *newloc_parsed;

      assert (mynewloc != NULL);

      if (local_file)
        xfree (local_file);

      /* The HTTP specs only allow absolute URLs to appear in
         redirects, but a ton of boneheaded webservers and CGIs out
         there break the rules and use relative URLs, and popular
         browsers are lenient about this, so wget should be too.  */
      constructed_newloc = uri_merge (url, mynewloc);
      xfree (mynewloc);
      mynewloc = constructed_newloc;

      /* Now, see if this new location makes sense.  */
      newloc_parsed = url_parse (mynewloc, &up_error_code);
      if (!newloc_parsed)
        {
          logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc,
                     url_error (up_error_code));
          url_free (u);
          xfree (url);
          xfree (mynewloc);
          RESTORE_POST_DATA;
          return result;
        }

      /* Now mynewloc will become newloc_parsed->url, because if the
         Location contained relative paths like .././something, we
         don't want that propagating as url.  */
      xfree (mynewloc);
      mynewloc = xstrdup (newloc_parsed->url);

      /* Check for max. number of redirections.  */
      if (++redirection_count > MAX_REDIRECTIONS)
        {
          logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
                     MAX_REDIRECTIONS);
          url_free (newloc_parsed);
          url_free (u);
          xfree (url);
          xfree (mynewloc);
          RESTORE_POST_DATA;
          return WRONGCODE;
        }

      xfree (url);
      url = mynewloc;
      url_free (u);
      u = newloc_parsed;

      /* If we're being redirected from POST, we don't want to POST
         again.  Many requests answer POST with a redirection to an
         index page; that redirection is clearly a GET.  We "suspend"
         POST data for the duration of the redirections, and restore
         it when we're done.  */
      if (!post_data_suspended)
        SUSPEND_POST_DATA;

      goto redirected;
    }

  if (local_file)
    {
      if (*dt & RETROKF)
        {
          register_download (u->url, local_file);
          if (redirection_count && 0 != strcmp (origurl, u->url))
            register_redirection (origurl, u->url);
          if (*dt & TEXTHTML)
            register_html (u->url, local_file);
        }
    }

  if (file)
    *file = local_file ? local_file : NULL;
  else
    FREE_MAYBE (local_file);

  url_free (u);

  if (redirection_count)
    {
      if (newloc)
        *newloc = url;
      else
        xfree (url);
    }
  else
    {
      if (newloc)
        *newloc = NULL;
      xfree (url);
    }

  ++global_download_count;
  RESTORE_POST_DATA;

  return result;
}
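/* SUSPEND_POST_DATA and RESTORE_POST_DATA, used throughout the
   function above, are macros defined earlier in retr.c, outside this
   excerpt.  A plausible sketch follows, assuming they stash
   opt.post_data and opt.post_file_name in variables declared next to
   post_data_suspended inside retrieve_url; the saved_* names are
   assumptions, not necessarily the identifiers the real file uses.  */

#define SUSPEND_POST_DATA do {                          \
  post_data_suspended = 1;                              \
  saved_post_data = opt.post_data;                      \
  saved_post_file_name = opt.post_file_name;            \
  opt.post_data = NULL;                                 \
  opt.post_file_name = NULL;                            \
} while (0)

#define RESTORE_POST_DATA do {                          \
  if (post_data_suspended)                              \
    {                                                   \
      opt.post_data = saved_post_data;                  \
      opt.post_file_name = saved_post_file_name;        \
      post_data_suspended = 0;                          \
    }                                                   \
} while (0)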
/* Find the URLs in the file and call retrieve_url() for each of
   them.  If HTML is non-zero, treat the file as HTML, and construct
   the URLs accordingly.

   If opt.recursive is set, call recursive_retrieve() for each file.  */

uerr_t
retrieve_from_file (const char *file, int html, int *count)
{
  uerr_t status;
  struct urlpos *url_list, *cur_url;

  url_list = (html ? get_urls_html (file, NULL, NULL)
              : get_urls_file (file));
  status = RETROK;              /* Suppose everything is OK.  */
  *count = 0;                   /* Reset the URL count.  */

  for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
    {
      char *filename = NULL, *new_file = NULL;
      int dt;

      if (cur_url->ignore_when_downloading)
        continue;

      if (opt.quota && total_downloaded_bytes > opt.quota)
        {
          status = QUOTEXC;
          break;
        }
      if (opt.recursive && cur_url->url->scheme != SCHEME_FTP)
        status = retrieve_tree (cur_url->url->url);
      else
        status = retrieve_url (cur_url->url->url, &filename, &new_file,
                               NULL, &dt);

      if (filename && opt.delete_after && file_exists_p (filename))
        {
          DEBUGP (("Removing file due to --delete-after in"
                   " retrieve_from_file():\n"));
          logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
          if (unlink (filename))
            logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
          dt &= ~RETROKF;
        }

      FREE_MAYBE (new_file);
      FREE_MAYBE (filename);
    }

  /* Free the linked list of URL-s.  */
  free_urlpos (url_list);

  return status;
}

/* Print `giving up', or `retrying', depending on the impending
   action.  N1 and N2 are the attempt number and the attempt limit.  */

void
printwhat (int n1, int n2)
{
  logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
}

/* If opt.wait or opt.waitretry are specified, and if certain
   conditions are met, sleep the appropriate number of seconds.  See
   the documentation of --wait and --waitretry for more information.

   COUNT is the count of current retrieval, beginning with 1.  */

void
sleep_between_retrievals (int count)
{
  static int first_retrieval = 1;

  if (first_retrieval)
    {
      /* Don't sleep before the very first retrieval.  */
      first_retrieval = 0;
      return;
    }

  if (opt.waitretry && count > 1)
    {
      /* If opt.waitretry is specified and this is a retry, wait for
         COUNT-1 number of seconds, or for opt.waitretry seconds.  */
      if (count <= opt.waitretry)
        sleep (count - 1);
      else
        usleep (1000000L * opt.waitretry);
    }
  else if (opt.wait)
    {
      if (!opt.random_wait || count > 1)
        /* If random-wait is not specified, or if we are sleeping
           between retries of the same download, sleep the fixed
           interval.  */
        usleep (1000000L * opt.wait);
      else
        {
          /* Sleep a random amount of time averaging in opt.wait
             seconds.  The sleeping amount ranges from 0 to
             opt.wait*2, inclusive.  */
          double waitsecs = 2 * opt.wait * random_float ();
          DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
                   opt.wait, waitsecs));
          usleep (1000000L * waitsecs);
        }
    }
}
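/* sleep_between_retrievals calls random_float(), a helper from
   utils.c that is not part of this excerpt.  A minimal stand-in,
   assuming all it needs to provide is a uniform value in [0, 1):  */

#include <stdlib.h>

static double
random_float (void)
{
  /* rand() yields an int in [0, RAND_MAX]; dividing by RAND_MAX + 1
     keeps the result strictly below 1, so waitsecs above stays below
     opt.wait * 2.  */
  return rand () / ((double) RAND_MAX + 1.0);
}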
/* Free the linked list of urlpos.  */

void
free_urlpos (struct urlpos *l)
{
  while (l)
    {
      struct urlpos *next = l->next;
      if (l->url)
        url_free (l->url);
      FREE_MAYBE (l->local_name);
      xfree (l);
      l = next;
    }
}

/* Rotate FNAME opt.backups times */
void
rotate_backups (const char *fname)
{
  int maxlen = strlen (fname) + 1 + numdigit (opt.backups) + 1;
  char *from = (char *) alloca (maxlen);
  char *to = (char *) alloca (maxlen);
  struct stat sb;
  int i;

  if (stat (fname, &sb) == 0)
    if (S_ISREG (sb.st_mode) == 0)
      return;

  for (i = opt.backups; i > 1; i--)
    {
      sprintf (from, "%s.%d", fname, i - 1);
      sprintf (to, "%s.%d", fname, i);
      rename (from, to);
    }

  sprintf (to, "%s.%d", fname, 1);
  rename (fname, to);
}

static int no_proxy_match PARAMS ((const char *, const char **));

/* Return the URL of the proxy appropriate for url U.  */
static char *
getproxy (struct url *u)
{
  char *proxy = NULL;
  char *rewritten_url;
  static char rewritten_storage[1024];

  if (!opt.use_proxy)
    return NULL;
  if (!no_proxy_match (u->host, (const char **) opt.no_proxy))
    return NULL;

  switch (u->scheme)
    {
    case SCHEME_HTTP:
      proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
      break;
#ifdef HAVE_SSL
    case SCHEME_HTTPS:
      proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
      break;
#endif
    case SCHEME_FTP:
      proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
      break;
    case SCHEME_INVALID:
      break;
    }
  if (!proxy || !*proxy)
    return NULL;

  /* Handle shorthands.  `rewritten_storage' is a kludge to allow
     getproxy() to return static storage.  */
  rewritten_url = rewrite_shorthand_url (proxy);
  if (rewritten_url)
    {
      strncpy (rewritten_storage, rewritten_url, sizeof (rewritten_storage));
      rewritten_storage[sizeof (rewritten_storage) - 1] = '\0';
      proxy = rewritten_storage;
    }

  return proxy;
}

/* Should a host be accessed through proxy, concerning no_proxy?  */
int
no_proxy_match (const char *host, const char **no_proxy)
{
  if (!no_proxy)
    return 1;
  else
    return !sufmatch (no_proxy, host);
}
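/* no_proxy_match above delegates to sufmatch() from utils.c, which is
   not shown here.  A minimal suffix matcher of the same shape,
   assuming it reports whether WHAT ends with one of the entries in
   the NULL-terminated LIST (the real helper also matches
   case-insensitively); sufmatch_sketch is an illustrative name, not
   the actual function:  */

#include <string.h>

static int
sufmatch_sketch (const char **list, const char *what)
{
  int i, wlen = strlen (what);

  for (i = 0; list[i]; i++)
    {
      int llen = strlen (list[i]);

      /* An entry matches when it is no longer than WHAT and equal to
         its tail; e.g. "example.com" matches host "www.example.com".  */
      if (llen <= wlen && 0 == strcmp (what + wlen - llen, list[i]))
        return 1;
    }
  return 0;
}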