📄 http.c
字号:
{ int ext_num = 1; do sprintf (*hs->local_file + local_filename_len, ".%d.html", ext_num++); while (file_exists_p (*hs->local_file)); } *dt |= ADDED_HTML_EXTENSION; } } if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE) { /* If `-c' is in use and the file has been fully downloaded (or the remote file has shrunk), Wget effectively requests bytes after the end of file and the server response with 416. */ logputs (LOG_VERBOSE, _("\\n The file is already fully retrieved; nothing to do.\n\n")); /* In case the caller inspects. */ hs->len = contlen; hs->res = 0; /* Mark as successfully retrieved. */ *dt |= RETROKF; xfree_null (type); CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there might be more bytes in the body. */ return RETRUNNEEDED; } if ((contrange != 0 && contrange != hs->restval) || (H_PARTIAL (statcode) && !contrange)) { /* The Range request was somehow misunderstood by the server. Bail out. */ xfree_null (type); CLOSE_INVALIDATE (sock); return RANGEERR; } hs->contlen = contlen + contrange; if (opt.verbose) { if (*dt & RETROKF) { /* No need to print this output if the body won't be downloaded at all, or if the original server response is printed. */ logputs (LOG_VERBOSE, _("Length: ")); if (contlen != -1) { logputs (LOG_VERBOSE, with_thousand_seps (contlen + contrange)); if (contlen + contrange >= 1024) logprintf (LOG_VERBOSE, " (%s)", human_readable (contlen + contrange)); if (contrange) { if (contlen >= 1024) logprintf (LOG_VERBOSE, _(", %s (%s) remaining"), with_thousand_seps (contlen), human_readable (contlen)); else logprintf (LOG_VERBOSE, _(", %s remaining"), with_thousand_seps (contlen)); } } else logputs (LOG_VERBOSE, opt.ignore_length ? _("ignored") : _("unspecified")); if (type) logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type)); else logputs (LOG_VERBOSE, "\n"); } } xfree_null (type); type = NULL; /* We don't need it any more. */ /* Return if we have no intention of further downloading. */ if (!(*dt & RETROKF) || head_only) { /* In case the caller cares to look... */ hs->len = 0; hs->res = 0; xfree_null (type); if (head_only) /* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the servers not to send body in response to a HEAD request. If you encounter such a server (more likely a broken CGI), use `--no-http-keep-alive'. */ CLOSE_FINISH (sock); else if (keep_alive && skip_short_body (sock, contlen)) /* Successfully skipped the body; also keep using the socket. */ CLOSE_FINISH (sock); else CLOSE_INVALIDATE (sock); return RETRFINISHED; } /* Open the local file. */ if (!output_stream) { mkalldirs (*hs->local_file); if (opt.backups) rotate_backups (*hs->local_file); if (hs->restval) fp = fopen (*hs->local_file, "ab"); else if (ALLOW_CLOBBER) fp = fopen (*hs->local_file, "wb"); else { fp = fopen_excl (*hs->local_file, 1); if (!fp && errno == EEXIST) { /* We cannot just invent a new name and use it (which is what functions like unique_create typically do) because we told the user we'd use this name. Instead, return and retry the download. */ logprintf (LOG_NOTQUIET, _("%s has sprung into existence.\n"), *hs->local_file); CLOSE_INVALIDATE (sock); return FOPEN_EXCL_ERR; } } if (!fp) { logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno)); CLOSE_INVALIDATE (sock); return FOPENERR; } } else fp = output_stream; /* #### This confuses the timestamping code that checks for file size. Maybe we should save some additional information? */ if (opt.save_headers) fwrite (head, 1, strlen (head), fp); /* Now we no longer need to store the response header. */ xfree (head); /* Download the request body. */ flags = 0; if (contlen != -1) /* If content-length is present, read that much; otherwise, read until EOF. The HTTP spec doesn't require the server to actually close the connection when it's done sending data. */ flags |= rb_read_exactly; if (hs->restval > 0 && contrange == 0) /* If the server ignored our range request, instruct fd_read_body to skip the first RESTVAL bytes of body. */ flags |= rb_skip_startpos; hs->len = hs->restval; hs->rd_size = 0; hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0, hs->restval, &hs->rd_size, &hs->len, &hs->dltime, flags); if (hs->res >= 0) CLOSE_FINISH (sock); else CLOSE_INVALIDATE (sock); { /* Close or flush the file. We have to be careful to check for error here. Checking the result of fwrite() is not enough -- errors could go unnoticed! */ int flush_res; if (!output_stream) flush_res = fclose (fp); else flush_res = fflush (fp); if (flush_res == EOF) hs->res = -2; } if (hs->res == -2) return FWRITEERR; return RETRFINISHED;}/* The genuine HTTP loop! This is the part where the retrieval is retried, and retried, and retried, and... */uerr_thttp_loop (struct url *u, char **newloc, char **local_file, const char *referer, int *dt, struct url *proxy){ int count; int use_ts, got_head = 0; /* time-stamping info */ char *filename_plus_orig_suffix; char *local_filename = NULL; char *tms, *locf, *tmrate; uerr_t err; time_t tml = -1, tmr = -1; /* local and remote time-stamps */ wgint local_size = 0; /* the size of the local file */ size_t filename_len; struct http_stat hstat; /* HTTP status */ struct_stat st; char *dummy = NULL; /* This used to be done in main(), but it's a better idea to do it here so that we don't go through the hoops if we're just using FTP or whatever. */ if (opt.cookies) { if (!wget_cookie_jar) wget_cookie_jar = cookie_jar_new (); if (opt.cookies_input && !cookies_loaded_p) { cookie_jar_load (wget_cookie_jar, opt.cookies_input); cookies_loaded_p = 1; } } *newloc = NULL; /* Warn on (likely bogus) wildcard usage in HTTP. */ if (opt.ftp_glob && has_wildcards_p (u->path)) logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n")); xzero (hstat); /* Determine the local filename. */ if (local_file && *local_file) hstat.local_file = local_file; else if (local_file && !opt.output_document) { *local_file = url_file_name (u); hstat.local_file = local_file; } else { dummy = url_file_name (u); hstat.local_file = &dummy; /* be honest about where we will save the file */ if (local_file && opt.output_document) *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document); } if (!opt.output_document) locf = *hstat.local_file; else locf = opt.output_document; hstat.referer = referer; filename_len = strlen (*hstat.local_file); filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig")); if (opt.noclobber && file_exists_p (*hstat.local_file)) { /* If opt.noclobber is turned on and file already exists, do not retrieve the file */ logprintf (LOG_VERBOSE, _("\File `%s' already there; not retrieving.\n\n"), *hstat.local_file); /* If the file is there, we suppose it's retrieved OK. */ *dt |= RETROKF; /* #### Bogusness alert. */ /* If its suffix is "html" or "htm" or similar, assume text/html. */ if (has_html_suffix_p (*hstat.local_file)) *dt |= TEXTHTML; xfree_null (dummy); return RETROK; } use_ts = 0; if (opt.timestamping) { int local_dot_orig_file_exists = 0; if (opt.backup_converted) /* If -K is specified, we'll act on the assumption that it was specified last time these files were downloaded as well, and instead of just comparing local file X against server file X, we'll compare local file X.orig (if extant, else X) against server file X. If -K _wasn't_ specified last time, or the server contains files called *.orig, -N will be back to not operating correctly with -k. */ { /* Would a single s[n]printf() call be faster? --dan Definitely not. sprintf() is horribly slow. It's a different question whether the difference between the two affects a program. Usually I'd say "no", but at one point I profiled Wget, and found that a measurable and non-negligible amount of time was lost calling sprintf() in url.c. Replacing sprintf with inline calls to strcpy() and number_to_string() made a difference. --hniksic */ memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len); memcpy (filename_plus_orig_suffix + filename_len, ".orig", sizeof (".orig")); /* Try to stat() the .orig file. */ if (stat (filename_plus_orig_suffix, &st) == 0) { local_dot_orig_file_exists = 1; local_filename = filename_plus_orig_suffix; } } if (!local_dot_orig_file_exists) /* Couldn't stat() <file>.orig, so try to stat() <file>. */ if (stat (*hstat.local_file, &st) == 0) local_filename = *hstat.local_file; if (local_filename != NULL) /* There was a local file, so we'll check later to see if the version the server has is the same version we already have, allowing us to skip a download. */ { use_ts = 1; tml = st.st_mtime;#ifdef WINDOWS /* Modification time granularity is 2 seconds for Windows, so increase local time by 1 second for later comparison. */ tml++;#endif local_size = st.st_size; got_head = 0; } } /* Reset the counter. */ count = 0; *dt = 0; /* THE loop */ do { /* Increment the pass counter. */ ++count; sleep_between_retrievals (count); /* Get the current time string. */ tms = time_str (NULL); /* Print fetch message, if opt.verbose. */ if (opt.verbose) { char *hurl = url_string (u, 1); char tmp[256]; strcpy (tmp, " "); if (count > 1) sprintf (tmp, _("(try:%2d)"), count); logprintf (LOG_VERBOSE, "--%s-- %s\n %s => `%s'\n", tms, hurl, tmp, locf);#ifdef WINDOWS ws_changetitle (hurl);#endif xfree (hurl); } /* Default document type is empty. However, if spider mode is on or time-stamping is employed, HEAD_ONLY commands is encoded within *dt. */ if (opt.spider || (use_ts && !got_head)) *dt |= HEAD_ONLY; else *dt &= ~HEAD_ONLY; /* Decide whether or not to restart. */ if (opt.always_rest && stat (locf, &st) == 0 && S_ISREG (st.st_mode)) /* When -c is used, continue from on-disk size. (Can't use hstat.len even if count>1 because we don't want a failed first attempt to clobber existing data.) */ hstat.restval = st.st_size; else if (count > 1) /* otherwise, continue where the previous try left off */ hstat.restval = hstat.len; else hstat.restval = 0; /* Decide whether to send the no-cache directive. We send it in two cases: a) we're using a proxy, and we're past our first retrieval. Some proxies are notorious for caching incomplete data, so we require a fresh get. b) caching is explicitly inhibited. */ if ((proxy && count > 1) /* a */ || !opt.allow_cache /* b */ ) *dt |= SEND_NOCACHE; else *dt &= ~SEND_NOCACHE; /* Try fetching the document, or at least its head. */ err = gethttp (u, &hstat, dt, proxy); /* It's unfortunate that wget determines the local filename before finding out the Content-Type of the file. Barring a major restructuring of the code, we need to re-set locf here, since gethttp() may have xrealloc()d *hstat.local_file to tack on ".html". */ if (!opt.output_document) locf = *hstat.local_file; /* Time? */ tms = time_str (NULL); /* Get the new location (with or without the redirection). */ if (hstat.newloc) *newloc = xstrdup (hstat.newloc); switch (err) { case HERR: case HEOF: case CONSOCKERR: case CONCLOSED: case CONERROR: case READERR: case WRITEFAILED: case RANGEERR: case FOPEN_EXCL_ERR: /* Non-fatal errors continue executing the loop, which will bring them to "while" statement at the end, to judge whether the number of tries was exceeded. */ free_hstat (&hstat); printwhat (count, opt.ntry); if (err == FOPEN_EXCL_ERR) { /* Re-determine the file name. */ if (local_file && *local_file) { xfree (*local_file); *local_file = url_file_name (u); hstat.local_file = local_file; } else { xfree (dummy); dummy = url_file_name (u); hstat.local_file = &dummy; } /* be honest about where we will save the file */ if (local_file && opt.output_document) *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document); if (!opt.output_document) locf = *hstat.local_file; else locf = opt.output_document; } continue; case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED: case SSLINITFAILED: case CONTNOTSUPPORTED: /* Fatal errors just return from the function. */ free_hstat (&hstat); xfree_null (dummy); return err; case FWRITEERR: case FOPENERR: /* Another fatal error. */ logputs (LOG_VERBOSE, "\n"); logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"), *hstat.local_file, strerror (errno)); free_hstat (&hstat); xfree_null (dummy); return err; case CONSSLERR: /* Another fatal error. */ logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -