http.c
hs->newloc ? _(" [following]") : ""); CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there might be more bytes in the body. */ FREE_MAYBE (type); FREE_MAYBE (all_headers); return NEWLOCATION; } } /* If content-type is not given, assume text/html. This is because of the multitude of broken CGI's that "forget" to generate the content-type. */ if (!type || 0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) || 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S))) *dt |= TEXTHTML; else *dt &= ~TEXTHTML; if (opt.html_extension && (*dt & TEXTHTML)) /* -E / --html-extension / html_extension = on was specified, and this is a text/html file. If some case-insensitive variation on ".htm[l]" isn't already the file's suffix, tack on ".html". */ { char* last_period_in_local_filename = strrchr(*hs->local_file, '.'); if (last_period_in_local_filename == NULL || !(strcasecmp(last_period_in_local_filename, ".htm") == EQ || strcasecmp(last_period_in_local_filename, ".html") == EQ)) { size_t local_filename_len = strlen(*hs->local_file); *hs->local_file = xrealloc(*hs->local_file, local_filename_len + sizeof(".html")); strcpy(*hs->local_file + local_filename_len, ".html"); *dt |= ADDED_HTML_EXTENSION; } } if (contrange == -1) { /* We did not get a content-range header. This means that the server did not honor our `Range' request. Normally, this means we should reset hs->restval and continue normally. */ /* However, if `-c' is used, we need to be a bit more careful: 1. If `-c' is specified and the file already existed when Wget was started, it would be a bad idea for us to start downloading it from scratch, effectively truncating it. I believe this cannot happen unless `-c' was specified. 2. If `-c' is used on a file that is already fully downloaded, we're requesting bytes after the end of file, which can result in server not honoring `Range'. If this is the case, `Content-Length' will be equal to the length of the file. */ if (opt.always_rest) { /* Check for condition #2. */ if (hs->restval > 0 /* restart was requested. */ && contlen != -1 /* we got content-length. */ && hs->restval >= contlen /* file fully downloaded or has shrunk. */ ) { logputs (LOG_VERBOSE, _("\\n The file is already fully retrieved; nothing to do.\n\n")); /* In case the caller inspects. */ hs->len = contlen; hs->res = 0; /* Mark as successfully retrieved. */ *dt |= RETROKF; FREE_MAYBE (type); FREE_MAYBE (all_headers); CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there might be more bytes in the body. */ return RETRUNNEEDED; } /* Check for condition #1. */ if (hs->no_truncate) { logprintf (LOG_NOTQUIET, _("\\n\Continued download failed on this file, which conflicts with `-c'.\n\Refusing to truncate existing file `%s'.\n\n"), *hs->local_file); FREE_MAYBE (type); FREE_MAYBE (all_headers); CLOSE_INVALIDATE (sock); return CONTNOTSUPPORTED; } /* Fallthrough */ } hs->restval = 0; } else if (contrange != hs->restval || (H_PARTIAL (statcode) && contrange == -1)) { /* This means the whole request was somehow misunderstood by the server. Bail out. */ FREE_MAYBE (type); FREE_MAYBE (all_headers); CLOSE_INVALIDATE (sock); return RANGEERR; } if (hs->restval) { if (contlen != -1) contlen += contrange; else contrange = -1; /* If conent-length was not sent, content-range will be ignored. */ } hs->contlen = contlen; if (opt.verbose) { if ((*dt & RETROKF) && !opt.server_response) { /* No need to print this output if the body won't be downloaded at all, or if the original server response is printed. 
  if (opt.verbose)
    {
      if ((*dt & RETROKF) && !opt.server_response)
        {
          /* No need to print this output if the body won't be
             downloaded at all, or if the original server response is
             printed.  */
          logputs (LOG_VERBOSE, _("Length: "));
          if (contlen != -1)
            {
              logputs (LOG_VERBOSE, legible (contlen));
              if (contrange != -1)
                logprintf (LOG_VERBOSE, _(" (%s to go)"),
                           legible (contlen - contrange));
            }
          else
            logputs (LOG_VERBOSE,
                     opt.ignore_length ? _("ignored") : _("unspecified"));
          if (type)
            logprintf (LOG_VERBOSE, " [%s]\n", type);
          else
            logputs (LOG_VERBOSE, "\n");
        }
    }
  FREE_MAYBE (type);
  type = NULL;                  /* We don't need it any more.  */

  /* Return if we have no intention of further downloading.  */
  if (!(*dt & RETROKF) || (*dt & HEAD_ONLY))
    {
      /* In case the caller cares to look...  */
      hs->len = 0L;
      hs->res = 0;
      FREE_MAYBE (type);
      FREE_MAYBE (all_headers);
      CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
                                  might be more bytes in the body. */
      return RETRFINISHED;
    }

  /* Open the local file.  */
  if (!opt.dfp)
    {
      mkalldirs (*hs->local_file);
      if (opt.backups)
        rotate_backups (*hs->local_file);
      fp = fopen (*hs->local_file, hs->restval ? "ab" : "wb");
      if (!fp)
        {
          logprintf (LOG_NOTQUIET, "%s: %s\n",
                     *hs->local_file, strerror (errno));
          CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
                                      might be more bytes in the body. */
          FREE_MAYBE (all_headers);
          return FOPENERR;
        }
    }
  else                          /* opt.dfp */
    {
      extern int global_download_count;
      fp = opt.dfp;
      /* To ensure that repeated "from scratch" downloads work for -O
         files, we rewind the file pointer, unless restval is non-zero.
         (This works only when -O is used on regular files, but it's
         still a valuable feature.)

         However, this loses when more than one URL is specified on the
         command line: the second rewind eradicates the contents of the
         first download.  Thus we disable the above trick for all the
         downloads except the very first one.

         #### A possible solution to this would be to remember the file
         position in the output document and to seek to that position,
         instead of rewinding.

         We don't truncate stdout, since that breaks
         "wget -O - [...] >> foo".  */
      if (!hs->restval && global_download_count == 0 && opt.dfp != stdout)
        {
          /* This will silently fail for streams that don't correspond
             to regular files, but that's OK.  */
          rewind (fp);
          /* ftruncate is needed because opt.dfp is opened in append
             mode if opt.always_rest is set.  */
          ftruncate (fileno (fp), 0);
          clearerr (fp);
        }
    }

  /* #### This confuses the code that checks for file size.  There
     should be some overhead information.  */
  if (opt.save_headers)
    fwrite (all_headers, 1, all_length, fp);

  /* Get the contents of the document.  */
  hs->res = get_contents (sock, fp, &hs->len, hs->restval,
                          (contlen != -1 ? contlen : 0),
                          &rbuf, keep_alive, &hs->dltime);

  if (hs->res >= 0)
    CLOSE_FINISH (sock);
  else
    CLOSE_INVALIDATE (sock);

  {
    /* Close or flush the file.  We have to be careful to check for
       error here.  Checking the result of fwrite() is not enough --
       errors could go unnoticed!  */
    int flush_res;
    if (!opt.dfp)
      flush_res = fclose (fp);
    else
      flush_res = fflush (fp);
    if (flush_res == EOF)
      hs->res = -2;
  }
  FREE_MAYBE (all_headers);
  if (hs->res == -2)
    return FWRITEERR;
  return RETRFINISHED;
}
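
/* Sketch (an illustrative assumption, not code from this file): the
   caller reacts to the uerr_t codes returned above roughly along
   these lines:

       switch (err)
         {
         case NEWLOCATION:      follow hs.newloc (a redirection);
         case RETRUNNEEDED:     nothing to do, file already complete;
         case CONTNOTSUPPORTED: server ignored `Range' with `-c' in
                                effect; refuse to truncate, give up;
         case FOPENERR:
         case FWRITEERR:        local file trouble; abort the download;
         case RETRFINISHED:     inspect hstat and decide about retries;
         }
*/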

/* The genuine HTTP loop!  This is the part where the retrieval is
   retried, and retried, and retried, and...  */
uerr_t
http_loop (struct url *u, char **newloc, char **local_file,
           const char *referer, int *dt, struct url *proxy)
{
  int count;
  int use_ts, got_head = 0;     /* time-stamping info */
  char *filename_plus_orig_suffix;
  char *local_filename = NULL;
  char *tms, *locf, *tmrate;
  uerr_t err;
  time_t tml = -1, tmr = -1;    /* local and remote time-stamps */
  long local_size = 0;          /* the size of the local file */
  size_t filename_len;
  struct http_stat hstat;       /* HTTP status */
  struct stat st;
  char *dummy = NULL;

  /* This used to be done in main(), but it's a better idea to do it
     here so that we don't go through the hoops if we're just using
     FTP or whatever.  */
  if (opt.cookies)
    {
      if (!wget_cookie_jar)
        wget_cookie_jar = cookie_jar_new ();
      if (opt.cookies_input && !cookies_loaded_p)
        {
          cookie_jar_load (wget_cookie_jar, opt.cookies_input);
          cookies_loaded_p = 1;
        }
    }

  *newloc = NULL;

  /* Warn on (likely bogus) wildcard usage in HTTP.  Don't use
     has_wildcards_p because it would also warn on `?', and we know
     that shows up in CGI paths a *lot*.  */
  if (strchr (u->url, '*'))
    logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));

  /* Determine the local filename.  */
  if (local_file && *local_file)
    hstat.local_file = local_file;
  else if (local_file)
    {
      *local_file = url_file_name (u);
      hstat.local_file = local_file;
    }
  else
    {
      dummy = url_file_name (u);
      hstat.local_file = &dummy;
    }

  if (!opt.output_document)
    locf = *hstat.local_file;
  else
    locf = opt.output_document;

  hstat.referer = referer;

  filename_len = strlen (*hstat.local_file);
  filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));

  if (opt.noclobber && file_exists_p (*hstat.local_file))
    {
      /* If opt.noclobber is turned on and file already exists, do not
         retrieve the file.  */
      logprintf (LOG_VERBOSE, _("\
File `%s' already there, will not retrieve.\n"), *hstat.local_file);
      /* If the file is there, we suppose it's retrieved OK.  */
      *dt |= RETROKF;

      /* #### Bogusness alert.  */
      /* If its suffix is "html" or "htm" or similar, assume
         text/html.  */
      if (has_html_suffix_p (*hstat.local_file))
        *dt |= TEXTHTML;

      FREE_MAYBE (dummy);
      return RETROK;
    }

  use_ts = 0;
  if (opt.timestamping)
    {
      boolean local_dot_orig_file_exists = FALSE;

      if (opt.backup_converted)
        /* If -K is specified, we'll act on the assumption that it was
           specified last time these files were downloaded as well, and
           instead of just comparing local file X against server file
           X, we'll compare local file X.orig (if extant, else X)
           against server file X.  If -K _wasn't_ specified last time,
           or the server contains files called *.orig, -N will be back
           to not operating correctly with -k.  */
        {
          /* Would a single s[n]printf() call be faster?  --dan

             Definitely not.  sprintf() is horribly slow.  It's a
             different question whether the difference between the two
             affects a program.  Usually I'd say "no", but at one point
             I profiled Wget, and found that a measurable and
             non-negligible amount of time was lost calling sprintf()
             in url.c.  Replacing sprintf with inline calls to strcpy()
             and long_to_string() made a difference.  --hniksic */
          memcpy (filename_plus_orig_suffix, *hstat.local_file,
                  filename_len);
          memcpy (filename_plus_orig_suffix + filename_len,
                  ".orig", sizeof (".orig"));

          /* Try to stat() the .orig file.  */
          if (stat (filename_plus_orig_suffix, &st) == 0)
            {
              local_dot_orig_file_exists = TRUE;
              local_filename = filename_plus_orig_suffix;
            }
        }
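
      /* Illustrative example (an assumption, not from the source):
         after `wget -N -k -K http://host/index.html', the
         link-converted page is saved as index.html while the pristine
         server copy is kept as index.html.orig.  Time-stamping must
         compare the server's file against index.html.orig, because
         the conversion step rewrote index.html and bumped its
         modification time.  */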
      if (!local_dot_orig_file_exists)
        /* Couldn't stat() <file>.orig, so try to stat() <file>.  */
        if (stat (*hstat.local_file, &st) == 0)
          local_filename = *hstat.local_file;

      if (local_filename != NULL)
        /* There was a local file, so we'll check later to see if the
           version the server has is the same version we already have,
           allowing us to skip a download.  */
        {
          use_ts = 1;
          tml = st.st_mtime;
#ifdef WINDOWS
          /* Modification time granularity is 2 seconds for Windows, so
             increase local time by 1 second for later comparison.  */
          tml++;
#endif
          local_size = st.st_size;
          got_head = 0;
        }
    }

  /* Reset the counter.  */
  count = 0;
  *dt = 0 | ACCEPTRANGES;

  /* THE loop */
  do
    {
      /* Increment the pass counter.  */
      ++count;
      sleep_between_retrievals (count);

      /* Get the current time string.  */
      tms = time_str (NULL);

      /* Print fetch message, if opt.verbose.  */
      if (opt.verbose)
        {
          char *hurl = url_string (u, 1);
          char tmp[15];
          strcpy (tmp, "        ");
          if (count > 1)
            sprintf (tmp, _("(try:%2d)"), count);
          logprintf (LOG_VERBOSE, "--%s--  %s\n  %s => `%s'\n",
                     tms, hurl, tmp, locf);
#ifdef WINDOWS
          ws_changetitle (hurl, 1);
#endif
          xfree (hurl);
        }

      /* Default document type is empty.  However, if spider mode is
         on or time-stamping is employed, the HEAD_ONLY command is
         encoded within *dt.  */
      if (opt.spider || (use_ts && !got_head))
        *dt |= HEAD_ONLY;
      else
        *dt &= ~HEAD_ONLY;

      /* Assume no restarting.  */
      hstat.restval = 0L;

      /* Decide whether or not to restart.  */
      if (((count > 1 && (*dt & ACCEPTRANGES)) || opt.always_rest)
          /* #### this calls access() and then stat(); could be
             optimized. */
          && file_exists_p (locf))
        if (stat (locf, &st) == 0 && S_ISREG (st.st_mode))
          hstat.restval = st.st_size;

      /* If `-c' is used and the file exists and is non-empty, refuse
         to truncate it if the server doesn't support continued
         downloads.  */
      hstat.no_truncate = 0;
      if (opt.always_rest && hstat.restval)
        hstat.no_truncate = 1;

      /* Decide whether to send the no-cache directive.  We send it in
         two cases:
           a) we're using a proxy, and we're past our first retrieval.
              Some proxies are notorious for caching incomplete data,
              so we require a fresh get.
           b) caching is explicitly inhibited.  */
      if ((proxy && count > 1)  /* a */