📄 http.c
字号:
|| !opt.allow_cache /* b */ ) *dt |= SEND_NOCACHE; else *dt &= ~SEND_NOCACHE; /* Try fetching the document, or at least its head. */ err = gethttp (u, &hstat, dt, proxy); /* It's unfortunate that wget determines the local filename before finding out the Content-Type of the file. Barring a major restructuring of the code, we need to re-set locf here, since gethttp() may have xrealloc()d *hstat.local_file to tack on ".html". */ if (!opt.output_document) locf = *hstat.local_file; else locf = opt.output_document; /* Time? */ tms = time_str (NULL); /* Get the new location (with or without the redirection). */ if (hstat.newloc) *newloc = xstrdup (hstat.newloc); switch (err) { case HERR: case HEOF: case CONSOCKERR: case CONCLOSED: case CONERROR: case READERR: case WRITEFAILED: case RANGEERR: /* Non-fatal errors continue executing the loop, which will bring them to "while" statement at the end, to judge whether the number of tries was exceeded. */ free_hstat (&hstat); printwhat (count, opt.ntry); continue; break; case HOSTERR: case CONREFUSED: case PROXERR: case AUTHFAILED: case SSLERRCTXCREATE: case CONTNOTSUPPORTED: /* Fatal errors just return from the function. */ free_hstat (&hstat); FREE_MAYBE (dummy); return err; break; case FWRITEERR: case FOPENERR: /* Another fatal error. */ logputs (LOG_VERBOSE, "\n"); logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"), *hstat.local_file, strerror (errno)); free_hstat (&hstat); FREE_MAYBE (dummy); return err; break; case CONSSLERR: /* Another fatal error. */ logputs (LOG_VERBOSE, "\n"); logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n")); free_hstat (&hstat); FREE_MAYBE (dummy); return err; break; case NEWLOCATION: /* Return the new location to the caller. */ if (!hstat.newloc) { logprintf (LOG_NOTQUIET, _("ERROR: Redirection (%d) without location.\n"), hstat.statcode); free_hstat (&hstat); FREE_MAYBE (dummy); return WRONGCODE; } free_hstat (&hstat); FREE_MAYBE (dummy); return NEWLOCATION; break; case RETRUNNEEDED: /* The file was already fully retrieved. */ free_hstat (&hstat); FREE_MAYBE (dummy); return RETROK; break; case RETRFINISHED: /* Deal with you later. */ break; default: /* All possibilities should have been exhausted. */ abort (); } if (!(*dt & RETROKF)) { if (!opt.verbose) { /* #### Ugly ugly ugly! */ char *hurl = url_string (u, 1); logprintf (LOG_NONVERBOSE, "%s:\n", hurl); xfree (hurl); } logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), tms, hstat.statcode, hstat.error); logputs (LOG_VERBOSE, "\n"); free_hstat (&hstat); FREE_MAYBE (dummy); return WRONGCODE; } /* Did we get the time-stamp? */ if (!got_head) { if (opt.timestamping && !hstat.remote_time) { logputs (LOG_NOTQUIET, _("\Last-modified header missing -- time-stamps turned off.\n")); } else if (hstat.remote_time) { /* Convert the date-string into struct tm. */ tmr = http_atotm (hstat.remote_time); if (tmr == (time_t) (-1)) logputs (LOG_VERBOSE, _("\Last-modified header invalid -- time-stamp ignored.\n")); } } /* The time-stamping section. */ if (use_ts) { got_head = 1; *dt &= ~HEAD_ONLY; use_ts = 0; /* no more time-stamping */ count = 0; /* the retrieve count for HEAD is reset */ if (hstat.remote_time && tmr != (time_t) (-1)) { /* Now time-stamping can be used validly. Time-stamping means that if the sizes of the local and remote file match, and local file is newer than the remote file, it will not be retrieved. Otherwise, the normal download procedure is resumed. */ if (tml >= tmr && (hstat.contlen == -1 || local_size == hstat.contlen)) { logprintf (LOG_VERBOSE, _("\Server file no newer than local file `%s' -- not retrieving.\n\n"), local_filename); free_hstat (&hstat); FREE_MAYBE (dummy); return RETROK; } else if (tml >= tmr) logprintf (LOG_VERBOSE, _("\The sizes do not match (local %ld) -- retrieving.\n"), local_size); else logputs (LOG_VERBOSE, _("Remote file is newer, retrieving.\n")); } free_hstat (&hstat); continue; } if ((tmr != (time_t) (-1)) && !opt.spider && ((hstat.len == hstat.contlen) || ((hstat.res == 0) && ((hstat.contlen == -1) || (hstat.len >= hstat.contlen && !opt.kill_longer))))) { /* #### This code repeats in http.c and ftp.c. Move it to a function! */ const char *fl = NULL; if (opt.output_document) { if (opt.od_known_regular) fl = opt.output_document; } else fl = *hstat.local_file; if (fl) touch (fl, tmr); } /* End of time-stamping section. */ if (opt.spider) { logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error); FREE_MAYBE (dummy); return RETROK; } tmrate = retr_rate (hstat.len - hstat.restval, hstat.dltime, 0); if (hstat.len == hstat.contlen) { if (*dt & RETROKF) { logprintf (LOG_VERBOSE, _("%s (%s) - `%s' saved [%ld/%ld]\n\n"), tms, tmrate, locf, hstat.len, hstat.contlen); logprintf (LOG_NONVERBOSE, "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n", tms, u->url, hstat.len, hstat.contlen, locf, count); } ++opt.numurls; total_downloaded_bytes += hstat.len; /* Remember that we downloaded the file for later ".orig" code. */ if (*dt & ADDED_HTML_EXTENSION) downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf); else downloaded_file(FILE_DOWNLOADED_NORMALLY, locf); free_hstat (&hstat); FREE_MAYBE (dummy); return RETROK; } else if (hstat.res == 0) /* No read error */ { if (hstat.contlen == -1) /* We don't know how much we were supposed to get, so assume we succeeded. */ { if (*dt & RETROKF) { logprintf (LOG_VERBOSE, _("%s (%s) - `%s' saved [%ld]\n\n"), tms, tmrate, locf, hstat.len); logprintf (LOG_NONVERBOSE, "%s URL:%s [%ld] -> \"%s\" [%d]\n", tms, u->url, hstat.len, locf, count); } ++opt.numurls; total_downloaded_bytes += hstat.len; /* Remember that we downloaded the file for later ".orig" code. */ if (*dt & ADDED_HTML_EXTENSION) downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf); else downloaded_file(FILE_DOWNLOADED_NORMALLY, locf); free_hstat (&hstat); FREE_MAYBE (dummy); return RETROK; } else if (hstat.len < hstat.contlen) /* meaning we lost the connection too soon */ { logprintf (LOG_VERBOSE, _("%s (%s) - Connection closed at byte %ld. "), tms, tmrate, hstat.len); printwhat (count, opt.ntry); free_hstat (&hstat); continue; } else if (!opt.kill_longer) /* meaning we got more than expected */ { logprintf (LOG_VERBOSE, _("%s (%s) - `%s' saved [%ld/%ld])\n\n"), tms, tmrate, locf, hstat.len, hstat.contlen); logprintf (LOG_NONVERBOSE, "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n", tms, u->url, hstat.len, hstat.contlen, locf, count); ++opt.numurls; total_downloaded_bytes += hstat.len; /* Remember that we downloaded the file for later ".orig" code. */ if (*dt & ADDED_HTML_EXTENSION) downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf); else downloaded_file(FILE_DOWNLOADED_NORMALLY, locf); free_hstat (&hstat); FREE_MAYBE (dummy); return RETROK; } else /* the same, but not accepted */ { logprintf (LOG_VERBOSE, _("%s (%s) - Connection closed at byte %ld/%ld. "), tms, tmrate, hstat.len, hstat.contlen); printwhat (count, opt.ntry); free_hstat (&hstat); continue; } } else /* now hstat.res can only be -1 */ { if (hstat.contlen == -1) { logprintf (LOG_VERBOSE, _("%s (%s) - Read error at byte %ld (%s)."), tms, tmrate, hstat.len, strerror (errno)); printwhat (count, opt.ntry); free_hstat (&hstat); continue; } else /* hstat.res == -1 and contlen is given */ { logprintf (LOG_VERBOSE, _("%s (%s) - Read error at byte %ld/%ld (%s). "), tms, tmrate, hstat.len, hstat.contlen, strerror (errno)); printwhat (count, opt.ntry); free_hstat (&hstat); continue; } } /* not reached */ break; } while (!opt.ntry || (count < opt.ntry)); return TRYLIMEXC;}/* Converts struct tm to time_t, assuming the data in tm is UTC rather than local timezone. mktime is similar but assumes struct tm, also known as the "broken-down" form of time, is in local time zone. mktime_from_utc uses mktime to make the conversion understanding that an offset will be introduced by the local time assumption. mktime_from_utc then measures the introduced offset by applying gmtime to the initial result and applying mktime to the resulting "broken-down" form. The difference between the two mktime results is the measured offset which is then subtracted from the initial mktime result to yield a calendar time which is the value returned. tm_isdst in struct tm is set to 0 to force mktime to introduce a consistent offset (the non DST offset) since tm and tm+o might be on opposite sides of a DST change. Some implementations of mktime return -1 for the nonexistent localtime hour at the beginning of DST. In this event, use mktime(tm - 1hr) + 3600. Schematically mktime(tm) --> t+o gmtime(t+o) --> tm+o mktime(tm+o) --> t+2o t+o - (t+2o - t+o) = t Note that glibc contains a function of the same purpose named `timegm' (reverse of gmtime). But obviously, it is not universally available, and unfortunately it is not straightforwardly extractable for use here. Perhaps configure should detect timegm and use it where available. Contributed by Roger Beeman <beeman@cisco.com>, with the help of Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. Further improved by Roger with assistance from Edward J. Sabol based on input by Jamie Zawinski. */static time_tmktime_from_utc (struct tm *t){ time_t tl, tb; struct tm *tg; tl = mktime (t); if (tl == -1) { t->tm_hour--; tl = mktime (t); if (tl == -1) return -1; /* can't deal with output from strptime */ tl += 3600; } tg = gmtime (&tl); tg->tm_isdst = 0; tb = mktime (tg); if (tb == -1) { tg->tm_hour--; tb = mktime (tg); if (tb == -1) return -1; /* can't deal with output from gmtime */ tb += 3600; } return (tl - (tb - tl));}/* Check whether the result of strptime() indicates success. strptime() returns the pointer to how far it got to in the string. The processing has been successful if the string is at `GMT' or `+X', or at the end of the string. In extended regexp parlance, the function returns 1 if P matches "^ *(GMT|[+-][0-9]|$)", 0 otherwise. P being NULL (which strptime can return) is considered a failure and 0 is returned. */static intcheck_end (const char *p){ if (!p) return 0; while (ISSPACE (*p)) ++p; if (!*p || (p[0] == 'G' && p[1] == 'M' && p[2] == 'T') || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1]))) return 1; else return 0;}/* Convert the textual specification of time in TIME_STRING to the number of seconds since the Epoch. TIME_STRING can be in any of the three formats RFC2068 allows the HTTP servers to emit -- RFC1123-date, RFC850-date or asctime-date. Timezones are ignored, and should be GMT. Return the computed time_t representation, or -1 if the conversion fails. This function uses strptime with various string formats for parsing TIME_STRING. This results in a parser that is not as lenient in interpreting TIME_STRING as I would like it to be. Being based on strptime, it always allows shortened months, one-digit days, etc., but due to the multitude of formats in which time can be represented, an ideal HTTP time parser would be even more forgiving. It should completely ignore things like week days and concentrate only on the various forms of representing years, months, days, hours, minutes, and seconds. For example, it would be nice if it accepted ISO 8601 out of the box. I've investigated free and PD code for this purpose, but none was usable. getdate was big and unwieldy, and had potential copyright issues, or so I was informed. Dr. Marcus Hennecke's atotm(), distributed with phttpd, is excellent, but we cannot use it because it is not assigned to the FSF. So I stuck it with strptime. */time_thttp_atotm (const char *time_string){ /* NOTE: Solaris strptime man page claims that %n and %t match white space, but that's not universally available. Instead, we simply use ` ' to mean "skip all WS", which works under all strptime implementations I've tested. */ static const char *time_formats[] = {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -