📄 http.c
字号:
/* HTTP support.
   Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2002
   Free Software Foundation, Inc.

This file is part of GNU Wget.

GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables.  You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL".  If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so.  If you do not wish to do
so, delete this exception statement from your version.
*/#include <config.h>#include <stdio.h>#include <stdlib.h>#include <sys/types.h>#ifdef HAVE_STRING_H# include <string.h>#else# include <strings.h>#endif#ifdef HAVE_UNISTD_H# include <unistd.h>#endif#include <assert.h>#include <errno.h>#if TIME_WITH_SYS_TIME# include <sys/time.h># include <time.h>#else# if HAVE_SYS_TIME_H# include <sys/time.h># else# include <time.h># endif#endif#ifndef errnoextern int errno;#endif#include "wget.h"#include "utils.h"#include "url.h"#include "host.h"#include "rbuf.h"#include "retr.h"#include "headers.h"#include "connect.h"#include "netrc.h"#ifdef HAVE_SSL# include "gen_sslfunc.h"#endif /* HAVE_SSL */#include "cookies.h"#ifdef USE_DIGEST# include "gen-md5.h"#endif#include "convert.h"extern char *version_string;extern LARGE_INT total_downloaded_bytes;static int cookies_loaded_p;struct cookie_jar *wget_cookie_jar;#define TEXTHTML_S "text/html"#define TEXTXHTML_S "application/xhtml+xml"#define HTTP_ACCEPT "*/*"/* Some status code validation macros: */#define H_20X(x) (((x) >= 200) && ((x) < 300))#define H_PARTIAL(x) ((x) == HTTP_STATUS_PARTIAL_CONTENTS)#define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY \ || (x) == HTTP_STATUS_MOVED_TEMPORARILY \ || (x) == HTTP_STATUS_TEMPORARY_REDIRECT)/* HTTP/1.0 status codes from RFC1945, provided for reference. *//* Successful 2xx. */#define HTTP_STATUS_OK 200#define HTTP_STATUS_CREATED 201#define HTTP_STATUS_ACCEPTED 202#define HTTP_STATUS_NO_CONTENT 204#define HTTP_STATUS_PARTIAL_CONTENTS 206/* Redirection 3xx. */#define HTTP_STATUS_MULTIPLE_CHOICES 300#define HTTP_STATUS_MOVED_PERMANENTLY 301#define HTTP_STATUS_MOVED_TEMPORARILY 302#define HTTP_STATUS_NOT_MODIFIED 304#define HTTP_STATUS_TEMPORARY_REDIRECT 307/* Client error 4xx. */#define HTTP_STATUS_BAD_REQUEST 400#define HTTP_STATUS_UNAUTHORIZED 401#define HTTP_STATUS_FORBIDDEN 403#define HTTP_STATUS_NOT_FOUND 404/* Server errors 5xx. 
*/#define HTTP_STATUS_INTERNAL 500#define HTTP_STATUS_NOT_IMPLEMENTED 501#define HTTP_STATUS_BAD_GATEWAY 502#define HTTP_STATUS_UNAVAILABLE 503/* Parse the HTTP status line, which is of format: HTTP-Version SP Status-Code SP Reason-Phrase The function returns the status-code, or -1 if the status line is malformed. The pointer to reason-phrase is returned in RP. */static intparse_http_status_line (const char *line, const char **reason_phrase_ptr){ /* (the variables must not be named `major' and `minor', because that breaks compilation with SunOS4 cc.) */ int mjr, mnr, statcode; const char *p; *reason_phrase_ptr = NULL; /* The standard format of HTTP-Version is: `HTTP/X.Y', where X is major version, and Y is minor version. */ if (strncmp (line, "HTTP/", 5) != 0) return -1; line += 5; /* Calculate major HTTP version. */ p = line; for (mjr = 0; ISDIGIT (*line); line++) mjr = 10 * mjr + (*line - '0'); if (*line != '.' || p == line) return -1; ++line; /* Calculate minor HTTP version. */ p = line; for (mnr = 0; ISDIGIT (*line); line++) mnr = 10 * mnr + (*line - '0'); if (*line != ' ' || p == line) return -1; /* Wget will accept only 1.0 and higher HTTP-versions. The value of minor version can be safely ignored. */ if (mjr < 1) return -1; ++line; /* Calculate status code. */ if (!(ISDIGIT (*line) && ISDIGIT (line[1]) && ISDIGIT (line[2]))) return -1; statcode = 100 * (*line - '0') + 10 * (line[1] - '0') + (line[2] - '0'); /* Set up the reason phrase pointer. */ line += 3; /* RFC2068 requires SPC here, but we allow the string to finish here, in case no reason-phrase is present. */ if (*line != ' ') { if (!*line) *reason_phrase_ptr = line; else return -1; } else *reason_phrase_ptr = line + 1; return statcode;}#define WMIN(x, y) ((x) > (y) ? (y) : (x))/* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly PROMISED_SIZE bytes are sent over the wire -- if the file is longer, read only that much; if the file is shorter, report an error. 
*/static intpost_file (int sock, void *ssl, const char *file_name, long promised_size){ static char chunk[8192]; long written = 0; int write_error; FILE *fp; /* Only one of SOCK and SSL may be active at the same time. */ assert (sock > -1 || ssl != NULL); assert (sock == -1 || ssl == NULL); DEBUGP (("[writing POST file %s ... ", file_name)); fp = fopen (file_name, "rb"); if (!fp) return -1; while (!feof (fp) && written < promised_size) { int towrite; int length = fread (chunk, 1, sizeof (chunk), fp); if (length == 0) break; towrite = WMIN (promised_size - written, length);#ifdef HAVE_SSL if (ssl) write_error = ssl_iwrite (ssl, chunk, towrite); else#endif write_error = iwrite (sock, chunk, towrite); if (write_error < 0) { fclose (fp); return -1; } written += towrite; } fclose (fp); /* If we've written less than was promised, report a (probably nonsensical) error rather than break the promise. */ if (written < promised_size) { errno = EINVAL; return -1; } assert (written == promised_size); DEBUGP (("done]\n")); return 0;}/* Functions to be used as arguments to header_process(): */struct http_process_range_closure { long first_byte_pos; long last_byte_pos; long entity_length;};/* Parse the `Content-Range' header and extract the information it contains. Returns 1 if successful, -1 otherwise. */static inthttp_process_range (const char *hdr, void *arg){ struct http_process_range_closure *closure = (struct http_process_range_closure *)arg; long num; /* Certain versions of Nutscape proxy server send out `Content-Length' without "bytes" specifier, which is a breach of RFC2068 (as well as the HTTP/1.1 draft which was current at the time). But hell, I must support it... */ if (!strncasecmp (hdr, "bytes", 5)) { hdr += 5; /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the HTTP spec. 
*/ if (*hdr == ':') ++hdr; hdr += skip_lws (hdr); if (!*hdr) return 0; } if (!ISDIGIT (*hdr)) return 0; for (num = 0; ISDIGIT (*hdr); hdr++) num = 10 * num + (*hdr - '0'); if (*hdr != '-' || !ISDIGIT (*(hdr + 1))) return 0; closure->first_byte_pos = num; ++hdr; for (num = 0; ISDIGIT (*hdr); hdr++) num = 10 * num + (*hdr - '0'); if (*hdr != '/' || !ISDIGIT (*(hdr + 1))) return 0; closure->last_byte_pos = num; ++hdr; for (num = 0; ISDIGIT (*hdr); hdr++) num = 10 * num + (*hdr - '0'); closure->entity_length = num; return 1;}/* Place 1 to ARG if the HDR contains the word "none", 0 otherwise. Used for `Accept-Ranges'. */static inthttp_process_none (const char *hdr, void *arg){ int *where = (int *)arg; if (strstr (hdr, "none")) *where = 1; else *where = 0; return 1;}/* Place the malloc-ed copy of HDR hdr, to the first `;' to ARG. */static inthttp_process_type (const char *hdr, void *arg){ char **result = (char **)arg; /* Locate P on `;' or the terminating zero, whichever comes first. */ const char *p = strchr (hdr, ';'); if (!p) p = hdr + strlen (hdr); while (p > hdr && ISSPACE (*(p - 1))) --p; *result = strdupdelim (hdr, p); return 1;}/* Check whether the `Connection' header is set to "keep-alive". */static inthttp_process_connection (const char *hdr, void *arg){ int *flag = (int *)arg; if (!strcasecmp (hdr, "Keep-Alive")) *flag = 1; return 1;}/* Commit the cookie to the cookie jar. */inthttp_process_set_cookie (const char *hdr, void *arg){ struct url *u = (struct url *)arg; /* The jar should have been created by now. */ assert (wget_cookie_jar != NULL); cookie_jar_process_set_cookie (wget_cookie_jar, u->host, u->port, u->path, hdr); return 1;}/* Persistent connections. Currently, we cache the most recently used connection as persistent, provided that the HTTP server agrees to make it such. The persistence data is stored in the variables below. Ideally, it would be in a structure, and it should be possible to cache an arbitrary fixed number of these connections. 
I think the code is quite easy to extend in that direction. *//* Whether a persistent connection is active. */static int pc_active_p;/* Host and port of currently active persistent connection. */static struct address_list *pc_last_host_ip;static unsigned short pc_last_port;/* File descriptor of the currently active persistent connection. */static int pc_last_fd;#ifdef HAVE_SSL/* Whether a ssl handshake has occoured on this connection */static int pc_active_ssl;/* SSL connection of the currently active persistent connection. */static SSL *pc_last_ssl;#endif /* HAVE_SSL *//* Mark the persistent connection as invalid. This is used by the CLOSE_* macros after they forcefully close a registered persistent connection. This does not close the file descriptor -- it is left to the caller to do that. (Maybe it should, though.) */static voidinvalidate_persistent (void){ pc_active_p = 0;#ifdef HAVE_SSL pc_active_ssl = 0;#endif /* HAVE_SSL */ if (pc_last_host_ip != NULL) { address_list_release (pc_last_host_ip); pc_last_host_ip = NULL; } DEBUGP (("Invalidating fd %d from further reuse.\n", pc_last_fd));}/* Register FD, which should be a TCP/IP connection to HOST:PORT, as persistent. This will enable someone to use the same connection later. In the context of HTTP, this must be called only AFTER the response has been received and the server has promised that the connection will remain alive. If a previous connection was persistent, it is closed. */#ifdef HAVE_SSLstatic voidregister_persistent (const char *host, unsigned short port, int fd, SSL *ssl){#elsestatic voidregister_persistent (const char *host, unsigned short port, int fd){#endif if (pc_active_p) { if (pc_last_fd == fd) { /* The connection FD is already registered. Nothing to do. */ return; } else { /* The old persistent connection is still active; let's close it first. 
This situation arises whenever a persistent connection exists, but we then connect to a different host, and try to register a persistent connection to that one. */#ifdef HAVE_SSL /* The ssl disconnect has to take place before the closing of pc_last_fd. */ if (pc_last_ssl) shutdown_ssl(pc_last_ssl);#endif CLOSE (pc_last_fd); invalidate_persistent (); } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -