📄 http.c
字号:
/* HTTP support. Copyright (C) 2000 Kalum Somaratna This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */#ifdef HAVE_CONFIG_H# include <config.h>#endif /* * HAVE_CONFIG_H */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <unistd.h>#include <errno.h>#include <ctype.h>#include <netdb.h>#include <netinet/in.h>#include <sys/socket.h>#include <sys/time.h>#include <assert.h>#include "connect.h"#include "url.h"#include "misc.h"#include "main.h"#include "connection.h"#include "http.h"#include "debug.h"#include "netrc.h"#include "runtime.h"#define DYNAMIC_LINE_BUFFER 40int buf_readchar(int fd, char *ret){ int res; res = krecv(fd, ret, 1, 0, rt.timeout); if (res <= 0) return res; return 1;}/* This is similar to buf_readchar, only it doesn't move the buffer position. */int buf_peek(int fd, char *ret){ int res; res = krecv(fd, ret, 1, MSG_PEEK, rt.timeout); if (res <= 0) return res; return 1;}/* Function to fetch a header from socket/file descriptor fd. The header may be of arbitrary length, since the function allocates as much memory as necessary for the header to fit. Most errors are handled. The header may be terminated by LF or CRLF. If the character after LF is SP or HT (horizontal tab), the header spans to another line (continuation header), as per RFC2068. The trailing CRLF or LF are stripped from the header, and it is zero-terminated. */uerr_t fetch_next_header(int fd, char **hdr){ int i, bufsize, res; char next; bufsize = DYNAMIC_LINE_BUFFER; *hdr = kmalloc(bufsize); for (i = 0; 1; i++) { if (i > bufsize - 1) *hdr = krealloc(*hdr, (bufsize <<= 1)); res = buf_readchar(fd, *hdr + i); if (res == 1) { if ((*hdr)[i] == '\n') { if (!(i == 0 || (i == 1 && (*hdr)[0] == '\r'))) { /* * If the header is non-empty, we need to check if it * continues on to the other line. We do that by * getting the next character without actually * downloading it (i.e. peeking it). */ res = buf_peek(fd, &next); if (res == 0) return HEOF; else if (res == -1) return HERR; /* * If the next character is SP or HT, just continue. */ if (next == '\t' || next == ' ') continue; } /* * The header ends. */ (*hdr)[i] = '\0'; /* * Get rid of '\r'. */ if (i > 0 && (*hdr)[i - 1] == '\r') (*hdr)[i - 1] = '\0'; break; } } else if (res == 0) return HEOF; else return HERR; } return HOK;}int hparsestatline(const char *hdr, const char **rp){ int mjr, mnr; /* * HTTP major and minor version. */ int statcode; /* * HTTP status code. */ const char *p; *rp = NULL; /* * The standard format of HTTP-Version is: * HTTP/x.y, where x is major version, and y is minor version. */ if (strncmp(hdr, "HTTP/", 5) != 0) return -1; hdr += 5; p = hdr; for (mjr = 0; isdigit(*hdr); hdr++) mjr = 10 * mjr + (*hdr - '0'); if (*hdr != '.' || p == hdr) return -1; ++hdr; p = hdr; for (mnr = 0; isdigit(*hdr); hdr++) mnr = 10 * mnr + (*hdr - '0'); if (*hdr != ' ' || p == hdr) return -1; /* * Wget will accept only 1.0 and higher HTTP-versions. The value * of minor version can be safely ignored. */ if (mjr < 1) return -1; /* * Skip the space. */ ++hdr; if (!(isdigit(*hdr) && isdigit(hdr[1]) && isdigit(hdr[2]))) return -1; statcode = 100 * (*hdr - '0') + 10 * (hdr[1] - '0') + (hdr[2] - '0'); /* * RFC2068 requires a SPC here, even if there is no reason-phrase. * As some servers/CGI are (incorrectly) setup to drop the SPC, * we'll be liberal and allow the status line to end here. */ if (hdr[3] != ' ') { if (!hdr[3]) *rp = hdr + 3; else return -1; } else *rp = hdr + 4; return statcode;}/* Skip LWS (linear white space), if present. Returns number of characters to skip. */int hskip_lws(const char *hdr){ int i; for (i = 0; *hdr == ' ' || *hdr == '\t' || *hdr == '\r' || *hdr == '\n'; ++hdr) ++i; return i;}/* Return the content length of the document body, if this is Content-length header, -1 otherwise. */off_t hgetlen(const char *hdr){ const int l = 15; /* * strlen("content-length:") */ off_t len; if (strncasecmp(hdr, "content-length:", l)) return -1; hdr += (l + hskip_lws(hdr + l)); if (!*hdr) return -1; if (!isdigit(*hdr)) return -1; for (len = 0; isdigit(*hdr); hdr++) len = 10 * len + (*hdr - '0'); return len;}/* Return the content-range in bytes, as returned by the server, if this is Content-range header, -1 otherwise. */off_t hgetrange(const char *hdr){ const int l = 14; /* * strlen("content-range:") */ off_t len; if (strncasecmp(hdr, "content-range:", l)) return -1; hdr += (l + hskip_lws(hdr + l)); if (!*hdr) return -1; /* * Nutscape proxy server sends content-length without "bytes" * specifier, which is a breach of HTTP/1.1 draft. But heck, I must * support it... */ if (!strncasecmp(hdr, "bytes", 5)) { hdr += 5; hdr += hskip_lws(hdr); if (!*hdr) return -1; } if (!isdigit(*hdr)) return -1; for (len = 0; isdigit(*hdr); hdr++) len = 10 * len + (*hdr - '0'); return len;}/* Returns a malloc-ed copy of the location of the document, if the string hdr begins with LOCATION_H, or NULL. */char *hgetlocation(const char *hdr){ const int l = 9; /* * strlen("location:") */ if (strncasecmp(hdr, "location:", l)) return NULL; hdr += (l + hskip_lws(hdr + l)); return kstrdup(hdr);}/* Returns a malloc-ed copy of the last-modified date of the document, if the hdr begins with LASTMODIFIED_H. */char *hgetmodified(const char *hdr){ const int l = 14; /* * strlen("last-modified:") */ if (strncasecmp(hdr, "last-modified:", l)) return NULL; hdr += (l + hskip_lws(hdr + l)); return kstrdup(hdr);}/* Returns 0 if the header is accept-ranges, and it contains the word "none", -1 if there is no accept ranges, 1 is there is accept-ranges and it is not none */int hgetaccept_ranges(const char *hdr){ const int l = 14; /* * strlen("accept-ranges:") */ if (strncasecmp(hdr, "accept-ranges:", l)) return -1; hdr += (l + hskip_lws(hdr + l)); if (strstr(hdr, "none")) return 0; else return 1;}uerr_t get_http_info(urlinfo * u, http_stat_t * hs){ int sock; char *req = NULL; char *user, *passwd, *wwwauth, *referer = NULL; uerr_t err; netrc_entry *netrc_ent; err = connect_to_server(&sock, u->host, u->port, rt.timeout); if (err != NOCONERROR) { message("Error connecting to %s", u->host); return err; }/* Authentification code*/ user = u->user; passwd = u->passwd; /* * Use .netrc if asked to do so. */ if (rt.use_netrc == TRUE) { netrc_ent = search_netrc(rt.netrc_list, u->host); if (netrc_ent != NULL) { user = netrc_ent->account; passwd = netrc_ent->password; } } user = user ? user : ""; passwd = passwd ? passwd : ""; if (strlen(user) || strlen(passwd)) { /*Construct the necessary header */ wwwauth = get_basic_auth_str(user, passwd); message("Authenticating as user %s (%s password)", user, (strlen(passwd))?"using":"no"); debug_prz("Authentification string=%s\n", wwwauth); } else wwwauth = 0;/* Handle referer */ if(u->referer) { referer = alloca(13+strlen(u->referer)); sprintf(referer, "Referer: %s\r\n", u->referer); }/* We will get http info about the file by calling http_fetch_headers with HEAD */ { const char reqfmt[] = "HEAD %s HTTP/1.0\r\n" "User-Agent: %s\r\n" "Host: %s\r\n" "Accept: */*\r\n" "%s%s\r\n"; int reqlen; reqlen = snprintf(NULL, 0, reqfmt, u->path, USER_AGENT, u->host, referer ? referer : "", wwwauth ? wwwauth : ""); if (reqlen <= 0) die("Unable to calculate buffer length for HTTP HEAD request\n"); reqlen++; /* nul */ req = kmalloc(reqlen); snprintf(req, reqlen, reqfmt, u->path, USER_AGENT, u->host, referer ? referer : "", wwwauth ? wwwauth : ""); } debug_prz("HTTP request= %s\n", req); err = http_fetch_headers(sock, u, hs, req); close(sock); kfree(req); if (wwwauth) free(wwwauth); /*Check if we authenticated using any user or password and if we were kicked out, if so return HAUTHFAIL */ if (err == HAUTHREQ && (strlen(user) || strlen(passwd))) return HAUTHFAIL; return err;}/* function to fetch the http headers from the socket *//* to a specific command string */uerr_t http_fetch_headers(int sock, urlinfo * u, http_stat_t * hs, char *command){ int num_written, hcount, statcode; uerr_t err; char *hdr, *type; char *all_headers; off_t contlen, contrange; int all_length; const char *error; hs->len = 0L; hs->contlen = -1; hs->accept_ranges = -1; hs->res = -1; hs->newloc = NULL; hs->remote_time = NULL; hs->error = NULL; /* * send the command to the server */ num_written = ksend(sock, command, strlen(command), 0, rt.timeout); if (num_written != strlen(command)) { message("Failed writing HTTP request"); return WRITEERR; } all_headers = NULL; all_length = 0; contlen = contrange = -1; statcode = -1; type = NULL; /* * Header-fetching loop. */ hcount = 0; for (;;) { ++hcount; /* * Get the header. */ err = fetch_next_header(sock, &hdr); debug_prz("Header =%s\n", hdr); if (err == HEOF) { message("End of file while parsing headers"); free(hdr); if (type) free(type); if (all_headers) free(all_headers); return HEOF; } else if (err == HERR) { message("Read error in headers"); free(hdr); if (type) free(type); if (all_headers) free(all_headers); return HERR; } /* * Exit on empty header. */ if (!*hdr) { free(hdr); break; } /* * print the header for debugging purposes */ /* message( "\n%d %s", hcount, hdr); */ /* Check for errors documented in the first header. */ if (hcount == 1) { statcode = hparsestatline(hdr, &error); hs->statcode = statcode; /* * Store the descriptive response. */ if (statcode == -1) /* * malformed request */ hs->error = kstrdup("UNKNOWN"); else if (!*error) hs->error = kstrdup("(no description)"); else hs->error = kstrdup(error); } if (contlen == -1) { contlen = hgetlen(hdr); u->file_size = hs->contlen = contlen; } /* * if the server specified a new location then lets store it */ if (!hs->newloc) hs->newloc = hgetlocation(hdr); if (!hs->remote_time) hs->remote_time = hgetmodified(hdr); if (hs->accept_ranges == -1) { hs->accept_ranges = hgetaccept_ranges(hdr); } if (!hs->newloc) hs->newloc = hgetlocation(hdr); free(hdr); } if (H_20X(statcode)) return HOK; if (H_REDIRECTED(statcode) || statcode == HTTP_MULTIPLE_CHOICES) { /* * RFC2068 says that in case of the 300 (multiple choices) * response, the server can output a preferred URL through * `Location' header; otherwise, the request should be treated * like GET. So, if the location is set, it will be a * redirection; otherwise, just proceed normally. */ if (statcode == HTTP_MULTIPLE_CHOICES && !hs->newloc) return HOK; else { if (all_headers) free(all_headers); if (type) free(type); return NEWLOCATION; } } if (statcode == HTTP_UNAUTHORIZED) { return HAUTHREQ; } return HERR;}/*Routine returns a valid Authorization request in which the username:passwd is in base64 */char *get_basic_auth_str(char *user, char *passwd){ char *p1, *p2, *ret; int len = strlen(user) + strlen(passwd) + 1; int b64len = 4 * ((len + 2) / 3); char auth_header[] = "Authorization"; p1 = kmalloc(sizeof(char) * len + 1); sprintf(p1, "%s:%s", user, passwd); p2 = kmalloc(sizeof(char) * b64len + 1); /*Encode username:passwd to base 64 */ base64_encode(p1, p2, len); ret = kmalloc((sizeof(char) * strlen(auth_header)) + b64len + 11); sprintf(ret, "%s: Basic %s\r\n", auth_header, p2); free(p1); free(p2); return ret;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -