http.c
来自「网络爬虫程序」· C语言 代码 · 共 1,770 行 · 第 1/3 页
C
1,770 行
/***************************************************************************/
/*    This code is part of WWW grabber called pavuk                        */
/*    Copyright (c) 1997 - 2001 Stefan Ondrejicka                          */
/*    Distributed under GPL 2 or later                                     */
/***************************************************************************/

#include "config.h"

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <netdb.h>
#include <errno.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <sys/stat.h>
#include <time.h>

#include "url.h"
#include "form.h"
#include "http.h"
#include "net.h"
#include "base64.h"
#include "tools.h"
#include "cookie.h"
#include "mime.h"
#include "errcode.h"
#include "abstract.h"
#include "myssl.h"
#include "authinfo.h"
#include "gui_api.h"
#include "times.h"
#include "uexit.h"
#include "cookie.h"
#include "ntlm_auth.h"
#include "digest_auth.h"
#include "doc.h"

static void http_process_response_11flags(doc *, http_response *);

/*
 * Table of authentication scheme names and their identifiers.
 * http_get_authorization_type() scans it until it reaches the entry
 * whose name is NULL, so that entry has to stay last.
 */
const http_auth_type_info_t http_auths[] = {
  {"", HTTP_AUTH_NONE},
  {"user", HTTP_AUTH_USER},
  {"Basic", HTTP_AUTH_BASIC},
  {"Digest", HTTP_AUTH_DIGEST},
  {"NTLM", HTTP_AUTH_NTLM},
  {NULL, HTTP_AUTH_NONE},
};

/*
 * Parse a user supplied header description of the form
 *   [+]Name: value
 * into a newly allocated httphdr.
 *
 * A leading '+' sets the "all" flag of the result.  The stored name
 * keeps its trailing ':' (http_get_additional_headers() relies on that
 * when it prints "name value").  A single space directly after the
 * colon is not copied into the value.
 *
 * Returns NULL when the string contains no ':'.  The result is released
 * with httphdr_free().
 */
httphdr *httphdr_parse(char *str)
{
  char *p;
  httphdr *rv;
  int for_all = FALSE;

  if(*str == '+')
  {
    for_all = TRUE;
    str++;
  }

  /* validate before allocating, so malformed input costs nothing */
  p = strchr(str, ':');
  if(!p)
    return NULL;

  rv = _malloc(sizeof(httphdr));
  rv->all = for_all;

  /* "+ 1" keeps the colon as part of the name */
  rv->name = tl_strndup(str, p - str + 1);

  p++;
  rv->val = (*p == ' ') ? tl_strdup(p + 1) : tl_strdup(p);

  return rv;
}

/*
 * Release a header description created by httphdr_parse(),
 * including its name and value strings.
 */
void httphdr_free(httphdr * hdr)
{
  _free(hdr->val);
  _free(hdr->name);
  _free(hdr);
}

/*
 * Build the block of user configured extra request headers
 * (priv_cfg.http_headers) for the given URL.
 *
 * Every configured header is sent for an URL without a parent URL;
 * for an URL that has a parent only headers with the "all" flag are
 * sent.  Each line is formatted through a 2048 byte buffer, so a longer
 * header line is truncated by snprintf().
 *
 * Returns the string accumulated with tl_str_append(), or NULL when no
 * header applies.
 */
static char *http_get_additional_headers(url * urlp)
{
  char pom[2048];
  char *req = NULL;
  dllist *ptr;

  for(ptr = priv_cfg.http_headers; ptr; ptr = ptr->next)
  {
    httphdr *hdr = (httphdr *) ptr->data;

    if(!urlp->parent_url || hdr->all)
    {
      snprintf(pom, sizeof(pom), "%s %s\r\n", hdr->name, hdr->val);
      req = tl_str_append(req, pom);
    }
  }

  return req;
}

/*
 * Compose the credentials string for the document's own URL.
 *
 * User, password and scheme are looked up with url_get_user(),
 * url_get_pass() and url_get_auth_scheme(); the realm of a pending
 * digest challenge (docp->auth_digest) is passed along when there is
 * one.  Nothing is produced for URLT_FTP URLs.
 *
 * With user and password the string is built according to the scheme
 * (Digest, "user" or Basic); any other scheme yields nothing.  With a
 * user but no password the result is the bare user name.
 *
 * Returns a tl_strdup()ed copy of the string, or NULL when it is empty.
 */
static char *http_get_auth_str(doc * docp, char *method)
{
  char auth[2048];
  char pom[1024];
  char *user, *pass, *p;
  int auth_scheme;
  http_digest_info *auth_digest;

  auth[0] = '\0';

  if(docp->doc_url->type == URLT_FTP)
    return NULL;

  auth_digest = (http_digest_info *) docp->auth_digest;

  user = url_get_user(docp->doc_url,
    auth_digest ? auth_digest->realm : NULL);
  pass = url_get_pass(docp->doc_url,
    auth_digest ? auth_digest->realm : NULL);
  auth_scheme = url_get_auth_scheme(docp->doc_url,
    auth_digest ? auth_digest->realm : NULL);

  if(user)
  {
    if(pass)
    {
      if(auth_scheme == HTTP_AUTH_DIGEST && auth_digest)
      {
        http_get_digest_auth_str(auth_digest, method, user, pass,
          docp->doc_url, auth, sizeof(auth));
      }
      else if(auth_scheme == HTTP_AUTH_USER)
      {
        snprintf(auth, sizeof(auth), "user %s:%s", user, pass);
      }
      else if(auth_scheme == HTTP_AUTH_BASIC)
      {
        snprintf(pom, sizeof(pom), "%s:%s", user, pass);
        p = base64_encode(pom);
        snprintf(auth, sizeof(auth), "Basic %s", p);
        free(p);
      }
    }
    else
    {
      /* bounded copy that always terminates the buffer */
      snprintf(auth, sizeof(auth), "%s", user);
    }
  }

  return auth[0] ? tl_strdup(auth) : NULL;
}

/*
 * Compose the credentials string for the HTTP proxy.
 *
 * Defaults come from priv_cfg.http_proxy_user, priv_cfg.http_proxy_pass
 * and cfg.proxy_auth_scheme.  When the document has a proxy port set,
 * a matching authinfo entry (authinfo_match_entry()) overrides all
 * three.
 *
 * With user and password the string is built according to the scheme
 * (Digest - only with a pending docp->auth_proxy_digest -, "user" or
 * Basic).  With a user but no password the result is "user <name>".
 *
 * Returns a tl_strdup()ed copy of the string, or NULL when it is empty.
 */
static char *http_get_proxy_auth_str(doc * docp, char *method)
{
  char auth[2048];
  char pom[1024];
  char *proxy_user, *proxy_pass, *p;
  int proxy_auth_scheme;
  http_digest_info *auth_proxy_digest;

  auth_proxy_digest = (http_digest_info *) docp->auth_proxy_digest;

  auth[0] = '\0';

  proxy_user = priv_cfg.http_proxy_user;
  proxy_pass = priv_cfg.http_proxy_pass;
  proxy_auth_scheme = cfg.proxy_auth_scheme;

  if(docp->http_proxy_port)
  {
    authinfo *ai;

    ai = authinfo_match_entry(docp->doc_url->type, docp->http_proxy,
      docp->http_proxy_port, NULL, NULL);

    if(ai)
    {
      proxy_user = ai->user;
      proxy_pass = ai->pass;
      proxy_auth_scheme = ai->type;
    }
  }

  if(proxy_user)
  {
    if(proxy_pass)
    {
      if(auth_proxy_digest && proxy_auth_scheme == HTTP_AUTH_DIGEST)
      {
        http_get_digest_auth_str(auth_proxy_digest, method, proxy_user,
          proxy_pass, docp->doc_url, auth, sizeof(auth));
      }
      else if(proxy_auth_scheme == HTTP_AUTH_USER)
      {
        snprintf(auth, sizeof(auth), "user %s:%s", proxy_user, proxy_pass);
      }
      else if(proxy_auth_scheme == HTTP_AUTH_BASIC)
      {
        snprintf(pom, sizeof(pom), "%s:%s", proxy_user, proxy_pass);
        p = base64_encode(pom);
        snprintf(auth, sizeof(auth), "Basic %s", p);
        free(p);
      }
    }
    else
    {
      snprintf(auth, sizeof(auth), "user %s", proxy_user);
    }
  }

  return auth[0] ? tl_strdup(auth) : NULL;
}

/*
 * Determine the authentication scheme named at the start of an
 * authenticate header value.
 *
 * Leading whitespace is skipped; the scheme token extends up to the
 * first space or tab.  The token is compared case-insensitively with
 * the names in http_auths[].  The whole token has to match: a token
 * that is merely a prefix of a scheme name (e.g. "Ba") is not accepted.
 *
 * Returns the matching identifier, or HTTP_AUTH_NONE when the token is
 * empty or unknown.
 */
http_auth_type_t http_get_authorization_type(char *auth_field)
{
  char *p = auth_field;
  size_t l;
  int i;

  while(tl_ascii_isspace(*p))
    p++;

  l = strcspn(p, " \t");

  for(i = 0; http_auths[i].name; i++)
  {
    /* length check first, otherwise any prefix of a name would match */
    if(strlen(http_auths[i].name) == l &&
      !strncasecmp(p, http_auths[i].name, l))
      return http_auths[i].id;
  }

  return HTTP_AUTH_NONE;
}

/*
 * React to the "WWW-Authenticate:" challenges found in docu->mime.
 *
 * All challenges are scanned and the index of the last Digest one (and,
 * with ENABLE_NTLM, the last NTLM one) is remembered.  A Digest
 * challenge is preferred and handed to http_digest_do_auth(); otherwise
 * an NTLM challenge goes to ntlm_negotiate_connection().
 *
 * Returns the result of the selected handler, or -1 when no supported
 * challenge was found.
 */
int http_handle_site_auth_info(doc * docu)
{
  int rv = -1;
  char *authtag;
  int is_digest = -1;
#ifdef ENABLE_NTLM
  int is_ntlm = -1;
#endif
  int i;

  /* the lookup result is freed here, so it is treated as heap owned */
  for(i = 0;
    (authtag =
      get_mime_n_param_val_str("WWW-Authenticate:", docu->mime, i)) != NULL;
    i++)
  {
    switch (http_get_authorization_type(authtag))
    {
    case HTTP_AUTH_DIGEST:
      is_digest = i;
      break;
#ifdef ENABLE_NTLM
    case HTTP_AUTH_NTLM:
      is_ntlm = i;
      break;
#endif
    default:
      break;
    }
    free(authtag);
  }

  if(is_digest >= 0)
  {
    authtag = get_mime_n_param_val_str("WWW-Authenticate:", docu->mime,
      is_digest);
    rv = http_digest_do_auth(docu, authtag);
    _free(authtag);
  }
#ifdef ENABLE_NTLM
  else if(is_ntlm >= 0)
  {
    authtag = get_mime_n_param_val_str("WWW-Authenticate:", docu->mime,
      is_ntlm);
    rv = ntlm_negotiate_connection(docu, authtag);
    _free(authtag);
  }
#endif

  return rv;
}

/*
 * React to the "Proxy-Authenticate:" challenges found in docu->mime.
 *
 * Works like http_handle_site_auth_info(), but reads the proxy header
 * and dispatches to http_digest_do_proxy_auth() or, with ENABLE_NTLM,
 * ntlm_negotiate_proxy_connection().
 *
 * Returns the result of the selected handler, or -1 when no supported
 * challenge was found.
 */
int http_handle_proxy_auth_info(doc * docu)
{
  int rv = -1;
  char *authtag;
  int is_digest = -1;
#ifdef ENABLE_NTLM
  int is_ntlm = -1;
#endif
  int i;

  for(i = 0;
    (authtag =
      get_mime_n_param_val_str("Proxy-Authenticate:", docu->mime, i)) != NULL;
    i++)
  {
    switch (http_get_authorization_type(authtag))
    {
    case HTTP_AUTH_DIGEST:
      is_digest = i;
      break;
#ifdef ENABLE_NTLM
    case HTTP_AUTH_NTLM:
      is_ntlm = i;
      break;
#endif
    default:
      break;
    }
    free(authtag);
  }

  if(is_digest >= 0)
  {
    authtag = get_mime_n_param_val_str("Proxy-Authenticate:", docu->mime,
      is_digest);
    rv = http_digest_do_proxy_auth(docu, authtag);
    _free(authtag);
  }
#ifdef ENABLE_NTLM
  else if(is_ntlm >= 0)
  {
    authtag = get_mime_n_param_val_str("Proxy-Authenticate:", docu->mime,
      is_ntlm);
    rv = ntlm_negotiate_proxy_connection(docu, authtag);
    _free(authtag);
  }
#endif

  return rv;
}

static int
http_dumy_proxy_send_connect(doc * docp, char *host, int port)
{
  /*
   * Build a CONNECT request for host:port and write it to
   * docp->datasock.  The request consists of the request line (plus a
   * Host: header when cfg.use_http11 is set), an optional
   * Proxy-Authorization header, docp->additional_headers, the user
   * configured extra headers and the terminating empty line.
   *
   * Returns 0 when the whole request was written, -1 otherwise.
   */
  char pom[1024];
  char *req, *p;
  size_t reqlen;

  if(cfg.use_http11)
    snprintf(pom, sizeof(pom), "CONNECT %s:%d HTTP/1.1\r\nHost: %s:%d\r\n",
      host, port, host, port);
  else
    snprintf(pom, sizeof(pom), "CONNECT %s:%d HTTP/1.0\r\n", host, port);

  req = tl_strdup(pom);

  /**** information for HTTP proxy authorization ****/
  p = http_get_proxy_auth_str(docp, "CONNECT");
  if(p)
  {
    req = tl_str_concat(req, "Proxy-Authorization: ", p, "\r\n", NULL);
    _free(p);
  }

  if(docp->additional_headers)
    req = tl_str_concat(req, docp->additional_headers, NULL);

  /**** additional headers via -httpadd ****/
  if((p = http_get_additional_headers(docp->doc_url)))
  {
    req = tl_str_append(req, p);
    _free(p);
  }

  req = tl_str_concat(req, "\r\n", NULL);

  DEBUG_PROTOC(gettext
    ("****************** Proxy connect request *****************\n"));
  DEBUG_PROTOC("%s", req);
  DEBUG_PROTOC
    ("**********************************************************\n");

  reqlen = strlen(req);
  if(abs_write(docp->datasock, req, reqlen) != reqlen)
  {
    xperror("Proxy connect request");
    _free(req);
    return -1;
  }

  _free(req);

  return 0;
}

/*
 * Perform the CONNECT handshake with an HTTP proxy.
 *
 * Marks the document as a CONNECT request for host:port, sends the
 * request and reads the proxy's response header, which is stored in
 * docp->mime.  When the document has no proxy set and proxy_host is
 * given, proxy_host/proxy_port are installed in the document for the
 * duration of the handshake.
 *
 * Response handling:
 *  - 407: http_handle_proxy_auth_info() is consulted.  If it returns
 *    a value >= 0 the function returns at once (0 for 0, -1 otherwise).
 *    If it fails but docp->num_proxy_auth was non-zero before the call,
 *    the result is 0; otherwise the handler's negative value is kept.
 *  - any other status >= 400: -1.
 *  - no response header could be read: -1.
 *
 * Returns 0 on success, a negative value on failure.
 */
int http_dumy_proxy_connect_real(doc * docp, char *host, int port,
  char *proxy_host, int proxy_port)
{
  char *p;
  int len;
  bool_t rem_proxy = FALSE;
  int rv = 0;

  docp->request_type = HTTP_REQ_CONNECT;
  docp->connect_host = host;
  docp->connect_port = port;

  if(!docp->http_proxy && proxy_host)
  {
    rem_proxy = TRUE;
    docp->http_proxy = tl_strdup(proxy_host);
    docp->http_proxy_port = proxy_port;
  }

  /*
   * NOTE(review): this return and the early return in the 407 branch
   * below leave a temporarily installed proxy in place, as before.
   */
  if(http_dumy_proxy_send_connect(docp, host, port))
    return -1;

  p = NULL;
  if(http_read_mime_header(docp, &p, &len) > 0)
  {
    http_response *resp;

    DEBUG_PROTOS(gettext
      ("***************** Proxy connect response *****************\n"));
    DEBUG_PROTOS("%s", p);
    DEBUG_PROTOS
      ("**********************************************************\n");

    docp->mime = p;
    resp = http_get_response_info(p);

    http_process_response_11flags(docp, resp);

    /*** proxy authorization required ***/
    if(resp->ret_code == 407)
    {
      int nauth = docp->num_proxy_auth;

      rv = http_handle_proxy_auth_info(docp);
      if(rv >= 0)
      {
        _free(docp->mime);
        http_response_free(resp);
        return rv ? -1 : 0;
      }
      else if(nauth)
      {
        rv = 0;
      }
    }
    else if(resp->ret_code >= 400)
      rv = -1;

    http_response_free(resp);
  }
  else
    rv = -1;

  if(rem_proxy)
  {
    /*
     * The proxy name was duplicated above; release the copy before
     * dropping the pointer, otherwise it is leaked on every call.
     */
    _free(docp->http_proxy);
    docp->http_proxy = NULL;
  }

  return rv;
}

/*
 * Wrapper around http_dumy_proxy_connect_real() that keeps the
 * handshake from disturbing the document's transfer state.
 *
 * The document is copied before the call and a selected set of fields
 * (sizes, restart positions, HTTP/1.1, chunking and persistence flags)
 * is copied back afterwards.  On success docp->errcode is reset to
 * ERR_NOERROR.
 *
 * Returns the result of http_dumy_proxy_connect_real().
 */
int http_dumy_proxy_connect(doc * docp, char *host, int port,
  char *proxy_host, int proxy_port)
{
  int rv;
  doc docs;

  /* quick save of values */
  memcpy(&docs, docp, sizeof(doc));

  rv = http_dumy_proxy_connect_real(docp, host, port, proxy_host,
    proxy_port);

  /* restore some of affected values */
  docp->is_parsable = docs.is_parsable;
  docp->size = docs.size;
  docp->totsz = docs.totsz;
  docp->origsize = docs.origsize;
  docp->rest_pos = docs.rest_pos;
  docp->rest_end_pos = docs.rest_end_pos;
  docp->is_http11 = docs.is_http11;
  docp->chunk_size = docs.chunk_size;
  docp->is_chunked = docs.is_chunked;
  docp->read_chunksize = docs.read_chunksize;
  docp->read_trailer = docs.read_trailer;
  docp->is_persistent = docs.is_persistent;
  docp->current_size = docs.current_size;
  docp->adj_sz = docs.adj_sz;

  /* reset errcode */
  if(!rv)
    docp->errcode = ERR_NOERROR;

  return rv;
}

/************************************************/
/* create and send whole HTTP request with      */
/* respective document body (POST req)          */
/************************************************/
static int http_request(doc * docp, char *method, char *data, int datalen,
  char *conttype)
{
  char *req = NULL;
  char pom[2048];
  char *p;
  char **al;
  int len, wlen;
  url *urlp = docp->doc_url;

  gui_set_status(gettext("Sending request ..."));

  if((!cfg.http_proxy && urlp->type == URLT_HTTP) ||
    urlp->type == URLT_HTTPS)
  {
    p = url_to_request_urlstr(urlp, FALSE);
  }
  else
  {
    /* authorization of nonanonymous FTP access */
    /* via HTTP gateways is done different way  */
    /* use ftp://user:password@server/... in    */
    /* request instead of Authorization: ...    */
    if(urlp->type == URLT_FTP)
    {
      char *user;
      char *pass;

      user = url_get_user(urlp, NULL);
      pass = url_get_pass(urlp, NULL);

      if(user && !urlp->p.ftp.user)
        urlp->p.ftp.user = user;
      else
        user = NULL;

      if(pass && !urlp->p.ftp.password)
        urlp->p.ftp.password = pass;
      else
        pass = NULL;

      p = url_to_request_urlstr(urlp, TRUE);

      if(user)
        urlp->p.ftp.user = NULL;
      if(pass)
        urlp->p.ftp.password = NULL;
    }
    else
      p = url_to_request_urlstr(urlp, TRUE);
  }

  if(cfg.use_http11)
    req = tl_str_concat(req, method, " ", p, " HTTP/1.1\r\n", NULL);
  else
    req = tl_str_concat(req, method, " ", p, " HTTP/1.0\r\n", NULL);

  _free(p);

  if(priv_cfg.identity)
  {
    snprintf(pom, sizeof(pom), "User-Agent: %s\r\n", priv_cfg.identity);
  }
  else
  {
    snprintf(pom, sizeof(pom), "User-Agent: %s/%s %s\r\n", PACKAGE, VERSION,
      HOSTTYPE);
  }
  req = tl_str_append(req, pom);

  if(url_get_port(urlp) != prottable[urlp->type].default_port)
    snprintf(pom, sizeof(pom), "Host: %s:%d\r\n", url_get_site(urlp),
      url_get_port(urlp));
  else
    snprintf(pom, sizeof(pom), "Host: %s\r\n", url_get_site(urlp));
  req = tl_str_append(req, pom);

  if(cfg.send_from && priv_cfg.from)
  {
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?