http.c
来自「网络爬虫程序」· C语言 代码 · 共 1,770 行 · 第 1/3 页
C
1,770 行
snprintf(pom, sizeof(pom), "From: %s\r\n", priv_cfg.from); req = tl_str_append(req, pom); } if(cfg.send_cookies && (p = cookie_get_field(urlp))) { req = tl_str_append(req, p); _free(p); } /*** HTTP authorization field ****/ p = http_get_auth_str(docp, method); if(p) { req = tl_str_concat(req, "Authorization: ", p, "\r\n", NULL); _free(p); } /**** information for HTTP proxy authorization ****/ p = http_get_proxy_auth_str(docp, method); if(p) { req = tl_str_concat(req, "Proxy-Authorization: ", p, "\r\n", NULL); _free(p); } /**** prefered language ****/ if(priv_cfg.accept_lang) { bool_t f = FALSE; al = priv_cfg.accept_lang; if(*al) { snprintf(pom, sizeof(pom), "Accept-Language: %s", *al); al++; f = TRUE; } while(*al) { strcat(pom, ","); /* FIXME: Security */ strcat(pom, *al); al++; } if(f) { strcat(pom, "\r\n"); req = tl_str_append(req, pom); } } /*** preffered character sets ***/ if(priv_cfg.accept_chars) { bool_t f = FALSE; al = priv_cfg.accept_chars; if(*al) { snprintf(pom, sizeof(pom), "Accept-Charset: %s", *al); al++; f = TRUE; } while(*al) { strcat(pom, ","); strcat(pom, *al); al++; } if(f) { strcat(pom, "\r\n"); req = tl_str_append(req, pom); } } /**** referer URL ****/ if(cfg.referer) { if(urlp->parent_url) { url *par_url; LOCK_URL(urlp); par_url = (url *) urlp->parent_url->data; UNLOCK_URL(urlp); if(par_url->type != URLT_FILE) { p = url_to_urlstr(par_url, FALSE); req = tl_str_concat(req, "Referer: ", p, "\r\n", NULL); _free(p); } } else if(cfg.auto_referer) { p = url_to_urlstr(urlp, FALSE); req = tl_str_concat(req, "Referer: ", p, "\r\n", NULL); _free(p); } } /**** allow transfer encoding with gzip, compress ****/ if(cfg.use_enc) {#ifdef HAVE_ZLIB#if 0 /*MJF: Horrible workaround hack until deflate code works */ req = tl_str_append(req, "Accept-Encoding: x-gzip, gzip, x-compress, compress, deflate\r\n");#else req = tl_str_append(req, "Accept-Encoding: x-gzip, gzip\r\n");#endif#else req = tl_str_append(req, "Accept-Encoding: x-gzip, gzip, x-compress, compress\r\n");#endif } /**** reget (not supported by all servers) ****/ if(docp->rest_pos || docp->rest_end_pos > 0) { if(docp->rest_end_pos > 0) { snprintf(pom, sizeof(pom), "Range: bytes=%ld-%ld\r\n", (long) docp->rest_pos, (long) docp->rest_end_pos); } else { snprintf(pom, sizeof(pom), "Range: bytes=%ld-\r\n", (long) docp->rest_pos); } req = tl_str_append(req, pom); if(cfg.send_if_range && docp->etag) { snprintf(pom, sizeof(pom), "If-Range: %s\r\n", docp->etag); req = tl_str_append(req, pom); } } /*** conditional GET for sync mode ***/ if(docp->origtime && !docp->rest_pos) { LOCK_TIME; strftime(pom, sizeof(pom), "If-Modified-Since: %a, %d %b %Y %H:%M:%S GMT\r\n", gmtime(&docp->origtime)); UNLOCK_TIME; req = tl_str_append(req, pom); } /**** no caching ****/ if(!cfg.cache) { req = tl_str_append(req, "Pragma: no-cache\r\nCache-Control: no-cache\r\n"); } /**** additional headers via -httpadd ****/ if((p = http_get_additional_headers(urlp))) { req = tl_str_append(req, p); _free(p); } if(conttype) { snprintf(pom, sizeof(pom), "Content-type: %s\r\n", conttype); req = tl_str_append(req, pom); } if(datalen) { snprintf(pom, sizeof(pom), "Content-length: %d\r\n", datalen); req = tl_str_append(req, pom); } if(docp->additional_headers) { req = tl_str_append(req, docp->additional_headers); } req = tl_str_append(req, "\r\n"); DEBUG_PROTOC(gettext ("************ Client HTTP MIME header ***************\n")); DEBUG_PROTOC("%s", req); DEBUG_PROTOC("****************************************************\n"); /**** send request ****/ len = strlen(req); if((wlen = abs_write(docp->datasock, req, len)) != len) { xperror("abs_write"); _free(req); docp->errcode = ERR_HTTP_SNDREQ; return -1; } _free(req); if(data) { gui_set_status(gettext("Sending data ...")); DEBUG_PROTOD(gettext("************ HTTP request data ***************\n")); DEBUG_PROTOD("%s\n", data); DEBUG_PROTOD("*************************************************\n"); if(abs_write(docp->datasock, data, datalen) != datalen) { docp->errcode = ERR_HTTP_SNDREQDATA; return -1; } } gui_set_status(gettext("Waiting for response ...")); /*** handling of 1xx response codes ***/ while((wlen = abs_readln(docp->datasock, pom, sizeof(pom) - 1)) > 0) { http_response *resp; if(!timerisset(&docp->first_byte_time)) { gettimeofday(&docp->first_byte_time, NULL); } resp = http_get_response_info(pom); if(resp) _free(resp->text); bufio_unread(docp->datasock, pom, wlen); if(resp && (resp->ret_code < 200)) { _free(resp); if(http_read_mime_header(docp, &p, &len) <= 0) { xprintf(1, gettext("Error reading HTTP 1xx class response\n")); break; } else { DEBUG_PROTOS(gettext ("***************** class 1xx HTTP response ****************\n")); DEBUG_PROTOS("%s", p); DEBUG_PROTOS ("***********************************************************\n"); } } else { _free(resp); break; } } if(docp->is_http11 && docp->is_persistent && (!wlen || wlen < 0)) { docp->is_persistent = FALSE; docp->errcode = ERR_HTTP_CLOSURE; abs_close_socket(docp, FALSE); return -1; } if(wlen < 0) { docp->errcode = ERR_HTTP_RCVRESP; return -1; } return 0;}int http_get_request(doc * docp){ docp->request_type = HTTP_REQ_GET; return http_request(docp, "GET", 0, 0, 0);}int http_head_request(doc * docp){ docp->request_type = HTTP_REQ_HEAD; return http_request(docp, "HEAD", 0, 0, 0);}int http_post_request(doc * docp){ form_info *fi = (form_info *) docp->doc_url->extension; char *data = NULL; int datalen = 0; char *type = NULL; int rv; docp->request_type = HTTP_REQ_POST; if(fi->encoding == FORM_E_MULTIPART) { fi->text = form_encode_multipart_boundary(); type = tl_str_concat(type, "multipart/form-data; boundary=", fi->text, NULL); } else /*if (fi->encoding == FORM_E_URLENCODED) */ type = tl_strdup("application/x-www-form-urlencoded"); data = form_encode_query(fi, &datalen); if(!data && fi->infos) { _free(data); _free(type); _free(fi->text); docp->errcode = ERR_HTTP_BADRQ; return -1; } rv = http_request(docp, "POST", data, datalen, type); _free(data); _free(type); _free(fi->text); return rv;}static bufio *http_open_socket(doc * docp){ char *host = NULL; int port = 0;#define PENAULT_VAL 10 docp->errcode = ERR_NOERROR; if(docp->http_proxy) { host = docp->http_proxy; port = docp->http_proxy_port; } else { host = url_get_site(docp->doc_url); port = url_get_port(docp->doc_url); } if(!docp->datasock) { gui_set_status(gettext("Connecting ...")); docp->datasock = bufio_sock_fdopen(net_connect(host, port, docp));#ifdef USE_SSL if(docp->datasock && docp->doc_url->type == URLT_HTTPS) { bufio *ssl_sock; ssl_sock = my_ssl_do_connect(docp, docp->datasock, NULL); if(!ssl_sock) { if(!docp->errcode) docp->errcode = ERR_HTTPS_CONNECT; bufio_close(docp->datasock); docp->datasock = NULL; return NULL; } else { docp->datasock = ssl_sock; } }#endif } if(!docp->datasock) { if(_h_errno_ != 0) xherror(host); else xperror("net_connect"); if(docp->http_proxy) { docp->errcode = ERR_HTTP_PROXY_CONN; /*** for penaulting failed HTTP proxy servers ***/ if(docp->doc_url->type == URLT_HTTP) { http_proxy *pr; LOCK_PROXY; pr = http_proxy_find(docp->http_proxy, docp->http_proxy_port); if(pr) { pr->penault = PENAULT_VAL + pr->fails; pr->fails++; } UNLOCK_PROXY; } _free(docp->http_proxy); } else docp->errcode = ERR_HTTP_CONNECT; return NULL; } return docp->datasock;}void http_handle_redirect(doc * docu, int redir_code){ url *pomurl; char *pomcr = NULL; pomcr = get_mime_param_val_str("Location:", docu->mime); if(!pomcr) { pomcr = get_mime_param_val_str("URI:", docu->mime); if(pomcr) { char *p; p = strchr(pomcr, ';'); if(p) *p = '\0'; p = strchr(pomcr, '>'); if(p) *p = '\0'; if(pomcr[0] == '<') { p = pomcr; pomcr = tl_strdup(pomcr + 1); _free(p); } } else { docu->errcode = ERR_HTTP_BADREDIRECT; } } if(pomcr) { pomurl = url_parse(pomcr); assert(pomurl->type != URLT_FROMPARENT); if(pomurl->type == URLT_FILE) { char *base; char *baset; char *xp; baset = url_to_urlstr(docu->doc_url, FALSE); if((xp = strrchr(baset, '#'))) *xp = '\0'; if((xp = strrchr(baset, '?'))) *xp = '\0'; base = url_to_urlstr(docu->doc_url, FALSE); if(!tl_is_dirname(base)) { xp = strrchr(base, '/'); if(xp) *(xp + 1) = '\0'; } xp = url_to_absolute_url(base, baset, docu->doc_url, pomcr); free_deep_url(pomurl); _free(pomurl); pomurl = url_parse(xp); _free(xp); _free(base); _free(baset); } _free(pomcr); if(pomurl && prottable[pomurl->type].supported) { if(docu->is_robot) { docu->doc_url->moved_to = pomurl; docu->errcode = ERR_HTTP_REDIR; } else { if(url_redirect_to(docu->doc_url, pomurl, (redir_code == 303))) docu->errcode = ERR_HTTP_CYCLIC; else docu->errcode = ERR_HTTP_REDIR; } } else { if(pomurl) { free_deep_url(pomurl); _free(pomurl); docu->errcode = ERR_HTTP_UNSUPREDIR; } else docu->errcode = ERR_HTTP_BADREDIRECT; } } else docu->errcode = ERR_HTTP_BADREDIRECT;}static void http_process_response_11flags(doc * docu, http_response * resp){ char *p; if(cfg.use_http11 && !docu->http_proxy_10 && resp->ver_maj == 1 && resp->ver_min == 1) { docu->is_http11 = TRUE; docu->is_persistent = TRUE; docu->is_chunked = FALSE; docu->read_trailer = FALSE; } else { docu->is_http11 = FALSE; docu->is_persistent = FALSE; docu->is_chunked = FALSE; docu->read_trailer = FALSE; } p = get_mime_param_val_str("Content-Length:", docu->mime); if(p) { docu->totsz = _atoi(p); if(errno == ERANGE) docu->totsz = -1; _free(p); } p = get_mime_param_val_str("Transfer-Encoding:", docu->mime); if(p) { if(!strcasecmp(p, "chunked") || !strncasecmp(p, "chunked;", 8)) { docu->is_chunked = TRUE; docu->read_chunksize = TRUE; docu->read_trailer = FALSE; } _free(p); } p = get_mime_param_val_str("Connection:", docu->mime); if(p) { if(!strcasecmp(p, "close")) docu->is_persistent = FALSE; _free(p); } if(docu->http_proxy) { p = get_mime_param_val_str("Proxy-Connection:", docu->mime); if(p) { if(!strcasecmp(p, "close")) docu->is_persistent = FALSE; else if(!strcasecmp(p, "keep-alive")) docu->is_persistent = TRUE; _free(p); } }}/* * -1 - failure before sending auth data - http_process_response() can * continue safely * 0 - OK - http_process_response() must return immediately * 1 - failure after sending auth data - http_process_response() must * return immediately */static int http_do_proxy_redirect(doc * docp){ char *loc; int port; char proxy[256]; int rv = 0; loc = get_mime_param_val_str("Location:", docp->mime); if(!loc)
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?