⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 transfer.c

📁 harvest是一个下载HTML网页的机器人
💻 C
📖 第 1 页 / 共 5 页
字号:
/***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) 1998 - 2003, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at http://curl.haxx.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 * $Id: transfer.c,v 1.182 2003/10/24 21:54:34 bagder Exp $
 ***************************************************************************/

#include "setup.h"

/* -- WIN32 approved -- */
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include <stdlib.h>
#include <ctype.h>

#include <sys/types.h>
#include <sys/stat.h>

#include <errno.h>

#include "strequal.h"

#if defined(WIN32) && !defined(__GNUC__) || defined(__MINGW32__)
#include <time.h>
#include <io.h>
#else
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#include <netinet/in.h>
#include <sys/time.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <netdb.h>
#ifdef HAVE_ARPA_INET_H
#include <arpa/inet.h>
#endif
#ifdef HAVE_NET_IF_H
#include <net/if.h>
#endif
#include <sys/ioctl.h>
#include <signal.h>

#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif

#ifdef HAVE_SYS_SELECT_H
#include <sys/select.h>
#endif

#ifndef HAVE_SELECT
#error "We can't compile without select() support!"
#endif
#ifndef HAVE_SOCKET
#error "We can't compile without socket() support!"
#endif

#endif

#include "urldata.h"
#include <curl/curl.h>
#include <curl/types.h>
#include "netrc.h"

#include "content_encoding.h"   /* content encoding support. 08/27/02 jhrg */

#include "hostip.h"
#include "transfer.h"
#include "sendf.h"
#include "speedcheck.h"
#include "progress.h"
#include "getdate.h"
#include "http.h"
#include "url.h"
#include "getinfo.h"
#include "ssluse.h"
#include "http_digest.h"
#include "http_ntlm.h"
#include "http_negotiate.h"
#include "share.h"

#define _MPRINTF_REPLACE /* use our functions only */
#include <curl/mprintf.h>

/* The last #include file should be: */
#ifdef CURLDEBUG
#include "memdebug.h"
#endif

#ifndef min
#define min(a, b)   ((a) < (b) ? (a) : (b))
#endif

#define CURL_TIMEOUT_EXPECT_100 1000 /* counting ms here */

enum {
  KEEP_NONE,
  KEEP_READ,
  KEEP_WRITE
};

/* Kept static and file-global because it is read-only and never modified:
   a zeroed timeout value handed to select() below. */
static struct timeval notimeout={0,0};

/*
 * fillbuffer()
 *
 * Asks the read callback (conn->fread) for upload data, stored at
 * conn->upload_fromhere, using 'bytes' as the size budget.
 *
 * When conn->bits.upload_chunky is set, 12 bytes of the budget are held back
 * and the data is read 10 bytes further into the buffer, which leaves room
 * for a hex size plus CRLF in front of the data and a CRLF behind it.
 *
 * Unless conn->bits.forbidchunk is set, the data is then framed as a chunk:
 * the "<hex>\r\n" prefix is written directly in front of the data,
 * conn->upload_fromhere is moved back to the start of that prefix, and a
 * CRLF is appended after the data. When the callback delivered no data only
 * the prefix is produced and conn->keep.upload_done is set.
 *
 * Returns the number of bytes now available at conn->upload_fromhere.
 */
static int fillbuffer(struct connectdata *conn,
                      int bytes)
{
  int room = bytes;   /* how much we allow the callback to deliver */
  int got;            /* what the callback actually delivered */
  char prefix[11];    /* up to 8 hex digits + CRLF + terminating zero */
  int prefixlen;
  int framed;

  if(conn->bits.upload_chunky) {
    /* chunked Transfer-Encoding: reserve space around the payload */
    room -= (8 + 2 + 2);         /* 32bit hex + CRLF + CRLF */
    conn->upload_fromhere += 10; /* 32bit hex + CRLF */
  }

  got = conn->fread(conn->upload_fromhere, 1,
                    room, conn->fread_in);

  if(conn->bits.forbidchunk || !conn->bits.upload_chunky)
    /* no chunk framing to add, hand back the raw amount */
    return got;

  prefixlen = snprintf(prefix, sizeof(prefix),
                       "%x\r\n", got);

  /* step back so that the prefix ends exactly where the payload begins */
  conn->upload_fromhere -= prefixlen;
  memcpy(conn->upload_fromhere, prefix, prefixlen);

  framed = prefixlen + got;

  if(got > 0) {
    /* terminate the chunk data with CRLF */
    memcpy(conn->upload_fromhere + framed, "\r\n", 2);
    framed += 2;
  }
  else {
    /* nothing more was delivered: the upload is done once this chunk has
       been transferred */
    conn->keep.upload_done = TRUE;
  }

  return framed;
}

/*
 * checkhttpprefix()
 *
 * Returns TRUE if 's' starts with any entry of the data->set.http200aliases
 * list or with "HTTP/", as decided by checkprefix(). Returns FALSE otherwise.
 */
static bool
checkhttpprefix(struct SessionHandle *data,
                const char *s)
{
  struct curl_slist *alias;

  for(alias = data->set.http200aliases; alias; alias = alias->next) {
    if(checkprefix(alias->data, s))
      return TRUE;
  }

  if(checkprefix("HTTP/", s))
    return TRUE;

  return FALSE;
}

CURLcode Curl_readwrite(struct connectdata *conn,
                        bool *done)
{
  struct Curl_transfer_keeper *k = &conn->keep;
  struct SessionHandle *data = conn->data;
  int result;
  ssize_t nread; /* number of bytes read */
  int didwhat=0;

  /* These two are used only if no other select() or _fdset() have been
     invoked before this. This typically happens if you use the multi
     interface and call curl_multi_perform() without calling
     curl_multi_fdset() first. */
  fd_set extrareadfd;
  fd_set extrawritefd;

  fd_set *readfdp = k->readfdp;
  fd_set *writefdp = k->writefdp;

  if((k->keepon & KEEP_READ) && !readfdp) {
    /* reading is requested, but no socket descriptor pointer was set */
    FD_ZERO(&extrareadfd);
    FD_SET(conn->sockfd, &extrareadfd);
    readfdp = &extrareadfd;

    /* no write, no exceptions, no timeout */
    select(conn->sockfd+1, readfdp, NULL, NULL, &notimeout);
  }
  if((k->keepon & KEEP_WRITE) && !writefdp) {
    /* writing is requested, but no socket descriptor pointer was set */
    FD_ZERO(&extrawritefd);
    FD_SET(conn->writesockfd, &extrawritefd);
    writefdp = &extrawritefd;

    /* no read, no exceptions, no timeout */
    select(conn->writesockfd+1, NULL, writefdp, NULL, &notimeout);
  }

  do {
    /* If we still have reading to do, we check if we have a readable
       socket. Sometimes the readfdp is NULL, if no fd_set was done using
       the multi interface and then we can do nothing but to attempt a
       read to be sure.
*/    if((k->keepon & KEEP_READ) &&       (!readfdp || FD_ISSET(conn->sockfd, readfdp))) {      bool readdone = TRUE;      /* This is where we loop until we have read everything there is to         read or we get a EWOULDBLOCK */      do {        int buffersize = data->set.buffer_size?          data->set.buffer_size:BUFSIZE -1;        /* receive data from the network! */        result = Curl_read(conn, conn->sockfd, k->buf, buffersize, &nread);        if(0>result)          break; /* get out of loop */        if(result>0)          return result;        if ((k->bytecount == 0) && (k->writebytecount == 0)) {          Curl_pgrsTime(data, TIMER_STARTTRANSFER);          if(k->wait100_after_headers)            /* set time stamp to compare with when waiting for the 100 */            k->start100 = Curl_tvnow();        }        didwhat |= KEEP_READ;        /* NULL terminate, allowing string ops to be used */        if (0 < nread)          k->buf[nread] = 0;        /* if we receive 0 or less here, the server closed the connection and           we bail out from this! */        else if (0 >= nread) {          k->keepon &= ~KEEP_READ;          FD_ZERO(&k->rkeepfd);          readdone = TRUE;          break;        }        /* Default buffer to use when we write the buffer, it may be changed           in the flow below before the actual storing is done. */        k->str = k->buf;        /* Since this is a two-state thing, we check if we are parsing           headers at the moment or not. 
*/                  if (k->header) {          /* we are in parse-the-header-mode */          bool stop_reading = FALSE;          /* header line within buffer loop */          do {            int hbufp_index;            int rest_length;            int full_length;            int writetype;                          /* str_start is start of line within buf */            k->str_start = k->str;                          k->end_ptr = strchr (k->str_start, '\n');                          if (!k->end_ptr) {              /* Not a complete header line within buffer, append the data to                 the end of the headerbuff. */              if (k->hbuflen + nread >= data->state.headersize) {                /* We enlarge the header buffer as it is too small */                char *newbuff;                long newsize=MAX((k->hbuflen+nread)*3/2,                                 data->state.headersize*2);                hbufp_index = k->hbufp - data->state.headerbuff;                newbuff = (char *)realloc(data->state.headerbuff, newsize);                if(!newbuff) {                  failf (data, "Failed to alloc memory for big header!");                  return CURLE_OUT_OF_MEMORY;                }                data->state.headersize=newsize;                data->state.headerbuff = newbuff;                k->hbufp = data->state.headerbuff + hbufp_index;              }              memcpy(k->hbufp, k->str, nread);              k->hbufp += nread;              k->hbuflen += nread;              if (!k->headerline && (k->hbuflen>5)) {                /* make a first check that this looks like a HTTP header */                if(!checkhttpprefix(data, data->state.headerbuff)) {                  /* this is not the beginning of a HTTP first header line */                  k->header = FALSE;                  k->badheader = HEADER_ALLBAD;                  break;                }              }              break; /* read more and try again */            }            /* decrease the 
size of the remaining (supposed) header line */            rest_length = (k->end_ptr - k->str)+1;            nread -= rest_length;             k->str = k->end_ptr + 1; /* move past new line */            full_length = k->str - k->str_start;            /*             * We're about to copy a chunk of data to the end of the             * already received header. We make sure that the full string             * fit in the allocated header buffer, or else we enlarge              * it.             */            if (k->hbuflen + full_length >=                data->state.headersize) {              char *newbuff;              long newsize=MAX((k->hbuflen+full_length)*3/2,                               data->state.headersize*2);              hbufp_index = k->hbufp - data->state.headerbuff;              newbuff = (char *)realloc(data->state.headerbuff, newsize);              if(!newbuff) {                failf (data, "Failed to alloc memory for big header!");                return CURLE_OUT_OF_MEMORY;              }              data->state.headersize= newsize;              data->state.headerbuff = newbuff;              k->hbufp = data->state.headerbuff + hbufp_index;            }            /* copy to end of line */            strncpy (k->hbufp, k->str_start, full_length);            k->hbufp += full_length;            k->hbuflen += full_length;            *k->hbufp = 0;            k->end_ptr = k->hbufp;                          k->p = data->state.headerbuff;                          /****             * We now have a FULL header line that p points to

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -