⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 http.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 4 页
字号:
        }        else if(data->state.authwant & CURLAUTH_BASIC) {          data->state.authavail |= CURLAUTH_BASIC;        }      }  return CURLE_OK;}/* fread() emulation to provide POST and/or request data */static int readmoredata(char *buffer,                        size_t size,                        size_t nitems,                        void *userp){  struct connectdata *conn = (struct connectdata *)userp;  struct HTTP *http = conn->proto.http;  int fullsize = size * nitems;  if(0 == http->postsize)    /* nothing to return */    return 0;    /* make sure that a HTTP request is never sent away chunked! */  conn->bits.forbidchunk= (http->sending == HTTPSEND_REQUEST)?TRUE:FALSE;  if(http->postsize <= fullsize) {    memcpy(buffer, http->postdata, http->postsize);    fullsize = http->postsize;    if(http->backup.postsize) {      /* move backup data into focus and continue on that */      http->postdata = http->backup.postdata;      http->postsize = http->backup.postsize;      conn->fread =    http->backup.fread;      conn->fread_in = http->backup.fread_in;      http->sending++; /* move one step up */      http->backup.postsize=0;    }    else      http->postsize = 0;    return fullsize;  }  memcpy(buffer, http->postdata, fullsize);  http->postdata += fullsize;  http->postsize -= fullsize;  return fullsize;}/* ------------------------------------------------------------------------- *//* * The add_buffer series of functions are used to build one large memory chunk * from repeated function invokes. Used so that the entire HTTP request can * be sent in one go. 
*/struct send_buffer {  char *buffer;  size_t size_max;  size_t size_used;};typedef struct send_buffer send_buffer;static CURLcode add_buffer(send_buffer *in, const void *inptr, size_t size);/* * add_buffer_init() returns a fine buffer struct */staticsend_buffer *add_buffer_init(void){  send_buffer *blonk;  blonk=(send_buffer *)malloc(sizeof(send_buffer));  if(blonk) {    memset(blonk, 0, sizeof(send_buffer));    return blonk;  }  return NULL; /* failed, go home */}/* * add_buffer_send() sends a buffer and frees all associated memory. */staticCURLcode add_buffer_send(send_buffer *in,                         int sockfd,                         struct connectdata *conn,                         long *bytes_written) /* add the number of sent                                                 bytes to this counter */{  ssize_t amount;  CURLcode res;  char *ptr;  int size;  struct HTTP *http = conn->proto.http;  int sendsize;  /* The looping below is required since we use non-blocking sockets, but due     to the circumstances we will just loop and try again and again etc */  ptr = in->buffer;  size = in->size_used;  if(conn->protocol & PROT_HTTPS) {    /* We never send more than CURL_MAX_WRITE_SIZE bytes in one single chunk       when we speak HTTPS, as if only a fraction of it is sent now, this data       needs to fit into the normal read-callback buffer later on and that       buffer is using this size.    */    sendsize= (size > CURL_MAX_WRITE_SIZE)?CURL_MAX_WRITE_SIZE:size;    /* OpenSSL is very picky and we must send the SAME buffer pointer to the       library when we attempt to re-send this buffer. Sending the same data       is not enough, we must use the exact same address. For this reason, we       must copy the data to the uploadbuffer first, since that is the buffer       we will be using if this send is retried later.    
*/    memcpy(conn->data->state.uploadbuffer, ptr, sendsize);    ptr = conn->data->state.uploadbuffer;  }  else    sendsize = size;    res = Curl_write(conn, sockfd, ptr, sendsize, &amount);  if(CURLE_OK == res) {    if(conn->data->set.verbose)      /* this data _may_ contain binary stuff */      Curl_debug(conn->data, CURLINFO_HEADER_OUT, ptr, amount);    *bytes_written += amount;        if(amount != size) {      /* The whole request could not be sent in one system call. We must queue         it up and send it later when we get the chance. We must not loop here         and wait until it might work again. */      size -= amount;      ptr = in->buffer + amount;          /* backup the currently set pointers */      http->backup.fread = conn->fread;      http->backup.fread_in = conn->fread_in;      http->backup.postdata = http->postdata;      http->backup.postsize = http->postsize;      /* set the new pointers for the request-sending */      conn->fread = (curl_read_callback)readmoredata;      conn->fread_in = (void *)conn;      http->postdata = ptr;      http->postsize = size;      http->send_buffer = in;      http->sending = HTTPSEND_REQUEST;            return CURLE_OK;    }    http->sending = HTTPSEND_BODY;    /* the full buffer was sent, clean up and return */  }  if(in->buffer)    free(in->buffer);  free(in);  return res;}/*  * add_bufferf() builds a buffer from the formatted input */staticCURLcode add_bufferf(send_buffer *in, const char *fmt, ...){  char *s;  va_list ap;  va_start(ap, fmt);  s = vaprintf(fmt, ap); /* this allocs a new string to append */  va_end(ap);  if(s) {    CURLcode result = add_buffer(in, s, strlen(s));    free(s);    if(CURLE_OK == result)      return CURLE_OK;  }  /* If we failed, we cleanup the whole buffer and return error */  if(in->buffer)    free(in->buffer);  free(in);  return CURLE_OUT_OF_MEMORY;}/* * add_buffer() appends a memory chunk to the existing one */staticCURLcode add_buffer(send_buffer *in, const void *inptr, size_t 
size){  char *new_rb;  int new_size;  if(!in->buffer ||     ((in->size_used + size) > (in->size_max - 1))) {    new_size = (in->size_used+size)*2;    if(in->buffer)      /* we have a buffer, enlarge the existing one */      new_rb = (char *)realloc(in->buffer, new_size);    else      /* create a new buffer */      new_rb = (char *)malloc(new_size);    if(!new_rb)      return CURLE_OUT_OF_MEMORY;    in->buffer = new_rb;    in->size_max = new_size;  }  memcpy(&in->buffer[in->size_used], inptr, size);        in->size_used += size;  return CURLE_OK;}/* end of the add_buffer functions *//* ------------------------------------------------------------------------- *//* * Curl_compareheader() * * Returns TRUE if 'headerline' contains the 'header' with given 'content'. * Pass headers WITH the colon. */boolCurl_compareheader(char *headerline,    /* line to check */                   const char *header,  /* header keyword _with_ colon */                   const char *content) /* content string to find */{  /* RFC2616, section 4.2 says: "Each header field consists of a name followed   * by a colon (":") and the field value. Field names are case-insensitive.   * The field value MAY be preceded by any amount of LWS, though a single SP   * is preferred." */  size_t hlen = strlen(header);  size_t clen;  size_t len;  char *start;  char *end;  if(!strnequal(headerline, header, hlen))    return FALSE; /* doesn't start with header */  /* pass the header */  start = &headerline[hlen];  /* pass all white spaces */  while(*start && isspace((int)*start))    start++;  /* find the end of the header line */  end = strchr(start, '\r'); /* lines end with CRLF */  if(!end) {    /* in case there's a non-standard compliant line here */    end = strchr(start, '\n');    if(!end)      /* hm, there's no line ending here, use the zero byte! 
*/      end = strchr(start, '\0');  }  len = end-start; /* length of the content part of the input line */  clen = strlen(content); /* length of the word to find */  /* find the content string in the rest of the line */  for(;len>=clen;len--, start++) {    if(strnequal(start, content, clen))      return TRUE; /* match! */  }  return FALSE; /* no match */}/* * ConnectHTTPProxyTunnel() requires that we're connected to a HTTP proxy. This * function will issue the necessary commands to get a seamless tunnel through * this proxy. After that, the socket can be used just as a normal socket. */CURLcode Curl_ConnectHTTPProxyTunnel(struct connectdata *conn,                                     int tunnelsocket,                                     char *hostname,                                     int remote_port){  int httpcode=0;  int subversion=0;  struct SessionHandle *data=conn->data;  CURLcode result;  int res;  int nread;   /* total size read */  int perline; /* count bytes per line */  bool keepon=TRUE;  ssize_t gotbytes;  char *ptr;  int timeout = 3600; /* default timeout in seconds */  struct timeval interval;  fd_set rkeepfd;  fd_set readfd;  char *line_start;  char *host_port;#define SELECT_OK      0#define SELECT_ERROR   1#define SELECT_TIMEOUT 2  int error = SELECT_OK;  infof(data, "Establish HTTP proxy tunnel to %s:%d\n", hostname, remote_port);  do {    bool auth; /* we don't really have to know when the auth phase is done,                  but this variable will be set to true then */    if(conn->newurl) {      /* This only happens if we've looped here due to authentication reasons,         and we don't really use the newly cloned URL here then. Just free()         it. 
*/      free(conn->newurl);       conn->newurl = NULL;    }    host_port = aprintf("%s:%d", hostname, remote_port);    if(!host_port)      return CURLE_OUT_OF_MEMORY;    /* Setup the proxy-authorization header, if any */    result = http_auth_headers(conn, (char *)"CONNECT", host_port, &auth);    if(CURLE_OK == result) {      /* OK, now send the connect request to the proxy */      result =        Curl_sendf(tunnelsocket, conn,                   "CONNECT %s:%d HTTP/1.0\015\012"                   "%s"                   "%s"                   "\r\n",                   hostname, remote_port,                   conn->bits.proxy_user_passwd?                   conn->allocptr.proxyuserpwd:"",                   data->set.useragent?conn->allocptr.uagent:""                   );      if(result)        failf(data, "Failed sending CONNECT to proxy");    }    free(host_port);    if(result)      return result;    FD_ZERO (&readfd);		/* clear it */    FD_SET (tunnelsocket, &readfd);     /* read socket */    /* get this in a backup variable to be able to restore it on each lap in       the select() loop */    rkeepfd = readfd;    ptr=data->state.buffer;    line_start = ptr;    nread=0;    perline=0;    keepon=TRUE;    while((nread<BUFSIZE) && (keepon && !error)) {      readfd = rkeepfd;     /* set every lap */      interval.tv_sec = 1;  /* timeout each second and check the timeout */      interval.tv_usec = 0;      if(data->set.timeout) {        /* if timeout is requested, find out how much remaining time we have */        timeout = data->set.timeout - /* timeout time */          Curl_tvdiff(Curl_tvnow(), conn->now)/1000; /* spent time */        if(timeout <=0 ) {          failf(data, "Proxy connection aborted due to timeout");          error = SELECT_TIMEOUT; /* already too little time */          break;        }      }            switch (select (tunnelsocket+1, &readfd, NULL, NULL, &interval)) {      case -1: /* select() error, stop reading */        error = SELECT_ERROR;        
failf(data, "Proxy CONNECT aborted due to select() error");        break;      case 0: /* timeout */        break;      default:        /*         * This code previously didn't use the kerberos sec_read() code         * to read, but when we use Curl_read() it may do so. Do confirm         * that this is still ok and then remove this comment!         */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -