⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 url.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 5 页
字号:
/***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) 1998 - 2003, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at http://curl.haxx.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 * $Id: url.c,v 1.313 2003/10/29 09:53:22 bagder Exp $
 ***************************************************************************/

/* -- WIN32 approved -- */

#include "setup.h"

#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include <stdlib.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>

#if defined(WIN32) && !defined(__GNUC__) || defined(__MINGW32__)
#include <time.h>
#include <io.h>
#else
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#include <netinet/in.h>
#include <sys/time.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <netdb.h>
#ifdef HAVE_ARPA_INET_H
#include <arpa/inet.h>
#endif
#ifdef HAVE_NET_IF_H
#include <net/if.h>
#endif
#include <sys/ioctl.h>
#include <signal.h>

#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif

#ifdef HAVE_SYS_SELECT_H
#include <sys/select.h>
#endif

#ifdef VMS
#include <in.h>
#include <inet.h>
#endif

#ifdef HAVE_SETJMP_H
#include <setjmp.h>
#endif

#ifndef HAVE_SELECT
#error "We can't compile without select() support!"
#endif
#ifndef HAVE_SOCKET
#error "We can't compile without socket() support!"
#endif

#endif

#ifdef HAVE_OPENSSL_ENGINE_H
#include <openssl/engine.h>
#endif

#include "urldata.h"
#include "netrc.h"

#include "formdata.h"
#include "base64.h"
#include "ssluse.h"
#include "hostip.h"
#include "if2ip.h"
#include "transfer.h"
#include "sendf.h"
#include "progress.h"
#include "cookie.h"
#include "strequal.h"
#include "escape.h"
#include "strtok.h"
#include "share.h"
#include "content_encoding.h"
#include "http_digest.h"
#include "http_negotiate.h"

/* And now for the protocols */
#include "ftp.h"
#include "dict.h"
#include "telnet.h"
#include "http.h"
#include "file.h"
#include "ldap.h"
#include "url.h"
#include "connect.h"
#include "ca-bundle.h"

#include <curl/types.h>

#if defined(HAVE_INET_NTOA_R) && !defined(HAVE_INET_NTOA_R_DECL)
#include "inet_ntoa_r.h"
#endif

#define _MPRINTF_REPLACE /* use our functions only */
#include <curl/mprintf.h>

#ifdef KRB4
#include "security.h"
#endif

/* The last #include file should be: */
#ifdef CURLDEBUG
#include "memdebug.h"
#endif

/* Local static prototypes */
static int ConnectionKillOne(struct SessionHandle *data);
static bool ConnectionExists(struct SessionHandle *data,
                             struct connectdata *needle,
                             struct connectdata **usethis);
static unsigned int ConnectionStore(struct SessionHandle *data,
                                    struct connectdata *conn);
static bool safe_strequal(char* str1, char* str2);

#ifndef USE_ARES
/* not for Win32, unless it is cygwin
   not for ares builds */
#if !defined(WIN32) || defined(__CYGWIN32__)
#ifndef RETSIGTYPE
#define RETSIGTYPE void
#endif
#ifdef HAVE_SIGSETJMP
extern sigjmp_buf curl_jmpenv;
#endif

/*
 * Signal handler. When HAVE_SIGSETJMP is defined it jumps back to the
 * context saved in curl_jmpenv (with value 1); otherwise it simply returns.
 * The signal number is not used.
 */
static
RETSIGTYPE alarmfunc(int sig)
{
  /* this is for "-ansi -Wall -pedantic" to stop complaining!   (rabe) */
  (void)sig;
#ifdef HAVE_SIGSETJMP
  siglongjmp(curl_jmpenv, 1);
#endif
  return;
}
#endif
#endif /* USE_ARES */

/*
 * Free 'ptr' unless it is NULL. The explicit NULL test is kept on purpose
 * so that whatever free() resolves to in this build is never handed a NULL
 * pointer.
 */
void Curl_safefree(void *ptr)
{
  if(ptr)
    free(ptr);
}

/*
 * This is the internal function curl_easy_cleanup() calls. This should
 * cleanup and free all resources associated with this sessionhandle.
 *
 * A NULL 'data' is accepted and treated as "nothing to clean up".
 *
 * Always returns CURLE_OK. After a call with a non-NULL handle, 'data' has
 * been freed and must not be used again.
 *
 * NOTE: if we ever add something that attempts to write to a socket or
 * similar here, we must ignore SIGPIPE first. It is currently only done
 * when curl_easy_perform() is invoked.
 */
CURLcode Curl_close(struct SessionHandle *data)
{
  if(!data)
    /* no handle, nothing to release */
    return CURLE_OK;

  /* Loop through all open connections and kill them one by one */
  while(-1 != ConnectionKillOne(data));

#ifdef USE_SSLEAY
  /* Close down all open SSL info and sessions */
  Curl_SSL_Close_All(data);
#endif

  if(data->change.cookielist) /* clean up list if any */
    curl_slist_free_all(data->change.cookielist);

  Curl_safefree(data->state.auth_host);
  Curl_safefree(data->state.scratch);

  /* these strings are only ours to free when the matching _alloc flag
     is set */
  if(data->change.proxy_alloc)
    free(data->change.proxy);

  if(data->change.referer_alloc)
    free(data->change.referer);

  if(data->change.url_alloc)
    free(data->change.url);

  Curl_safefree(data->state.headerbuff);

#ifndef CURL_DISABLE_HTTP
  Curl_share_lock(data, CURL_LOCK_DATA_COOKIE, CURL_LOCK_ACCESS_SINGLE);
  if(data->set.cookiejar) {
    /* we have a "destination" for all the cookies to get dumped to */
    if(Curl_cookie_output(data->cookies, data->set.cookiejar))
      infof(data, "WARNING: failed to save cookies in given jar\n");
  }

  /* only clean up the cookies when they are not the ones held by the
     share object */
  if( !data->share || (data->cookies != data->share->cookies) ) {
    Curl_cookie_cleanup(data->cookies);
  }
  Curl_share_unlock(data, CURL_LOCK_DATA_COOKIE);
#endif

  /* free the connection cache; the array pointer is NULL when
     CURLOPT_MAXCONNECTS has been set to zero, so use the guarded free */
  Curl_safefree(data->state.connects);

  Curl_safefree(data->info.contenttype);

  Curl_digest_cleanup(data);

  /* No longer a dirty share, if it exists */
  if (data->share)
    data->share->dirty--;

  free(data);
  return CURLE_OK;
}

/*
 * Allocate a new session handle and fill in every default that cannot
 * simply be zero.
 *
 * On success the new handle is stored in '*curl' and CURLE_OK is returned.
 * If any of the allocations fails, everything allocated so far is released,
 * '*curl' is left untouched and CURLE_OUT_OF_MEMORY is returned.
 */
CURLcode Curl_open(struct SessionHandle **curl)
{
  /* We don't yet support specifying the URL at this point */
  struct SessionHandle *data;

  /* Very simple start-up: alloc the struct, init it with zeroes and return */
  data = (struct SessionHandle *)malloc(sizeof(struct SessionHandle));
  if(!data)
    /* this is a very serious error */
    return CURLE_OUT_OF_MEMORY;

  memset(data, 0, sizeof(struct SessionHandle));

  /* We do some initial setup here, all those fields that can't be just 0 */

  data->state.headerbuff=(char*)malloc(HEADERSIZE);
  if(!data->state.headerbuff) {
    free(data); /* free the memory again */
    return CURLE_OUT_OF_MEMORY;
  }

  data->state.headersize=HEADERSIZE;

  data->set.out = stdout; /* default output to stdout */
  data->set.in  = stdin;  /* default input from stdin */
  data->set.err  = stderr;  /* default stderr to stderr */

  /* use fwrite as default function to store output */
  data->set.fwrite = (curl_write_callback)fwrite;

  /* use fread as default function to read input */
  data->set.fread = (curl_read_callback)fread;

  data->set.infilesize = -1; /* we don't know any size */

  data->state.current_speed = -1; /* init to negative == impossible */

  data->set.httpreq = HTTPREQ_GET; /* Default HTTP request */
  data->set.ftp_use_epsv = TRUE;   /* FTP defaults to EPSV operations */
  data->set.ftp_use_eprt = TRUE;   /* FTP defaults to EPRT operations */

  data->set.dns_cache_timeout = 60; /* Timeout every 60 seconds by default */

  /* make libcurl quiet by default: */
  data->set.hide_progress = TRUE;  /* CURLOPT_NOPROGRESS changes these */
  data->progress.flags |= PGRS_HIDE;

  /* Set the default size of the SSL session ID cache */
  data->set.ssl.numsessions = 5;

  data->set.proxyport = 1080;
  data->set.proxytype = CURLPROXY_HTTP; /* defaults to HTTP proxy */
  data->set.httpauth = CURLAUTH_BASIC; /* defaults to basic authentication */
  data->set.proxyauth = CURLAUTH_BASIC; /* defaults to basic authentication */

  /* create an array with connection data struct pointers */
  data->state.numconnects = 5; /* hard-coded right now */
  data->state.connects = (struct connectdata **)
    malloc(sizeof(struct connectdata *) * data->state.numconnects);

  if(!data->state.connects) {
    free(data->state.headerbuff);
    free(data);
    return CURLE_OUT_OF_MEMORY;
  }

  /* every slot of the connection cache starts out empty */
  memset(data->state.connects, 0,
         sizeof(struct connectdata *)*data->state.numconnects);

  /*
   * libcurl 7.10 introduces SSL verification *by default*! This needs to be
   * switched off unless wanted.
   */
  data->set.ssl.verifypeer = TRUE;
  data->set.ssl.verifyhost = 2;
#ifdef CURL_CA_BUNDLE
  /* This is our preferred CA cert bundle since install time */
  data->set.ssl.CAfile = (char *)CURL_CA_BUNDLE;
#endif

  *curl = data;
  return CURLE_OK;
}

CURLcode Curl_setopt(struct SessionHandle *data, CURLoption option, ...)
{
  va_list param;
  char *cookiefile;

  va_start(param, option);

  switch(option) {
  case CURLOPT_DNS_CACHE_TIMEOUT:
    data->set.dns_cache_timeout = va_arg(param, int);
    break;
  case CURLOPT_DNS_USE_GLOBAL_CACHE:
    {
      int use_cache = va_arg(param, int);
      if (use_cache) {
        Curl_global_host_cache_init();
      }

      data->set.global_dns_cache = use_cache;
    }
    break;
  case CURLOPT_SSL_CIPHER_LIST:
    /* set a list of cipher we want to use in the SSL connection */
    data->set.ssl.cipher_list = va_arg(param, char *);
    break;

  case CURLOPT_RANDOM_FILE:
    /*
     * This is the path name to a file that contains random data to seed
     * the random SSL stuff with. The file is only used for reading.
     */
    data->set.ssl.random_file = va_arg(param, char *);
    break;
  case CURLOPT_EGDSOCKET:
    /*
     * The Entropy Gathering Daemon socket pathname
     */
    data->set.ssl.egdsocket = va_arg(param, char *);
    break;
  case CURLOPT_MAXCONNECTS:
    /*
     * Set the absolute number of maximum simultaneous alive connection that
     * libcurl is allowed to have.
     */
    {
      long newconnects= va_arg(param, long);
      struct connectdata **newptr;

      if(newconnects < data->state.numconnects) {
        /* Since this number is *decreased* from the existing number, we must
           close the possibly open connections that live on the indexes that
           are being removed! */
        int i;
        for(i=newconnects; i< data->state.numconnects; i++)
          Curl_disconnect(data->state.connects[i]);
      }
      if(newconnects) {
        int i;
        newptr= (struct connectdata **)
          realloc(data->state.connects,
                  sizeof(struct connectdata *) * newconnects);
        if(!newptr)
          /* we closed a few connections in vain, but so what? */
          return CURLE_OUT_OF_MEMORY;

        /* nullify the newly added pointers */
        for(i=data->state.numconnects; i<newconnects; i++) {
          newptr[i] = NULL;
        }

        data->state.connects = newptr;
        data->state.numconnects = newconnects;
      }
      else {
        /* zero makes NO cache at all */
        if(data->state.connects)
          free(data->state.connects);
        data->state.connects=NULL;
        data->state.numconnects=0;
      }
    }
    break;
  case CURLOPT_FORBID_REUSE:
    /*
     * When this transfer is done, it must not be left to be reused by a
     * subsequent transfer but shall be closed immediately.
     */
    data->set.reuse_forbid = va_arg(param, long)?TRUE:FALSE;
    break;
  case CURLOPT_FRESH_CONNECT:
    /*
     * This transfer shall not use a previously cached connection but
     * should be made with a fresh new connect!
     */
    data->set.reuse_fresh = va_arg(param, long)?TRUE:FALSE;
    break;
  case CURLOPT_VERBOSE:
    /*
     * Verbose means infof() calls that give a lot of information about
     * the connection and transfer procedures as well as internal choices.
     */
    data->set.verbose = va_arg(param, long)?TRUE:FALSE;
    break;
  case CURLOPT_HEADER:
    /*
     * Set to include the header in the general data output stream.
     */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -