⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hostip.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
/*************************************************************************** *                                  _   _ ____  _      *  Project                     ___| | | |  _ \| |     *                             / __| | | | |_) | |     *                            | (__| |_| |  _ <| |___  *                             \___|\___/|_| \_\_____| * * Copyright (C) 1998 - 2003, Daniel Stenberg, <daniel@haxx.se>, et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms * are also available at http://curl.haxx.se/docs/copyright.html. *  * You may opt to use, copy, modify, merge, publish, distribute and/or sell * copies of the Software, and permit persons to whom the Software is * furnished to do so, under the terms of the COPYING file. * * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY * KIND, either express or implied. * * $Id: hostip.c,v 1.107 2003/10/28 13:06:17 bagder Exp $ ***************************************************************************/#include "setup.h"#include <string.h>#include <errno.h>#define _REENTRANT#if defined(WIN32) && !defined(__GNUC__) || defined(__MINGW32__)#include <malloc.h>#else#ifdef HAVE_SYS_TYPES_H#include <sys/types.h>#endif#ifdef HAVE_SYS_SOCKET_H#include <sys/socket.h>#endif#ifdef HAVE_NETINET_IN_H#include <netinet/in.h>#endif#ifdef HAVE_NETDB_H#include <netdb.h>#endif#ifdef HAVE_ARPA_INET_H#include <arpa/inet.h>#endif#ifdef HAVE_STDLIB_H#include <stdlib.h>	/* required for free() prototypes */#endif#ifdef	VMS#include <in.h>#include <inet.h>#include <stdlib.h>#endif#endif#ifdef HAVE_SETJMP_H#include <setjmp.h>#endif#include "urldata.h"#include "sendf.h"#include "hostip.h"#include "hash.h"#include "share.h"#include "url.h"#define _MPRINTF_REPLACE /* use our functions only */#include <curl/mprintf.h>#if defined(HAVE_INET_NTOA_R) && !defined(HAVE_INET_NTOA_R_DECL)#include "inet_ntoa_r.h"#endif/* The last #include file 
should be: */
#ifdef CURLDEBUG
#include "memdebug.h"
#endif

/* The process-wide host name cache and the flag telling whether it has been
   set up by Curl_global_host_cache_init(). */
static curl_hash hostname_cache;
static int host_cache_initialized;

static Curl_addrinfo *my_getaddrinfo(struct connectdata *conn,
                                     char *hostname,
                                     int port,
                                     int *waitp);
#if !defined(HAVE_GETHOSTBYNAME_R) || defined(USE_ARES)
static struct hostent* pack_hostent(char** buf, struct hostent* orig);
#endif

/*
 * Curl_global_host_cache_init() sets up the global host cache the first
 * time it is called; further calls do nothing until the cache has been torn
 * down with Curl_global_host_cache_dtor(). Entries are released with
 * Curl_freednsinfo().
 */
void Curl_global_host_cache_init(void)
{
  if (!host_cache_initialized) {
    /* NOTE(review): 7 is the size argument handed to Curl_hash_init(); its
       exact meaning is defined by the hash code, not visible here */
    Curl_hash_init(&hostname_cache, 7, Curl_freednsinfo);
    host_cache_initialized = 1;
  }
}

/*
 * Curl_global_host_cache_get() returns a pointer to the global host cache.
 * It does not check whether the cache has been initialized.
 */
curl_hash *Curl_global_host_cache_get(void)
{
  return &hostname_cache;
}

/*
 * Curl_global_host_cache_dtor() cleans the global host cache if it was
 * initialized and marks it as uninitialized again.
 */
void Curl_global_host_cache_dtor(void)
{
  if (host_cache_initialized) {
    Curl_hash_clean(&hostname_cache);
    host_cache_initialized = 0;
  }
}

/*
 * Count the number of characters needed to print 'i' with "%d".
 *
 * A leading '-' is counted for negative numbers. The previous version
 * ignored the sign, which made create_hostcache_id() allocate a buffer that
 * was too small for a negative port.
 */
static int _num_chars(int i)
{
  int chars = 0;
  unsigned int magnitude;

  if (i < 0) {
    chars++; /* room for the '-' sign */
    /* negate in unsigned arithmetic so that INT_MIN is handled too */
    magnitude = 0u - (unsigned int)i;
  }
  else
    magnitude = (unsigned int)i;

  /* Every division by 10 strips one decimal digit. The do-while makes sure
   * that zero is counted as one digit.
   */
  do {
    chars++;
    magnitude /= 10;
  } while (magnitude != 0);

  return chars;
}

/*
 * Create a hostcache id of the form "server:port".
 *
 * On entry, *entry_len must hold strlen(server). On success, *entry_len is
 * updated to the length of the id (excluding the terminating zero) and a
 * malloc()ed string is returned that the caller must free().
 *
 * Returns NULL if the allocation fails, or if the formatted id does not
 * have the expected length (in which case *entry_len is set to zero).
 */
static char *
create_hostcache_id(char *server, int port, ssize_t *entry_len)
{
  char *id = NULL;
  int written;

  /* Get the length of the new entry id */
  *entry_len = *entry_len + /* Hostname length */
    1 +                     /* ':' separator */
    _num_chars(port);       /* number of characters the port will take up */

  /* Allocate the new entry id */
  id = malloc(*entry_len + 1);
  if (!id)
    return NULL;

  /* Create the new entry. The write is bounded by the allocated size, so a
     wrong length calculation can never write outside the buffer. */
  written = snprintf(id, (size_t)*entry_len + 1, "%s:%d", server, port);

  /* If the output isn't exactly the entry length, that signals failure */
  if (written != *entry_len) {
    /* Free the allocated id, set length to zero and return NULL */
    *entry_len = 0;
    free(id);
    return NULL;
  }

  return id;
}

/* The user data handed to hostcache_timestamp_remove() for each entry */
struct hostcache_prune_data {
  int cache_timeout; /* an entry younger than this is kept */
  int now;           /* the time the entries' timestamps are compared to */
};

/*
 * Callback for Curl_hash_clean_with_criterium().
 *
 * 'datap' points to a struct hostcache_prune_data and 'hc' to the
 * struct Curl_dns_entry under consideration.
 *
 * Returns 0 to keep the entry (it is younger than the timeout or it is
 * marked as in use) and 1 to have it removed.
 */
static int
hostcache_timestamp_remove(void *datap, void *hc)
{
  struct hostcache_prune_data *data =
    (struct hostcache_prune_data *) datap;
  struct Curl_dns_entry *c = (struct Curl_dns_entry *) hc;

  if ((data->now - c->timestamp < data->cache_timeout) ||
      c->inuse) {
    /* please don't remove */
    return 0;
  }

  /* fine, remove */
  return 1;
}

/*
 * Remove all entries from 'hostcache' that hostcache_timestamp_remove()
 * selects for the given 'cache_timeout' and 'now' values.
 */
static void
hostcache_prune(curl_hash *hostcache, int cache_timeout, int now)
{
  struct hostcache_prune_data user;

  user.cache_timeout = cache_timeout;
  user.now = now;

  Curl_hash_clean_with_criterium(hostcache,
                                 (void *) &user,
                                 hostcache_timestamp_remove);
}

#ifdef HAVE_SIGSETJMP
/* Beware this is a global and unique instance */
sigjmp_buf curl_jmpenv;
#endif

/*
 * When calling Curl_resolv() has resulted in a response with a returned
 * address, we call this function to store the information in the dns
 * cache etc.
 *
 * Ownership: this function takes over 'addr'. On success it is referenced
 * by the returned entry. On EVERY failure path it is released here with
 * Curl_freeaddrinfo(), so callers must never free 'addr' themselves after
 * calling this function. (Previously one failure path left 'addr' alone
 * while the others freed it, which made it impossible for callers to clean
 * up correctly.)
 *
 * Returns the cache entry, marked as in use and freshly timestamped, or
 * NULL on failure. The caller is expected to hold the DNS share lock, if
 * sharing is used.
 */
static struct Curl_dns_entry *
cache_resolv_response(struct SessionHandle *data,
                      Curl_addrinfo *addr,
                      char *hostname,
                      int port)
{
  char *entry_id;
  ssize_t entry_len;
  struct Curl_dns_entry *dns;
  time_t now;

  /* Create an entry id, based upon the hostname and port */
  entry_len = strlen(hostname);
  entry_id = create_hostcache_id(hostname, port, &entry_len);
  /* If we can't create the entry id, fail */
  if (!entry_id) {
    Curl_freeaddrinfo(addr);
    return NULL;
  }

  /* Create a new cache entry */
  dns = malloc(sizeof(*dns));
  if (!dns) {
    Curl_freeaddrinfo(addr);
    free(entry_id);
    return NULL;
  }

  dns->inuse = 0;   /* init to not used */
  dns->addr = addr; /* this is the address(es) */

  /* Store the resolved data in our DNS cache. This function may return a
     pointer to an existing struct already present in the hash, and it may
     return the same argument we pass in. Make no assumptions. */
  dns = Curl_hash_add(data->hostcache, entry_id, entry_len+1, (void *) dns);
  if(!dns) {
    /* major badness, run away! */
    /* NOTE(review): the entry allocated above is no longer reachable here.
       Whether Curl_hash_add() releases it on failure is not visible from
       this file -- confirm, or this path leaks the struct. */
    Curl_freeaddrinfo(addr);
    free(entry_id);
    return NULL;
  }
  time(&now);

  dns->timestamp = now; /* used now */
  dns->inuse++;         /* mark entry as in-use */

  /* Remove outdated and unused entries from the hostcache */
  hostcache_prune(data->hostcache,
                  data->set.dns_cache_timeout,
                  now);

  /* free the allocated entry_id again */
  free(entry_id);

  return dns;
}

/*
 * Resolve a name and return a pointer in the 'entry' argument if one
 * is available.
 *
 * A returned entry is marked as in use, both when it comes from the cache
 * and when it was just resolved, and should be released with
 * Curl_resolv_unlock().
 *
 * Return codes:
 *
 * -1 = error, no pointer
 *  0 = OK, pointer provided
 *  1 = waiting for response, no pointer
 */
int Curl_resolv(struct connectdata *conn,
                char *hostname,
                int port,
                struct Curl_dns_entry **entry)
{
  char *entry_id = NULL;
  struct Curl_dns_entry *dns = NULL;
  ssize_t entry_len;
  int wait = 0; /* stays zero unless my_getaddrinfo() says otherwise */
  struct SessionHandle *data = conn->data;

  /* default to failure */
  int rc = -1;
  *entry = NULL;

#ifdef HAVE_SIGSETJMP
  /* this allows us to time-out from the name resolver, as the timeout
     will generate a signal and we will siglongjmp() from that here */
  if(!data->set.no_signal && sigsetjmp(curl_jmpenv, 1)) {
    /* this is coming from a siglongjmp() */
    failf(data, "name lookup timed out");
    return -1;
  }
#endif

  /* Create an entry id, based upon the hostname and port */
  entry_len = strlen(hostname);
  entry_id = create_hostcache_id(hostname, port, &entry_len);
  /* If we can't create the entry id, fail */
  if (!entry_id)
    return -1;

  if(data->share)
    Curl_share_lock(data, CURL_LOCK_DATA_DNS, CURL_LOCK_ACCESS_SINGLE);

  /* See if its already in our dns cache */
  dns = Curl_hash_pick(data->hostcache, entry_id, entry_len+1);

  if(dns)
    /* Mark the cached entry as in use while the lock is still held, just
       like cache_resolv_response() does for new entries. This keeps the
       counter balanced with the decrement in Curl_resolv_unlock(). */
    dns->inuse++;

  if(data->share)
    Curl_share_unlock(data, CURL_LOCK_DATA_DNS);

  /* free the allocated entry_id again */
  free(entry_id);

  if (dns) {
    /* found in the cache */
    rc = 0;
  }
  else {
    /* The entry was not in the cache. Resolve it to IP address */

    /* If my_getaddrinfo() returns NULL, 'wait' might be set to a non-zero
       value indicating that we need to wait for the response to the resolve
       call */
    Curl_addrinfo *addr = my_getaddrinfo(conn, hostname, port, &wait);

    if (!addr) {
      if(wait) {
        /* the response to our resolve call will come asynchronously at
           a later time, good or bad */
        /* First, check that we haven't received the info by now */
        (void)Curl_is_resolved(conn, &dns);
        if(dns)
          rc = 0; /* pointer provided */
        else
          rc = 1; /* no info yet */
      }
    }
    else {
      if(data->share)
        Curl_share_lock(data, CURL_LOCK_DATA_DNS, CURL_LOCK_ACCESS_SINGLE);

      /* we got a response, store it in the cache */
      dns = cache_resolv_response(data, addr, hostname, port);

      if(data->share)
        Curl_share_unlock(data, CURL_LOCK_DATA_DNS);

      if(dns)
        rc = 0; /* pointer provided */

      /* On failure, cache_resolv_response() has already released 'addr';
         freeing it again here would be a double free. */
    }
  }

  *entry = dns;

  return rc;
}

/*
 * Curl_resolv_unlock() decreases the in-use counter of 'dns', holding the
 * DNS share lock while doing so if 'data' uses a share.
 */
void Curl_resolv_unlock(struct SessionHandle *data, struct Curl_dns_entry *dns)
{
  if(data->share)
    Curl_share_lock(data, CURL_LOCK_DATA_DNS, CURL_LOCK_ACCESS_SINGLE);

  dns->inuse--;

  if(data->share)
    Curl_share_unlock(data, CURL_LOCK_DATA_DNS);
}

/*
 * This is a wrapper function for freeing name information in a protocol
 * independent way. This takes care of using the appropriate underlaying
 * function.
 */
void Curl_freeaddrinfo(Curl_addrinfo *p)
{
#ifdef ENABLE_IPV6
  freeaddrinfo(p);
#else
  free(p); /* works fine for the ARES case too */
#endif
}

/*
 * Free a cache dns entry: both the address information it holds and the
 * entry struct itself. This is the destructor given to the global host
 * cache.
 */
void Curl_freednsinfo(void *freethis)
{
  struct Curl_dns_entry *p = (struct Curl_dns_entry *) freethis;

  Curl_freeaddrinfo(p->addr);

  free(p);
}

/* --- resolve name or IP-number --- */

/* Allocate enough memory to hold the full name information structs and
 * everything. OSF1 is known to require at least 8872 bytes.
The buffer * required for storing all possible aliases and IP numbers is according to * Stevens' Unix Network Programming 2nd edition, p. 304: 8192 bytes! */#define CURL_NAMELOOKUP_SIZE 9000#ifdef USE_ARESCURLcode Curl_multi_ares_fdset(struct connectdata *conn,                               fd_set *read_fd_set,                               fd_set *write_fd_set,                               int *max_fdp){  int max = ares_fds(conn->data->state.areschannel,                     read_fd_set, write_fd_set);  *max_fdp = max;  return CURLE_OK;}/* called to check if the name is resolved now */CURLcode Curl_is_resolved(struct connectdata *conn,                          struct Curl_dns_entry **dns){  fd_set read_fds, write_fds;  static const struct timeval tv={0,0};  int count;  struct SessionHandle *data = conn->data;  int nfds;  FD_ZERO(&read_fds);  FD_ZERO(&write_fds);  nfds = ares_fds(data->state.areschannel, &read_fds, &write_fds);  count = select(nfds, &read_fds, &write_fds, NULL,                 (struct timeval *)&tv);  if(count)    ares_process(data->state.areschannel, &read_fds, &write_fds);  *dns = NULL;  if(conn->async.done) {    if(!conn->async.dns)      return CURLE_COULDNT_RESOLVE_HOST;    *dns = conn->async.dns;  }  return CURLE_OK;}/* This is a function that locks and waits until the name resolve operation   has completed.   If 'entry' is non-NULL, make it point to the resolved dns entry   Return CURLE_COULDNT_RESOLVE_HOST if the host was not resolved, and   CURLE_OPERATION_TIMEDOUT if a time-out occurred.*/CURLcode Curl_wait_for_resolv(struct connectdata *conn,                              struct Curl_dns_entry **entry){  CURLcode rc=CURLE_OK;  struct SessionHandle *data = conn->data;      /* Wait for the name resolve query to complete. 
*/  while (1) {    int nfds=0;    fd_set read_fds, write_fds;    struct timeval *tvp, tv;    int count;        FD_ZERO(&read_fds);    FD_ZERO(&write_fds);    nfds = ares_fds(data->state.areschannel, &read_fds, &write_fds);    if (nfds == 0)      break;    tvp = ares_timeout(data->state.areschannel,                       NULL, /* pass in our maximum time here */                       &tv);    count = select(nfds, &read_fds, &write_fds, NULL, tvp);    if (count < 0 && errno != EINVAL)      break;    ares_process(data->state.areschannel, &read_fds, &write_fds);  }  /* Operation complete, if the lookup was successful we now have the entry     in the cache. */      /* this destroys the channel and we cannot use it anymore after this */  ares_destroy(data->state.areschannel);  if(entry)    *entry = conn->async.dns;  if(!conn->async.dns) {    /* a name was not resolved */    if(conn->async.done) {      failf(data, "Could not resolve host: %s", conn->name);      rc = CURLE_COULDNT_RESOLVE_HOST;    }    else      rc = CURLE_OPERATION_TIMEDOUT;    /* close the connection, since we can't return failure here without       cleaning up this connection properly */    Curl_disconnect(conn);  }    return rc;}/* this function gets called by ares when we got the name resolved */static void host_callback(void *arg, /* "struct connectdata *" */                          int status,                          struct hostent *hostent){  struct connectdata *conn = (struct connectdata *)arg;  struct Curl_dns_entry *dns = NULL;  conn->async.done = TRUE;  conn->async.status = status;  if(ARES_SUCCESS == status) {    /* we got a resolved name in 'hostent' */    char *bufp = (char *)malloc(CURL_NAMELOOKUP_SIZE);    if(bufp) {      /* pack_hostent() copies to and shrinks the target buffer */      struct hostent *he = pack_hostent(&bufp, hostent);      struct SessionHandle *data = conn->data;      if(data->share)        Curl_share_lock(data, CURL_LOCK_DATA_DNS, CURL_LOCK_ACCESS_SINGLE);      dns = 
cache_resolv_response(data, he,                                  conn->async.hostname, conn->async.port);      if(data->share)        Curl_share_unlock(data, CURL_LOCK_DATA_DNS);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -