⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 urldata.h

📁 harvest是一个下载html网页的机器人
💻 H
📖 第 1 页 / 共 3 页
字号:
 * to struct to allow the function to return and get re-invoked better without * losing state. */struct Curl_transfer_keeper {  int bytecount;                /* total number of bytes read */  int writebytecount;           /* number of bytes written */  long contentlength;           /* size of incoming data */  struct timeval start;         /* transfer started at this time */  struct timeval now;           /* current time */  bool header;	                /* incoming data has HTTP header */  enum {    HEADER_NORMAL,      /* no bad header at all */    HEADER_PARTHEADER,  /* part of the chunk is a bad header, the rest is                           normal data */    HEADER_ALLBAD       /* all was believed to be header */  } badheader;		        /* the header was deemed bad and will be                                   written as body */  int headerline;		/* counts header lines to better track the                                   first one */  char *hbufp;			/* points at *end* of header line */  int hbuflen;  char *str;			/* within buf */  char *str_start;		/* within buf */  char *end_ptr;		/* within buf */  char *p;			/* within headerbuff */  bool content_range;      	/* set TRUE if Content-Range: was found */  int offset;	                /* possible resume offset read from the                                   Content-Range: header */  int httpcode;		        /* error code from the 'HTTP/1.? XXX' line */  int httpversion;		/* the HTTP version*10 */  struct timeval start100;      /* time stamp to wait for the 100 code from */  bool write_after_100_header;  /* TRUE = we enable the write after we                                   received a 100-continue/timeout or                                   FALSE = directly */  bool wait100_after_headers;   /* TRUE = after the request-headers have been                                   sent off properly, we go into the wait100                                   state, FALSE = don't */  int content_encoding;  	/* What content encoding. 
sec 3.5, RFC2616. */#define IDENTITY 0		/* No encoding */#define DEFLATE 1		/* zlib delfate [RFC 1950 & 1951] */#define GZIP 2			/* gzip algorithm [RFC 1952] */#define COMPRESS 3		/* Not handled, added for completeness */#ifdef HAVE_LIBZ  bool zlib_init;		/* True if zlib already initialized;				   undefined if Content-Encoding header. */  z_stream z;			/* State structure for zlib. */#endif  /* for the low speed checks: */  time_t timeofdoc;  long bodywrites;  char *buf;  char *uploadbuf;  int maxfd;  /* pointers to the actual descriptors we check */  fd_set *readfdp;  fd_set *writefdp;  /* the file descriptors to play with */  fd_set readfd;  fd_set writefd;  fd_set rkeepfd;  fd_set wkeepfd;  int keepon;  bool upload_done; /* set to TRUE when doing chunked transfer-encoding upload                       and we're uploading the last chunk */  bool ignorebody;  /* we read a response-body but we ignore it! */};#ifdef USE_ARESstruct Curl_async {  char *hostname;  int port;  struct Curl_dns_entry *dns;  bool done;  /* set TRUE when the lookup is complete */  int status; /* if done is TRUE, this is the status from the callback */};#endif/* * The connectdata struct contains all fields and variables that should be * unique for an entire connection. 
*/struct connectdata {  /**** Fields set when inited and not modified again */  struct SessionHandle *data; /* link to the root CURL struct */  int connectindex; /* what index in the connects index this particular                       struct has */  long protocol; /* PROT_* flags concerning the protocol set */#define PROT_MISSING (1<<0)#define PROT_GOPHER  (1<<1)#define PROT_HTTP    (1<<2)#define PROT_HTTPS   (1<<3)#define PROT_FTP     (1<<4)#define PROT_TELNET  (1<<5)#define PROT_DICT    (1<<6)#define PROT_LDAP    (1<<7)#define PROT_FILE    (1<<8)#define PROT_FTPS    (1<<9)#define PROT_SSL     (1<<10) /* protocol requires SSL */  /* the particular host we use, in two different ways */  struct Curl_dns_entry *connect_addr;#ifdef ENABLE_IPV6  struct addrinfo *serv_addr;#else  struct sockaddr_in serv_addr;#endif  char protostr[64];  /* store the protocol string in this buffer */  char gname[513]; /* store the hostname in this buffer */  char *name;      /* host name pointer to fool around with */  char *path;      /* allocated buffer to store the URL's path part in */  char *hostname;  /* hostname to connect, as parsed from url */  long port;       /* which port to use locally */  unsigned short remote_port; /* what remote port to connect to,                                 not the proxy port! */  char *ppath;  long bytecount;  long headerbytecount;  /* only count received headers */  char *range; /* range, if used. See README for detailed specification on                  this syntax. 
*/  ssize_t resume_from; /* continue [ftp] transfer from here */  char *proxyhost; /* name of the http proxy host */  char *user;    /* user name string, allocated */  char *passwd;  /* password string, allocated */  char *proxyuser;    /* proxy user name string, allocated */  char *proxypasswd;  /* proxy password string, allocated */    struct timeval now;     /* "current" time */  struct timeval created; /* creation time */  int firstsocket;     /* the main socket to use */  int secondarysocket; /* for i.e ftp transfers */  long maxdownload; /* in bytes, the maximum amount of data to fetch, 0                       means unlimited */    struct ssl_connect_data ssl; /* this is for ssl-stuff */  struct ssl_config_data ssl_config;  struct ConnectBits bits;    /* various state-flags for this connection */  /* These two functions MUST be set by the curl_connect() function to be     be protocol dependent */  CURLcode (*curl_do)(struct connectdata *);  CURLcode (*curl_done)(struct connectdata *);  /* If the curl_do() function is better made in two halves, this   * curl_do_more() function will be called afterwards, if set. For example   * for doing the FTP stuff after the PASV/PORT command.   */  CURLcode (*curl_do_more)(struct connectdata *);  /* This function *MAY* be set to a protocol-dependent function that is run   * after the connect() and everything is done, as a step in the connection.   */   CURLcode (*curl_connect)(struct connectdata *);  /* This function *MAY* be set to a protocol-dependent function that is run   * by the curl_disconnect(), as a step in the disconnection.   */   CURLcode (*curl_disconnect)(struct connectdata *);  /* This function *MAY* be set to a protocol-dependent function that is run   * in the curl_close() function if protocol-specific cleanups are required.   
*/   CURLcode (*curl_close)(struct connectdata *);  /**** curl_get() phase fields */  /* READ stuff */  int sockfd;		 /* socket to read from or -1 */  int size;		 /* -1 if unknown at this point */  long *bytecountp;	 /* return number of bytes read or NULL */            /* WRITE stuff */  int writesockfd;       /* socket to write to, it may very well be                            the same we read from. -1 disables */  long *writebytecountp; /* return number of bytes written or NULL */  /** Dynamicly allocated strings, may need to be freed before this **/  /** struct is killed.                                             **/  struct dynamically_allocated_data {    char *proxyuserpwd; /* free later if not NULL! */    char *uagent; /* free later if not NULL! */    char *accept_encoding; /* free later if not NULL! */    char *userpwd; /* free later if not NULL! */    char *rangeline; /* free later if not NULL! */    char *ref; /* free later if not NULL! */    char *cookie; /* free later if not NULL! 
*/    char *host; /* free later if not NULL */    char *cookiehost; /* free later if not NULL */  } allocptr;  char *newurl; /* This can only be set if a Location: was in the		   document headers */#ifdef KRB4  enum protection_level command_prot;  enum protection_level data_prot;  enum protection_level request_data_prot;  size_t buffer_size;  struct krb4buffer in_buffer, out_buffer;  int sec_complete;  void *app_data;  struct Curl_sec_client_mech *mech;  struct sockaddr_in local_addr;#endif  /*************** Request - specific items ************/  /* previously this was in the urldata struct */  union {    struct HTTP *http;    struct HTTP *gopher; /* alias, just for the sake of being more readable */    struct HTTP *https;  /* alias, just for the sake of being more readable */    struct FTP *ftp;    struct FILE *file;    void *telnet;        /* private for telnet.c-eyes only */#if 0 /* no need for special ones for these: */    struct LDAP *ldap;    struct DICT *dict;#endif    void *generic;  } proto;  /* This struct is inited when needed */  struct Curl_transfer_keeper keep;  /* 'upload_present' is used to keep a byte counter of how much data there is     still left in the buffer, aimed for upload. */  int upload_present;   /* 'upload_fromhere' is used as a read-pointer when we uploaded parts of a      buffer, so the next read should read from where this pointer points to,      and the 'upload_present' contains the number of bytes available at this      position */  char *upload_fromhere;  curl_read_callback fread; /* function that reads the input */  void *fread_in;           /* pointer to pass to the fread() above */  struct ntlmdata ntlm;     /* NTLM differs from other authentication schemes                               because it authenticates connections, not                               single requests! 
*/  struct ntlmdata proxyntlm; /* NTLM data for proxy */#ifdef USE_ARES  /* data used for the asynch name resolve callback */  struct Curl_async async;#endif};/* The end of connectdata. *//* * Struct to keep statistical and informational data. */struct PureInfo {  int httpcode;  /* Recent HTTP or FTP response code */  int httpproxycode;  int httpversion;  long filetime; /* If requested, this is might get set. Set to -1 if the time                    was unretrievable. We cannot have this of type time_t,                    since time_t is unsigned on several platforms such as                    OpenVMS. */  long header_size;  /* size of read header(s) in bytes */  long request_size; /* the amount of bytes sent in the request(s) */  long proxyauthavail;  long httpauthavail;  char *contenttype; /* the content type of the object */};struct Progress {  long lastshow; /* time() of the last displayed progress meter or NULL to                    force redraw at next call */  double size_dl;  double size_ul;  double downloaded;  double uploaded;  double current_speed; /* uses the currently fastest transfer */  bool callback;  /* set when progress callback is used */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -