⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 main.c

📁 harvest是一个下载HTML网页的机器人
💻 C
📖 第 1 页 / 共 5 页
字号:
    " -V/--version       Show version number and quit",
#ifdef __DJGPP__
    "    --wdebug        Turn on WATT-32 debugging under DJGPP",
#endif
    " -w/--write-out [format] What to output after completion",
    " -x/--proxy <host[:port]> Use HTTP proxy on given port",
    " -X/--request <command> Specify request command to use",
    " -y/--speed-time    Time needed to trig speed-limit abort. Defaults to 30",
    " -Y/--speed-limit   Stop transfer if below speed-limit for 'speed-time' secs",
    " -z/--time-cond <time> Transfer based on a time condition",
    " -0/--http1.0       Use HTTP 1.0 (H)",
    " -1/--tlsv1         Use TLSv1 (SSL)",
    " -2/--sslv2         Use SSLv2 (SSL)",
    " -3/--sslv3         Use SSLv3 (SSL)",
    " -4/--ipv4          Resolve name to IPv4 address",
    " -6/--ipv6          Resolve name to IPv6 address",
    " -#/--progress-bar  Display transfer progress as a progress bar",
    NULL
  };
  /* the table is NULL terminated; print one entry per output line */
  for(i=0; help[i]; i++)
    puts(help[i]);
}

/* Describes one option by two strings ('letter' and 'lname') plus a flag.
   NOTE(review): 'extraparam' presumably tells whether the option takes an
   argument - confirm against the option table, which is outside this part
   of the file. */
struct LongShort {
  const char *letter;
  const char *lname;
  bool extraparam;
};

/* The option values, gathered in a single struct. parseconfig() (prototype
   further down) receives a pointer to one of these. */
struct Configurable {
  bool remote_time;
  char *random_file;
  char *egd_file;
  char *useragent;
  char *cookie;     /* single line with specified cookies */
  char *cookiejar;  /* write to this file */
  char *cookiefile; /* read from this file */
  bool cookiesession; /* new session? */
  bool encoding;    /* Accept-Encoding please */
  long authtype;    /* auth bitmask */
  bool use_resume;
  bool resume_from_current;
  bool disable_epsv;
  bool disable_eprt;
  long resume_from;
  char *postfields;
  long postfieldsize;
  char *referer;
  long timeout;
  long connecttimeout;
  long maxredirs;
  long max_filesize;
  char *headerfile;
  char *ftpport;
  char *iface;
  unsigned short porttouse;
  char *range;
  long low_speed_limit;
  long low_speed_time;
  bool showerror;
  char *userpwd;
  char *proxyuserpwd;
  char *proxy;
  bool configread;
  bool proxytunnel;
  long conf;

  /* list of getout nodes; new_getout() appends to it and clean_getout()
     frees it */
  struct getout *url_list; /* point to the first node */
  struct getout *url_last; /* point to the last/current node */
  struct getout *url_get;  /* point to the node to fill in URL */
  struct getout *url_out;  /* point to the node to fill in outfile */

  char *cipher_list;
  char *cert;
  char *cert_type;
  char *cacert;
  char *capath;
  char *key;
  char *key_type;
  char *key_passwd;
  char *engine;
  bool crlf;
  char *customrequest;
  char *krb4level;
  char *trace_dump; /* file to dump the network trace to, or NULL */
  FILE *trace_stream;
  bool trace_fopened;
  bool trace_ascii;
  long httpversion;
  bool progressmode;
  bool nobuffer;
  bool globoff;
  bool use_httpget;
  bool insecure_ok; /* set TRUE to allow insecure SSL connects */
  bool create_dirs;
  bool ftp_create_dirs;
  bool proxyntlm;
  char *writeout; /* %-styled format string to output */
  bool writeenv; /* write results to environment, if available */
  FILE *errors; /* if stderr redirect is requested */
  bool errors_fopened;
  struct curl_slist *quote;
  struct curl_slist *postquote;
  struct curl_slist *prequote;
  long ssl_version;
  long ip_version;
  curl_TimeCond timecond;
  time_t condtime;
  struct curl_slist *headers;
  struct curl_httppost *httppost;
  struct curl_httppost *last_post;
  struct curl_slist *telnet_options;
  HttpReq httpreq;

  /* for bandwidth limiting features: */
  size_t sendpersecond; /* send to peer */
  size_t recvpersecond; /* 
receive from peer */  time_t lastsendtime;  size_t lastsendsize;  time_t lastrecvtime;  size_t lastrecvsize;};/* global variable to hold info about libcurl */static curl_version_info_data *curlinfo;static int parseconfig(const char *filename,		       struct Configurable *config);static char *my_get_line(FILE *fp);static int create_dir_hierarchy(char *outfile);static void GetStr(char **string,		   char *value){  if(*string)    free(*string);  if(value)    *string = strdup(value);  else    *string = NULL;}static char *file2string(FILE *file){  char buffer[256];  char *ptr;  char *string=NULL;  int len=0;  int stringlen;  if(file) {    while(fgets(buffer, sizeof(buffer), file)) {      ptr= strchr(buffer, '\r');      if(ptr)        *ptr=0;      ptr= strchr(buffer, '\n');      if(ptr)        *ptr=0;      stringlen=strlen(buffer);      if(string)        string = realloc(string, len+stringlen+1);      else        string = malloc(stringlen+1);      strcpy(string+len, buffer);      len+=stringlen;    }    return string;  }  else    return NULL; /* no string */}static char *file2memory(FILE *file, long *size){  char buffer[1024];  char *string=NULL;  char *newstring=NULL;  long len=0;  long stringlen=0;  if(file) {    while((len = fread(buffer, 1, sizeof(buffer), file))) {      if(string) {        newstring = realloc(string, len+stringlen);        if(newstring)          string = newstring;        else          break; /* no more strings attached! 
:-) */      }      else        string = malloc(len);      memcpy(&string[stringlen], buffer, len);      stringlen+=len;    }    *size = stringlen;    return string;  }  else    return NULL; /* no string */}void clean_getout(struct Configurable *config){  struct getout *node=config->url_list;  struct getout *next;  while(node) {    next = node->next;    if(node->url)      free(node->url);    if(node->outfile)      free(node->outfile);    if(node->infile)      free(node->infile);    free(node);    node = next; /* GOTO next */  }}struct getout *new_getout(struct Configurable *config){  struct getout *node =malloc(sizeof(struct getout));  struct getout *last= config->url_last;  if(node) {    /* clear the struct */    memset(node, 0, sizeof(struct getout));            /* append this new node last in the list */    if(last)      last->next = node;    else      config->url_list = node; /* first node */                /* move the last pointer */    config->url_last = node;  }  return node;}/* Structure for storing the information needed to build a multiple files * section*/struct multi_files {  struct curl_forms   form;  struct multi_files *next;};/* Add a new list entry possibly with a type_name */static struct multi_files *AddMultiFiles (const char *file_name,               const char *type_name,               const char *show_filename,               struct multi_files **multi_start,               struct multi_files **multi_current){  struct multi_files *multi;  struct multi_files *multi_type = NULL;  struct multi_files *multi_name = NULL;  multi = (struct multi_files *)malloc(sizeof(struct multi_files));  if (multi) {    memset(multi, 0, sizeof(struct multi_files));    multi->form.option = CURLFORM_FILE;    multi->form.value = file_name;  }  else    return NULL;  if (!*multi_start)    *multi_start = multi;  if (type_name) {    multi_type = (struct multi_files *)malloc(sizeof(struct multi_files));    if (multi_type) {      memset(multi_type, 0, sizeof(struct 
multi_files));      multi_type->form.option = CURLFORM_CONTENTTYPE;      multi_type->form.value = type_name;      multi->next = multi_type;      multi = multi_type;    }    else {      free (multi);      return NULL;    }  }  if (show_filename) {    multi_name = (struct multi_files *)malloc(sizeof(struct multi_files));    if (multi_name) {      memset(multi_name, 0, sizeof(struct multi_files));      multi_name->form.option = CURLFORM_FILENAME;      multi_name->form.value = show_filename;      multi->next = multi_name;      multi = multi_name;    }    else {      free (multi);      return NULL;    }  }  if (*multi_current)    (*multi_current)->next = multi;  *multi_current = multi;  return *multi_current;}/* Free the items of the list. */static void FreeMultiInfo (struct multi_files *multi_start){  struct multi_files *multi;  while (multi_start) {    multi = multi_start;    multi_start = multi_start->next;    free (multi);  }}/*************************************************************************** * * formparse() *	 * Reads a 'name=value' paramter and builds the appropriate linked list. * * Specify files to upload with 'name=@filename'. Supports specified * given Content-Type of the files. Such as ';type=<content-type>'. * * You may specify more than one file for a single name (field). Specify * multiple files by writing it like: * * 'name=@filename,filename2,filename3' * * If you want content-types specified for each too, write them like: * * 'name=@filename;type=image/gif,filename2,filename3' * * If you want custom headers added for a single part, write them in a separate * file and do like this: * * 'name=foo;headers=@headerfile' or why not * 'name=@filemame;headers=@headerfile' * * To upload a file, but to fake the file name that will be included in the * formpost, do like this: * * 'name=@filename;filename=/dev/null' * * This function uses curl_formadd to fulfill it's job. Is heavily based on * the old curl_formparse code. 
* ***************************************************************************/#define FORM_FILE_SEPARATOR ','#define FORM_TYPE_SEPARATOR ';'static int formparse(char *input,                     struct curl_httppost **httppost,                     struct curl_httppost **last_post){  /* nextarg MUST be a string in the format 'name=contents' and we'll     build a linked list with the info */  char name[256];  char *contents;  char major[128];  char minor[128];  char *contp;  const char *type = NULL;  char *sep;  char *sep2;  /* Preallocate contents to the length of input to make sure we don't     overwrite anything. */  contents = malloc(strlen(input));  contents[0] = '\000';   if(1 <= sscanf(input, "%255[^=]=%[^\n]", name, contents)) {    /* the input was using the correct format */    contp = contents;    if('@' == contp[0]) {      struct multi_files *multi_start = NULL, *multi_current = NULL;      /* we use the @-letter to indicate file name(s) */      contp++;      multi_start = multi_current=NULL;      do {	/* since this was a file, it may have a content-type specifier	   at the end too, or a filename. Or both. */        char *ptr;        char *filename=NULL;	sep=strchr(contp, FORM_TYPE_SEPARATOR);	sep2=strchr(contp, FORM_FILE_SEPARATOR);	/* pick the closest */	if(sep2 && (sep2 < sep)) {	  sep = sep2;	  /* no type was specified! 
*/	}        type = NULL;	if(sep) {	  /* if we got here on a comma, don't do much */	  if(FORM_FILE_SEPARATOR == *sep)	    ptr = NULL;	  else            ptr = sep+1;	  *sep=0; /* terminate file name at separator */	  while(ptr && (FORM_FILE_SEPARATOR!= *ptr)) {            /* pass all white spaces */            while(isspace((int)*ptr))              ptr++;            if(curl_strnequal("type=", ptr, 5)) {              /* set type pointer */              type = &ptr[5];	                  /* verify that this is a fine type specifier */              if(2 != sscanf(type, "%127[^/]/%127[^;,\n]",                             major, minor)) {                fprintf(stderr, "Illegally formatted content-type field!\n");                free(contents);                FreeMultiInfo (multi_start);                return 2; /* illegal content-type syntax! */              }              /* now point beyond the content-type specifier */              sep = (char *)type + strlen(major)+strlen(minor)+1;              *sep=0; /* zero terminate type string */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -