⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 main.c

📁 harvest 是一个下载 HTML 网页的机器人
💻 C
📖 第 1 页 / 共 5 页
字号:
/***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) 1998 - 2003, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at http://curl.haxx.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 * $Id: main.c,v 1.210 2003/10/31 09:49:10 bagder Exp $
 ***************************************************************************/

/* This is now designed to have its own local setup.h */
#include "setup.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <ctype.h>
#include <errno.h>

#include <curl/curl.h>

#define _MPRINTF_REPLACE /* we want curl-functions instead of native ones */
#include <curl/mprintf.h>

#include "urlglob.h"
#include "writeout.h"
#include "getpass.h"
#ifdef USE_ENVIRONMENT
#include "writeenv.h"
#endif

#define CURLseparator	"--_curl_--"

#if defined(WIN32)&&!defined(__CYGWIN32__)
#include <winsock.h>
#endif

/* Pull in whichever of <sys/time.h> / <time.h> the configuration allows */
#ifdef TIME_WITH_SYS_TIME
/* We can include both fine */
#include <sys/time.h>
#include <time.h>
#else
#ifdef HAVE_SYS_TIME_H
# include <sys/time.h>
#else
# include <time.h>
#endif
#endif

#include "version.h"

#ifdef HAVE_IO_H /* typical win32 habit */
#include <io.h>
#endif

#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif

/* <utime.h> if available, otherwise fall back to <sys/utime.h> */
#ifdef HAVE_UTIME_H
#include <utime.h>
#else
#ifdef HAVE_SYS_UTIME_H
#include <sys/utime.h>
#endif
#endif

#ifdef HAVE_SYS_POLL_H
#include <sys/poll.h>
#endif

/* The last #include file should be: */
#ifdef CURLDEBUG
/* This is low-level hard-hacking memory leak tracking and similar. Using
   the library level code from this client-side is ugly, but we do this
   anyway for convenience. */
#include "../lib/memdebug.h"
#endif

#define DEFAULT_MAXREDIRS  50L

/* DJGPP builds: prototypes for helpers defined elsewhere, plus a local
   declaration of struct pollfd and poll() */
#ifdef __DJGPP__
void *xmalloc(size_t);
char *msdosify(char *);
char *rename_if_dos_device_name(char *);
void xfree(void *);
#include <limits.h>
#include <fcntl.h>

struct pollfd {
       int fd;
       int events;     /* in param: what to poll for */
       int revents;    /* out param: what events occurred */
     };

int poll (struct pollfd *, int, int);
#endif /* __DJGPP__ */

/* Provide a 'bool' type for C builds unless typedef_bool says one exists */
#ifndef __cplusplus        /* (rabe) */
#ifndef typedef_bool
typedef char bool;
#endif
#endif                     /* (rabe) */

#define CURL_PROGRESS_STATS 0 /* default progress display */
#define CURL_PROGRESS_BAR   1

/* The kinds of HTTP request; HTTPREQ_UNSPEC means none has been chosen yet
   (see SetHTTPrequest()) */
typedef enum {
  HTTPREQ_UNSPEC,
  HTTPREQ_GET,
  HTTPREQ_HEAD,
  HTTPREQ_POST,
  HTTPREQ_SIMPLEPOST,
  HTTPREQ_CUSTOM,
  HTTPREQ_LAST
} HttpReq;

/* Just a set of bits */
#define CONF_DEFAULT  0

#define CONF_AUTO_REFERER (1<<4) /* the automatic referer-system please! */
#define CONF_VERBOSE  (1<<5) /* talk a lot */
#define CONF_HEADER   (1<<8) /* throw the header out too */
#define CONF_NOPROGRESS (1<<10) /* shut off the progress meter */
#define CONF_NOBODY   (1<<11) /* use HEAD to get http document */
#define CONF_FAILONERROR (1<<12) /* no output on http error codes >= 300 */
#define CONF_FTPLISTONLY (1<<16) /* Use NLST when listing ftp dir */
#define CONF_FTPAPPEND (1<<20) /* Append instead of overwrite on upload! */
#define CONF_NETRC    (1<<22)  /* read user+password from .netrc */
#define CONF_FOLLOWLOCATION (1<<23) /* use Location: Luke!
*/#define CONF_GETTEXT  (1<<24) /* use ASCII/text for transfer */#define CONF_HTTPPOST (1<<25) /* multipart/form-data HTTP POST */#define CONF_MUTE     (1<<28) /* force NOPROGRESS */#define CONF_NETRC_OPT (1<<29)  /* read user+password from either                                 * .netrc or URL*/#define CONF_UNRESTRICTED_AUTH (1<<30)/* Send authentication (user+password) when following * locations, even when hostname changed */#ifndef HAVE_STRDUP/* Ultrix doesn't have strdup(), so make a quick clone: */char *strdup(char *str){  int len;  char *newstr;  len = strlen(str);  newstr = (char *) malloc((len+1)*sizeof(char));  if (!newstr)    return (char *)NULL;  strcpy(newstr,str);  return newstr;}#endif #ifdef WIN32#include <direct.h>#define F_OK 0#define mkdir(x,y) (mkdir)(x)#endif#ifdef	VMSint	vms_show = 0;#define	FAC_CURL	0xC01#define	FAC_SYSTEM	0#define	MSG_NORMAL	0#define	VMS_STS(c,f,e,s) (((c&0xF)<<28)|((f&0xFFF)<<16)|((e&0x1FFF)<3)|(s&7))#define	VMSSTS_HIDE	VMS_STS(1,0,0,0)#define	SEV_WARNING	0#define	SEV_SUCCESS	1#define	SEV_ERROR	2#define	SEV_INFO	3	/* success, with an extra hint */#define	SEV_FATAL	4globalvalue int  CURL_UNSUPPROTO;		/* these are from curlmsg.msg file..... 
*/globalvalue int  CURL_FAILINIT;globalvalue int  CURL_BADURLSYN;globalvalue int  CURL_BADURLUSER;globalvalue int  CURL_BADPROXY;globalvalue int  CURL_BADHOST;globalvalue int  CURL_FAILHOST;globalvalue int  CURL_FTPUNKREPLY;globalvalue int  CURL_FTPNOACC;globalvalue int  CURL_FTPUSRPW;globalvalue int  CURL_FTPBADPASS;globalvalue int  CURL_FTPBADUSER;globalvalue int  CURL_FTPBADPASV;globalvalue int  CURL_FTPBAD227;globalvalue int  CURL_FTPBADHOST227;globalvalue int  CURL_FTPNORECONN;globalvalue int  CURL_FTPNOBIN;globalvalue int  CURL_PARTIALFILE;globalvalue int  CURL_FTPNORETR;globalvalue int  CURL_FTPWRITERR;globalvalue int  CURL_FTPNOQUOTE;globalvalue int  CURL_HTTPPNF;globalvalue int  CURL_WRITERR;globalvalue int  CURL_BADUSER;globalvalue int  CURL_FTPNOSTOR;globalvalue int  CURL_READERR;globalvalue int  CURL_OUTOFMEM;globalvalue int  CURL_TIMEOUT;globalvalue int  CURL_FTPNOASCII;globalvalue int  CURL_FTPNOPORT;globalvalue int  CURL_FTPNOREST;globalvalue int  CURL_FTPNOSIZE;globalvalue int  CURL_HTTPRNGERR;globalvalue int  CURL_HTTPPOSTERR;globalvalue int  CURL_SSLNOCONN;globalvalue int  CURL_FTPBADRESUME;globalvalue int  CURL_FILENOACC;globalvalue int  CURL_LDAPNOBIND;globalvalue int  CURL_LDAPNOSRCH;globalvalue int  CURL_LDAPNOLIB;globalvalue int  CURL_LDAPNOFUNC;globalvalue int  CURL_ABORTCB;globalvalue int  CURL_BADPARAM;globalvalue int  CURL_BADORDER;globalvalue int  CURL_BADPWD;globalvalue int  CURL_MNYREDIR;globalvalue int  CURL_UNKTELNET;globalvalue int  CURL_UNKMSG;globalvalue int  CURL_BADSSLCERT;globalvalue int  CURL_SRVNOERR;globalvalue int  CURL_MAXMSG;long	vms_cond[] = {	VMS_STS(1,FAC_SYSTEM,MSG_NORMAL,SEV_SUCCESS),	CURL_UNSUPPROTO,		/* these are from curlmsg.msg file..... 
*/	CURL_FAILINIT,	CURL_BADURLSYN,	CURL_BADURLUSER,	CURL_BADPROXY,	CURL_BADHOST,	CURL_FAILHOST,	CURL_FTPUNKREPLY,	CURL_FTPNOACC,	CURL_FTPUSRPW,	CURL_FTPBADPASS,	CURL_FTPBADUSER,	CURL_FTPBADPASV,	CURL_FTPBAD227,	CURL_FTPBADHOST227,	CURL_FTPNORECONN,	CURL_FTPNOBIN,	CURL_PARTIALFILE,	CURL_FTPNORETR,	CURL_FTPWRITERR,	CURL_FTPNOQUOTE,	CURL_HTTPPNF,	CURL_WRITERR,	CURL_BADUSER,	CURL_FTPNOSTOR,	CURL_READERR,	CURL_OUTOFMEM,	CURL_TIMEOUT,	CURL_FTPNOASCII,	CURL_FTPNOPORT,	CURL_FTPNOREST,	CURL_FTPNOSIZE,	CURL_HTTPRNGERR,	CURL_HTTPPOSTERR,	CURL_SSLNOCONN,	CURL_FTPBADRESUME,	CURL_FILENOACC,	CURL_LDAPNOBIND,	CURL_LDAPNOSRCH,	CURL_LDAPNOLIB,	CURL_LDAPNOFUNC,	CURL_ABORTCB,	CURL_BADPARAM,	CURL_BADORDER,	CURL_BADPWD,	CURL_MNYREDIR,	CURL_UNKTELNET,	CURL_UNKMSG,	CURL_BADSSLCERT,	CURL_SRVNOERR,	CURL_MAXMSG};#endifextern void hugehelp(void);/* * This is the main global constructor for the app. Call this before * _any_ libcurl usage. If this fails, *NO* libcurl functions may be * used, or havoc may be the result. */CURLcode main_init(void){  return curl_global_init(CURL_GLOBAL_DEFAULT);}/* * This is the main global destructor for the app. Call this after * _all_ libcurl usage is done. */void main_free(void){  curl_global_cleanup();}int SetHTTPrequest(HttpReq req, HttpReq *store){  if((*store == HTTPREQ_UNSPEC) ||     (*store == req)) {    *store = req;    return 0;  }  fprintf(stderr, "You can only select one HTTP request!\n");  return 1;}static void helpf(const char *fmt, ...){  va_list ap;  if(fmt) {    va_start(ap, fmt);    fputs("curl: ", stderr); /* prefix it */    vfprintf(stderr, fmt, ap);    va_end(ap);  }  fprintf(stderr, "curl: try 'curl --help' or "          "'curl --manual' for more information\n");}/* * A chain of these nodes contain URL to get and where to put the URL's * contents. 
*/struct getout {  struct getout *next; /* next one */  char *url;     /* the URL we deal with */  char *outfile; /* where to store the output */  char *infile;  /* file to upload, if GETOUT_UPLOAD is set */  int flags;     /* options */};#define GETOUT_OUTFILE (1<<0)   /* set when outfile is deemed done */#define GETOUT_URL     (1<<1)   /* set when URL is deemed done */#define GETOUT_USEREMOTE (1<<2) /* use remote file name locally */#define GETOUT_UPLOAD  (1<<3)   /* if set, -T has been used */#define GETOUT_NOUPLOAD  (1<<4) /* if set, -T "" has been used */static void help(void){  int i;  const char *help[]={    "Usage: curl [options...] <url>",    "Options: (H) means HTTP/HTTPS only, (F) means FTP only",    " -a/--append        Append to target file when uploading (F)",    " -A/--user-agent <string> User-Agent to send to server (H)",    "    --anyauth       Tell curl to choose authentication method (H)",    " -b/--cookie <name=string/file> Cookie string or file to read cookies from (H)",    "    --basic         Enable HTTP Basic Authentication (H)",    " -B/--use-ascii     Use ASCII/text transfer",    " -c/--cookie-jar <file> Write cookies to this file after operation (H)",    " -C/--continue-at <offset> Resumed transfer offset",    " -d/--data <data>   HTTP POST data (H)",    "    --data-ascii <data>   HTTP POST ASCII data (H)",    "    --data-binary <data>  HTTP POST binary data (H)",    "    --negotiate     Enable HTTP Negotiate Authentication (H)",    "    --digest        Enable HTTP Digest Authentication (H)",    "    --disable-eprt  Prevent curl from using EPRT or LPRT (F)",    "    --disable-epsv  Prevent curl from using EPSV (F)",    " -D/--dump-header <file> Write the headers to this file",    "    --egd-file <file> EGD socket path for random data (SSL)",#ifdef USE_ENVIRONMENT    "    --environment   Write result codes to environment variables (RISC OS)",#endif    " -e/--referer       Referer URL (H)",    " -E/--cert <cert[:passwd]> Client certificate 
file and password (SSL)",    "    --cert-type <type> Certificate file type (DER/PEM/ENG) (SSL)",    "    --key <key>     Private key file name (SSL)",    "    --key-type <type> Private key file type (DER/PEM/ENG) (SSL)",    "    --pass  <pass>  Pass phrase for the private key (SSL)",    "    --engine <eng>  Crypto engine to use (SSL)",    "    --cacert <file> CA certificate to verify peer against (SSL)",    "    --capath <directory> CA directory (made using c_rehash) to verify",    "                    peer against (SSL)",    "    --ciphers <list> SSL ciphers to use (SSL)",    "    --compressed    Request compressed response (using deflate or gzip)",    "    --connect-timeout <seconds> Maximum time allowed for connection",    "    --create-dirs   Create necessary local directory hierarchy",    "    --crlf          Convert LF to CRLF in upload",    " -f/--fail          Fail silently (no output at all) on errors (H)",    "    --ftp-create-dirs Create the remote dirs if not present (F)",    " -F/--form <name=content> Specify HTTP multipart POST data (H)",    " -g/--globoff       Disable URL sequences and ranges using {} and []",    " -G/--get           Send the -d data with a HTTP GET (H)",    " -h/--help          This help text",    " -H/--header <line> Custom header to pass to server (H)",    " -i/--include       Include protocol headers in the output (H/F)",    " -I/--head          Show document info only",    " -j/--junk-session-cookies Ignore session cookies read from file (H)",    "    --interface <interface> Specify network interface to use",    "    --krb4 <level>  Enable krb4 with specified security level (F)",    " -k/--insecure      Allow curl to connect to SSL sites without certs (H)",    " -K/--config        Specify which config file to read",    " -l/--list-only     List only names of an FTP directory (F)",    "    --limit-rate <rate> Limit transfer speed to this rate",    " -L/--location      Follow Location: hints (H)",    "    --location-trusted 
Follow Location: and send authentication even ",    "                    to other hostnames (H)",    " -m/--max-time <seconds> Maximum time allowed for the transfer",    "    --max-redirs <num> Maximum number of redirects allowed (H)",    "    --max-filesize <bytes> Maximum file size to download (H/F)",    " -M/--manual        Display the full manual",    " -n/--netrc         Must read .netrc for user name and password",    "    --netrc-optional Use either .netrc or URL; overrides -n",    "    --ntlm          Enable HTTP NTLM authentication (H)",    " -N/--no-buffer     Disable buffering of the output stream",    " -o/--output <file> Write output to <file> instead of stdout",    " -O/--remote-name   Write output to a file named as the remote file",    " -p/--proxytunnel   Operate through a HTTP proxy tunnel (using CONNECT)",    "    --proxy-ntlm    Enable NTLM authentication on the proxy (H)",    " -P/--ftpport <address> Use PORT with address instead of PASV (F)",    " -q                 If used as the first parameter disables .curlrc",    " -Q/--quote <cmd>   Send command(s) to server before file transfer (F)",    " -r/--range <range> Retrieve a byte range from a HTTP/1.1 or FTP server",    "    --random-file <file> File for reading random data from (SSL)",    " -R/--remote-time   Set the remote file's time on the local output",    " -s/--silent        Silent mode. Don't output anything",    " -S/--show-error    Show error. With -s, make curl show errors when they occur",    "    --stderr <file> Where to redirect stderr. 
- means stdout",    " -t/--telnet-option <OPT=val> Set telnet option",    "    --trace <file>  Dump a network/debug trace to the given file",    "    --trace-ascii <file> Like --trace but without the hex output",    " -T/--upload-file <file> Transfer/upload <file> to remote site",    "    --url <URL>     Another way to specify URL to work with",    " -u/--user <user[:password]> Specify user and password to use",    "                    Overrides -n and --netrc-optional",    " -U/--proxy-user <user[:password]> Specify Proxy authentication",    " -v/--verbose       Make the operation more talkative",

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -