⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 config.h

📁 网络爬虫程序
💻 H
📖 第 1 页 / 共 2 页
字号:
#ifdef HAVE_REGEX  dllist *js_patterns;          /*** matching patterns for JS URLs ***/  dllist *js_transform;         /*** matching patterns for JS with transform **/  dllist *advert_res;           /*** list of RE-s for advertisement banners ***/  bool_t remove_adv;            /*** enable / disable advertisement banners ***/  dllist *dont_touch_url_rpattern;  dllist *dont_touch_tag_rpattern;                                /*** to allow preserve some URLs in the original form ***/#endif#ifdef DEBUG  bool_t debug;                 /*** debug mode on/off ***/  long debug_level;             /*** debug level ***/#endif#ifdef USE_SSL  long ssl_version;             /*** ssl2/ssl3/ssl23/tls1 version of ssl_client_method() ***/  char *ssl_proxy;              /*** SSL tuneling proxy ***/  long ssl_proxy_port;  char *ssl_cipher_list;  char *ssl_cert_passwd;  bool_t unique_sslid;          /*** use unique SSL IDs with each SSL connection ***/#ifdef USE_SSL_IMPL_OPENSSL  char *ssl_cert_file;  char *ssl_key_file;  char *egd_socket;             /*** path to EGD socket ***/#endif#ifdef USE_SSL_IMPL_NSS  char *nss_cert_dir;           /*** certDir for Netscape NSS ***/  bool_t nss_accept_unknown_cert;        /*** don't care much about certificates ***/  bool_t nss_domestic_policy;#endif#endif  char *language;               /*** language for LC_MESSAGES ***/#ifdef GETTEXT_NLS  char *msgcatd;                /*** explicit message catalog directory ***/#endif#ifdef I_FACE  char *fontname;               /*** default font used in interface ***/  long xlogsize;                /*** max number of lines in LOG widget ***/  bool_t log_autoscroll;        /*** autoscroll of log window ***/  bool_t run_iface;             /*** if immediately run download after start of pavuk in GUI interface ***/  bool_t use_prefs;             /*** store & load prefernces from ~/.pavuk_prefs file ***/                                /*** alternative icons for GUI ***/  char *bt_icon_cfg;  char *bt_icon_cfg_s;  char *bt_icon_lim;  char *bt_icon_lim_s;  char *bt_icon_gobg;  char *bt_icon_gobg_s;  char *bt_icon_rest;  char *bt_icon_rest_s;  char *bt_icon_cont;  char *bt_icon_cont_s;  char *bt_icon_stop;  char *bt_icon_stop_s;  char *bt_icon_brk;  char *bt_icon_brk_s;  char *bt_icon_exit;  char *bt_icon_exit_s;  char *bt_icon_mtb;  char *bt_icon_mtb_s;#ifdef WITH_TREE  char *browser;                /*** command to execute your preffered browser ***/#endif#endif/*** GLOBALdata ***/  abs_addr local_ip_addr;       /*** numeric address for local network interface ***/  time_t start_time;            /*** start time of downloading ***/  struct timeval hr_start_time; /*** high-resolution start time of downloading ***/  long trans_size;              /*** transfered size in session ***/  char *path_to_home;  char *local_host;             /*** hostname of local machine ***/  long fail_cnt;                /*** counter for failed transfers ---> return code of pavuk ***/  char *prg_path;               /*** path to pavuk executable == argv[0] ***/  char *install_path;           /*** pavuk install path especialy used in win32 version ***/  long total_cnt;               /*** total number of URLs in queue  ***/  long process_cnt;             /*** number of already processed documents ***/  long reject_cnt;              /*** number of rejected URLs ***/  pavuk_mode prev_mode;         /*** previous active mode ***/  bool_t mode_started;          /*** mode startup finisched ***/  bool_t rbreak;                /*** immediately stop transfer ***/  bool_t stop;                  /*** stop after this document will be processed ***/  dllist *urlstack;             /*** list of URLs in processing queue ***/  dllist *urls_in_dir;          /*** list of URLs extracted from mirroring                                     directory, for checking for nonexistent                                     document removal ***/  dlhash *url_hash_tbl;         /*** hash table for better performance URL lookup ***/  dlhash *fn_hash_tbl;          /*** hash table for better performance filename lookup ***/  dllist *last_used_proxy_node; /*** pointer to last used proxy node ***/  long docnr;                   /*** current number of document ***/#ifdef HAVE_MT  time_t timestamp;  time_t cfg_changed;           /*** timestamp for cfg struct last change ***/  pthread_key_t currdoc_key;  pthread_key_t herrno_key;  pthread_key_t thrnr_key;  pthread_key_t privcfg_key;  mt_semaphore nrunning_sem;  mt_semaphore urlstack_sem;  pthread_t mainthread;  pthread_t *allthreads;  long allthreadsnr;#endif#ifdef I_FACE  bool_t done;                  /*** was done startup ? ***/  bool_t processing;            /*** some URL is actualy in processing ***/#endif} _config_struct_t;extern _config_struct_t cfg;#if defined(HAVE_MT) && defined(I_FACE)#if 0/********************************************************************//* this structure contains corresponding field form _config_struct  *//* structure. when I don't want to use mutex(es) for locking of     *//* config structure when running multiple downloading threads, I    *//* I have to make copy of dynamicaly created config parameters to   *//* prevent segfaults when changing configuration from GUI           *//* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! *//* !!!!!!!!!! Not realy used, only to know which fields !!!!!!!!!!! *//* !!!!!!!!!! are used from private copy                !!!!!!!!!!! *//* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! *//********************************************************************/typedef struct{  time_t timestamp;  char *default_prefix;  char *info_dir;  char *subdir;  char *cache_dir;  char *post_cmd;  char *http_proxy_pass;  char *http_proxy_user;  char *ftp_proxy_pass;  char *ftp_proxy_user;  char *ftp_proxy;  char *gopher_proxy;  char *name_auth;  char *passwd_auth;  char *index_name;  char *store_name;  char *from;  char *identity;  char *auth_ntlm_domain;  char *auth_proxy_ntlm_domain;  char *ftp_list_options;  char **accept_lang;  char **accept_chars;  char **cookies_disabled_domains;  char **dont_touch_url_pattern;  cond condition;  dllist *formdata;  dllist *lfnames;  dllist *http_headers;  dllist *js_patterns;  dllist *js_transform;  dllist *ftp_login_hs;  char *tr_del_chr;  char *tr_str_s1;  char *tr_str_s2;  char *tr_chr_s1;  char *tr_chr_s2;#ifdef HAVE_REGEX  dllist *advert_res;  dllist *js_patterns;  dllist *js_transform;  dllist *dont_touch_url_rpattern;  dllist *dont_touch_tag_rpattern;#endif#ifdef HAVE_MOZJS  char *js_script_file;#endif#ifdef HAVE_BDB_18x  char *ns_cache_dir;  char *moz_cache_dir;#endif#ifdef USE_SSL  char *ssl_proxy;  char *ssl_cipher_list;  char *ssl_cert_file;  char *ssl_key_file;  char *ssl_cert_passwd;  char *egd_socket;#endif} _config_struct_priv_t;#endif /* 0 */#define _config_struct_priv_t _config_struct_textern void privcfg_make_copy(_config_struct_priv_t *);extern void privcfg_free(_config_struct_priv_t *);#define priv_cfg (*((_config_struct_priv_t *)pthread_getspecific(cfg.privcfg_key)))#define _MT_CFGSTAMP    cfg.cfg_changed = time(NULL)#else#define priv_cfg cfg#define _MT_CFGSTAMP#endiftypedef enum{  PARAM_NUM,                    /* integer number                       */  PARAM_PBOOL,                  /* positive bool_tean                   */  PARAM_NBOOL,                  /* negative bool_tean                   */  PARAM_STR,                    /* single string                        */  PARAM_PASS,                   /* password string                      */  PARAM_STRLIST,                /* comma separated list of strings      */  PARAM_CONN,                   /* connection - host[:port]             */  PARAM_AUTHSCH,                /* authorization scheme - 1/2/3         */  PARAM_MODE,                   /* operation mode - mode.c              */  PARAM_PATH,                   /* file/dir path                        */  PARAM_TIME,                   /* time string - YYYY.MM.DD.HH:mm       */  PARAM_HTMLTAG,                /* HTML tags specification              */  PARAM_TWO_QSTR,               /* two quoted strings                   */  PARAM_DOUBLE,                 /* double number                        */  PARAM_LFNAME,                 /* for -fnrules option                  */  PARAM_RE,                     /* list of regular expressions          */  PARAM_USTRAT,                 /* url strategie - -strategie           */  PARAM_SSLVER,                 /* ssl version - ssl23/ssl2/ssl3/tls1   */  PARAM_HTTPHDR,                /* additional HTTP header               */  PARAM_DEBUGL,                 /* debug level - debugl.c               */  PARAM_REQUEST,                /* extended request specification       */  PARAM_PROXY,                  /* proxy specification - host:port      */  PARAM_TRANSPARENT,            /* proxy specification - host:port      */  PARAM_FUNC,                   /* exec function for this param type    */  PARAM_JSTRANS,                /* for -js_transform option             */  PARAM_NUMLIST,                /* list of integer numbers -[ad]port    */  PARAM_FTPHS,                  /* for FTP -ftp_login_handshake         */  PARAM_TAGPAT,                 /* for HTML tag patterns                */  PARAM_PORT_RANGE              /* for TCP/IP port ranges               */} par_type_t;/* this is to support parameters of foreign libraries (like gtk) */#define PARAM_FOREIGN           (1 << 29)/* this is for marking option as unsupported in current compile time    *//* configuration. This will allow to accept unsupported option on       *//* commandline just throwing warning insted of trowing error and exit.  */#define PARAM_UNSUPPORTED       (1 << 30)typedef struct _cfg_param{  char *short_cmd;  char *long_cmd;  char *par_entry;  par_type_t type;  void *default_val;  void *val_adr;  void *mdefault_val;  void *mval_adr;  char *help;} cfg_param_t;extern char *get_strategie_label(strategie);extern void usage(void);extern void usage_short(void);extern void cfg_setup_default(void);extern void cfg_set_all_to_default(void);extern void cfg_setup_cmdln(int, char **);extern int cfg_load(const char *);extern void cfg_load_setup(void);extern int cfg_dump(const char *);extern int cfg_dump_cmd(const char *);extern int cfg_dump_cmd_fd(int);extern void cfg_free_params(void);extern int cfg_dump_pref(void);extern int cfg_load_pref(void);extern int cfg_get_num_params(cfg_param_t *);extern void pavuk_do_at_exit(void);#define PAVUK_EXIT_OK           0       /* everything goes as expected  */#define PAVUK_EXIT_CFG_ERR      1       /* configuration error          */#define PAVUK_EXIT_DOC_ERR      2       /* some of documents failed     */#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -