⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tools.c

📁 网络爬虫程序
💻 C
📖 第 1 页 / 共 2 页
字号:
/***************************************************************************/
/*    This code is part of WWW grabber called pavuk                        */
/*    Copyright (c) 1997 - 2001 Stefan Ondrejicka                          */
/*    Distributed under GPL 2 or later                                     */
/***************************************************************************/

#include "config.h"

#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#ifdef HAVE_FLOCK
#include <sys/file.h>
#endif
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <netdb.h>
#include <fcntl.h>
#include <limits.h>
#include <time.h>
#include <utime.h>
#include <dirent.h>
#include <signal.h>
#ifdef HAVE_TERMIOS
#include <termios.h>
#endif
#ifdef HAVE_FNMATCH
#include <fnmatch.h>
#else
#include "fnmatch.h"
#endif
#ifdef __CYGWIN__
#include <sys/cygwin.h>
#endif
#ifdef GTK_FACE
#include <glib.h>
#endif

#include "gui_api.h"
#include "url.h"
#include "doc.h"
#include "ftp.h"
#include "dinfo.h"
#include "log.h"

/**********************************************************/
/* cut string at the first '\n' and at the first '\r'     */
/**********************************************************/
void strip_nl(char *str)
{
  char *p;

  p = strchr(str, '\n');
  if(p)
    *p = '\0';
  p = strchr(str, '\r');
  if(p)
    *p = '\0';
}

/**********************************************************/
/* remove, in place, every character of <str> that occurs */
/* in <chars>                                             */
/**********************************************************/
void omit_chars(char *str, char *chars)
{
  int src, dst;

  for(src = 0, dst = 0; str[src]; src++)
  {
    if(strchr(chars, str[src]))
      continue;
    str[dst] = str[src];
    dst++;
  }
  str[dst] = '\0';
}

#ifndef HAVE_SETENV
/**********************************************************/
/* replacement for setenv() built on top of putenv()      */
/* when <ovr> is zero an already existing variable is     */
/* left untouched and 0 is returned                       */
/* returns -1 when memory for the entry can't be obtained */
/**********************************************************/
int tl_setenv(const char *var, const char *val, int ovr)
{
  char *pom;

  if(!ovr && getenv(var))
    return 0;

  pom = _malloc(strlen(var) + strlen(val) + 2);
  if(!pom)
    return -1;

  sprintf(pom, "%s=%s", var, val);

  /* pom is not freed here - it is handed over to putenv() */
  return putenv(pom);
}
#endif

#ifndef HAVE_INET6
/**********************************************************/
/* translate resolver error code to human readable text   */
/**********************************************************/
static char *xstrherror(int enr)
{
  char *p;

  switch (enr)
  {
#ifdef NETDB_INTERNAL
  case NETDB_INTERNAL:
    p = strerror(errno);
    break;
#endif
#ifdef NETDB_SUCCESS
  case NETDB_SUCCESS:
    p = gettext("no error");
    break;
#endif
#ifdef HOST_NOT_FOUND
  case HOST_NOT_FOUND:
    p = gettext("host not found");
    break;
#endif
#ifdef TRY_AGAIN
  case TRY_AGAIN:
    p = gettext("temporary error (try again later)");
    break;
#endif
#ifdef NO_RECOVERY
  case NO_RECOVERY:
    p = gettext("non recoverable error");
    break;
#endif
#ifdef NO_ADDRESS
  case NO_ADDRESS:
    p = gettext("name is valid, but doesn't have an IP address");
    break;
#endif
  default:
    p = gettext("unknown hostname translation error");
  }

  return p;
}
#endif

/**********************************************************/
/* print "<str>: <text for current _h_errno_>" through    */
/* xprintf() with logging enabled                         */
/**********************************************************/
void xherror(const char *str)
{
#ifdef HAVE_INET6
  xprintf(1, "%s: %s\n", str, gai_strerror(_h_errno_));
#else
  xprintf(1, "%s: %s\n", str, xstrherror(_h_errno_));
#endif
}

/**********************************************************/
/* print "<str>: <strerror(errno)>" through xprintf()     */
/* with logging enabled                                   */
/**********************************************************/
void xperror(const char *str)
{
  xprintf(1, "%s: %s\n", str, strerror(errno));
}

/**********************************************************/
/* format message and send it to the logfile (when <log>  */
/* is set and logfile is configured) and to the GUI or    */
/* to stdout                                              */
/* with HAVE_MT this is the immediate (unbuffered)        */
/* variant used by the public xvaprintf()                 */
/**********************************************************/
#ifdef HAVE_MT
static void st_xvaprintf(int log, const char *strs, va_list * args)
#else
void xvaprintf(int log, const char *strs, va_list * args)
#endif
{
#ifdef GTK_FACE
  char *buf = g_strdup_vprintf(strs, *args);
#else
  char buf[4096];
#ifdef HAVE_VSNPRINTF
  int l = vsnprintf(buf, sizeof(buf), strs, *args);
  if(TL_BETWEEN(l, 0, sizeof(buf) - 1))
    buf[l] = '\0';
  else
    /* error or truncation reported - make sure that */
    /* the buffer is terminated in any case          */
    buf[sizeof(buf) - 1] = '\0';
#else
  vsprintf(buf, strs, *args);
#endif
#endif

  if(log && cfg.logfile)
    log_str(buf);

#ifdef I_FACE
  if(cfg.xi_face && cfg.done && !cfg.quiet)
  {
    gui_xprint(buf);
  }
  else
#endif
  {
    if(!cfg.quiet && !cfg.bgmode)
    {
      bool_t wout = TRUE;
#ifdef HAVE_TERMIOS
      if(cfg.tccheck)
      {
        static int _istty = -1;
        static pid_t _pgrp;

        if(_istty == -1)
        {
          _istty = isatty(1);
#ifdef GETPGRP_NEED_PID
          _pgrp = getpgrp(getpid());
#else
          _pgrp = getpgrp();
#endif
        }

        /* stdout is terminal, but we are not its */
        /* foreground process group - don't write */
        if(_istty && tcgetpgrp(1) != _pgrp)
          wout = FALSE;
      }
#endif
      if(wout)
      {
        fwrite(buf, sizeof(char), strlen(buf), stdout);
        fflush(stdout);
      }
    }
  }
#ifdef GTK_FACE
  g_free(buf);
#endif
}

#ifdef HAVE_MT
/**********************************************************/
/* multithreaded variant of xvaprintf()                   */
/* message is printed immediately when thread has no      */
/* current document, only one thread is configured or     */
/* cfg.immessages is set, otherwise it is appended to     */
/* message buffer of the current document                 */
/**********************************************************/
void xvaprintf(int log, const char *strs, va_list * args)
{
  doc *cdoc;

  cdoc = (doc *) pthread_getspecific(cfg.currdoc_key);

  if(!cdoc || (cfg.nthr == 1) || cfg.immessages)
    st_xvaprintf(log, strs, args);
  else
  {
    doc_msg *dm = _malloc(sizeof(doc_msg));
    char buf[4096];

    if(!dm)
    {
      /* can't queue the message - print it immediately */
      st_xvaprintf(log, strs, args);
      return;
    }

    buf[0] = '\0';
#ifdef HAVE_VSNPRINTF
    vsnprintf(buf, sizeof(buf), strs, *args);
    buf[sizeof(buf) - 1] = '\0';
#else
    vsprintf(buf, strs, *args);
#endif
    dm->log = log;
    dm->msg = tl_strdup(buf);
    cdoc->msgbuf = dllist_append(cdoc->msgbuf, dm);
  }
}
#endif

/**********************************************************/
/* printf() like frontend to xvaprintf()                  */
/**********************************************************/
void xprintf(int log, const char *strs, ...)
{
  va_list args;

  va_start(args, strs);
  xvaprintf(log, strs, &args);
  va_end(args);
}

/**********************************************************/
/* print debug message when debugging is enabled and      */
/* <level> matches cfg.debug_level                        */
/* does nothing when compiled without DEBUG               */
/**********************************************************/
void xdebug(int level, const char *strs, ...)
{
#ifdef HAVE_MT
  static pthread_mutex_t unique_lock = PTHREAD_MUTEX_INITIALIZER;
  static volatile int unique = 0;
#endif

#ifdef DEBUG
  if(cfg.debug && (level & cfg.debug_level))
  {
    va_list args;

    va_start(args, strs);
#ifdef HAVE_MT
    /* counter prefix and message are printed under one lock */
    pthread_mutex_lock(&unique_lock);
    printf("%4d ", unique++);
    st_xvaprintf(1, strs, &args);
    pthread_mutex_unlock(&unique_lock);
#else
    xvaprintf(1, strs, &args);
#endif
    va_end(args);
  }
#endif /* DEBUG */
}

/**********************************************************/
/* va_list variant of xdebug()                            */
/**********************************************************/
void xvadebug(int level, const char *strs, va_list * args)
{
#ifdef DEBUG
  if(cfg.debug && (level & cfg.debug_level))
  {
    xvaprintf(1, strs, args);
  }
#endif
}

/**********************************************************/
/* sum of all bytes of string modulo <num>                */
/* returns 0 when <num> is not positive                   */
/**********************************************************/
unsigned int hash_func(const char *str, int num)
{
  const char *p = str;
  unsigned int rv = 0;

  /* modulo by zero is undefined */
  if(num <= 0)
    return 0;

  while(*p)
  {
    rv = (rv + (unsigned char) *p) % num;
    p++;
  }

  rv = rv % num;

  return rv;
}

/**********************************************************/
/* warn (only once) that file locking is not supported    */
/**********************************************************/
static void report_unsup_locking(void)
{
  static int visited = FALSE;

  if(!visited)
  {
    visited = TRUE;
    xprintf(1,
      "------------------------------------------------------------------------------\n");
    xprintf(1,
      gettext
      ("Warning: locking not supported ... don't run multiple processes or threads!\n"));
    xprintf(1,
      "------------------------------------------------------------------------------\n");
  }
}

/**********************************************************/
/* acquire exclusive lock on file                         */
/*   fd       - descriptor of the file, it is replaced    */
/*              when the file have to be reopened         */
/*   filename - name of the locked file                   */
/*   opt      - open() flags used when file was removed   */
/*              meanwhile and must be created again       */
/*   b_lock   - when nonzero, wait for release of lock    */
/* returns 0 on success (also when locking is reported    */
/* as not supported), nonzero on failure                  */
/**********************************************************/
int tl_flock(int *fd, const char *filename, int opt, int b_lock)
{
  int i = 0;

/* currently it seems to me that BeOS   */
/* doesn't support file locking :-(     */
/* so just without real locking, report */
/* successfully acquired lock           */
#ifdef __BEOS__
  report_unsup_locking();
  return 0;
#endif

  DEBUG_LOCKS("Locking file - %s\n", filename);
  if(b_lock)
  {
    bool_t ready = FALSE;

    while(!ready)
    {
#ifdef HAVE_FLOCK
      if(flock(*fd, LOCK_EX | LOCK_NB))
      {
        if(errno == EWOULDBLOCK)
        {
          xprintf(1, gettext("waiting to release lock on FD : %d\n"), *fd);
          i = flock(*fd, LOCK_EX);
        }
        else if(errno == ENOSYS ||
#ifdef ENOTSUP
          errno == ENOTSUP ||
#endif
          errno == EOPNOTSUPP)
        {
          report_unsup_locking();
          i = 0;
          break;
        }
        else
        {
          perror(filename);
          /* don't report unexpected failure as acquired lock */
          i = -1;
        }
      }
      else
      {
        i = 0;
      }
#else
#ifdef HAVE_FCNTL_LOCK
      struct flock fl;

      memset(&fl, '\0', sizeof(fl));
      fl.l_type = F_WRLCK;
      if(fcntl(*fd, F_SETLK, &fl))
      {
        /* F_SETLK reports lock held by somebody */
        /* else with EACCES or EAGAIN            */
        if(errno == EACCES || errno == EAGAIN || errno == EWOULDBLOCK)
        {
          xprintf(1, gettext("waiting to release lock on FD : %d\n"), *fd);
          memset(&fl, '\0', sizeof(fl));
          fl.l_type = F_WRLCK;
          i = fcntl(*fd, F_SETLKW, &fl);
        }
        else if(errno == ENOSYS ||
#ifdef ENOTSUP
          errno == ENOTSUP ||
#endif
          errno == EOPNOTSUPP)
        {
          report_unsup_locking();
          i = 0;
          break;
        }
        else
        {
          perror(filename);
          /* don't report unexpected failure as acquired lock */
          i = -1;
        }
      }
      else
      {
        i = 0;
      }
#endif
#endif
      /* file could be removed while we were waiting for */
      /* the lock - create it again and repeat locking   */
      if(access(filename, F_OK))
      {
        DEBUG_LOCKS("Lock file was removed - creating new one.\n");
        close(*fd);
        if(!makealldirs(filename))
        {
          *fd = open(filename, opt, 0644);
          if(*fd < 0)
          {
            i = -1;
            ready = TRUE;
          }
        }
        else
        {
          i = -1;
          ready = TRUE;
        }
      }
      else
        ready = TRUE;
    }
  }
  else
  {
#ifdef HAVE_FLOCK
    i = flock(*fd, LOCK_EX | LOCK_NB);
#else
#ifdef HAVE_FCNTL_LOCK
    struct flock fl;

    memset(&fl, '\0', sizeof(fl));
    fl.l_type = F_WRLCK;
    i = fcntl(*fd, b_lock ? F_SETLKW : F_SETLK, &fl);
#endif
#endif
    if(i < 0 && (errno == ENOSYS ||
#ifdef ENOTSUP
        errno == ENOTSUP ||
#endif
        errno == EOPNOTSUPP))
    {
      report_unsup_locking();
      i = 0;
    }
    else if(i < 0)
      perror(filename);
  }

  return i;
}

#ifdef HAVE_FCNTL_LOCK
/**********************************************************/
/* release fcntl() lock, returns result of fcntl()        */
/**********************************************************/
int tl_funlock(int fd)
{
  struct flock fl;
  int i;

  memset(&fl, '\0', sizeof(fl));
  fl.l_type = F_UNLCK;
  i = fcntl(fd, F_SETLK, &fl);

  return i;
}
#endif

/**********************************************************/
/* create and open temporary file, returns descriptor     */
/* without HAVE_MKSTEMP the name is generated by tmpnam() */
/* into <pattern> and the file is opened with O_EXCL      */
/**********************************************************/
int tl_mkstemp(char *pattern)
{
#ifdef HAVE_MKSTEMP
  return mkstemp(pattern);
#else
  tmpnam(pattern);
  return open(pattern, O_CREAT | O_RDWR | O_TRUNC | O_EXCL | O_BINARY, 0600);
#endif
}

/**********************************************************/
/* convert decimal string to long                         */
/* errno is set to 0 on success and to ERANGE when the    */
/* string is empty, contains trailing garbage or the      */
/* value is out of range                                  */
/**********************************************************/
long int _atoi(char *str)
{
  char *eptr;
  long int rv;

  if(!*str)
  {
    errno = ERANGE;
    rv = 0;
  }
  else
  {
    errno = 0;
    rv = strtol(str, &eptr, 10);

    /* ERANGE set by strtol() on overflow is kept */
    if(*eptr != '\0')
      errno = ERANGE;
  }

  return rv;
}

/**********************************************************/
/* convert string to double                               */
/* errno is set to 0 on success and to ERANGE when the    */
/* string is empty, contains trailing garbage or strtod() */
/* reports range error                                    */
/**********************************************************/
double _atof(char *str)
{
  char *eptr;
  double rv;

  if(!*str)
  {
    errno = ERANGE;
    rv = 0.0;
  }
  else
  {
    errno = 0;
    rv = strtod(str, &eptr);

    /* ERANGE set by strtod() is kept */
    if(*eptr != '\0')
      errno = ERANGE;
  }

  return rv;
}

/**********************************************************/
/* replace, in place, all occurrences of character        */
/* <cfrom> with character <cto>, returns <str>            */
/**********************************************************/
char *_strtrchr(char *str, int cfrom, int cto)
{
  char *p = str;

  /* nothing to do; also prevents endless loop and */
  /* overwriting of the string terminator          */
  if(cfrom == cto || cfrom == '\0')
    return str;

  while((p = strchr(p, cfrom)))
    *p = cto;

  return str;
}

/**********************************************************/
/* malloc() which reports failure with perror()           */
/* NULL is still returned to caller on failure            */
/**********************************************************/
void *_malloc(int sz)
{
  void *ret = malloc(sz);

  if(!ret)
  {
    perror("malloc");
  }
  return ret;
}

/**********************************************************/
/* realloc() which reports failure with perror()          */
/* NULL <from> is handled by plain malloc()               */
/**********************************************************/
void *_realloc(void *from, int sz)
{
  void *ret = from ? realloc(from, sz) : malloc(sz);

  if(!ret)
  {
    perror("realloc");
  }
  return ret;
}

/**********************************************************/
/* get next double quoted substring                       */
/* call with string to start, and with NULL to continue   */
/* in the previously passed string                        */
/* backslash escapes following character, the backslash   */
/* itself is removed from result                          */
/* result is stored in static buffer (overwritten by next */
/* call) and is truncated when it doesn't fit             */
/* returns NULL when there is no further quoted substring */
/**********************************************************/
char *get_1qstr(const char *str)
{
  static const char *p = 0;
  static char pom[4096];
  const char *pom1 = NULL, *pom2 = NULL;
  int found;
  bool_t wasesc = FALSE;

  if(str)
    p = str;

  /* pom1 - first character after opening quote */
  /* pom2 - closing quote                       */
  for(; p && *p && (!pom1 || !pom2); p++)
  {
    found = FALSE;

    if(!wasesc && *p == '\"')
      found = TRUE;

    if(!wasesc && *p == '\\')
      wasesc = TRUE;
    else
      wasesc = FALSE;

    if(!pom1 && found)
      pom1 = p + 1;
    else if(!pom2 && found)
      pom2 = p;
  }

  if(pom1 && pom2)
  {
    char *p2 = pom;
    char *end = pom + sizeof(pom) - 1;

    /* never write behind the static buffer */
    while(pom1 < pom2 && p2 < end)
    {
      if(*pom1 == '\\')
        pom1++;
      *p2 = *pom1;
      p2++;
      pom1++;
    }
    *p2 = '\0';
    return pom;
  }
  else
  {
    p = NULL;
    return NULL;
  }
}

/**********************************************************/
/* return newly allocated copy of <str> in which each     */
/* character contained in <unsafe> is prefixed with       */
/* backslash                                              */
/* returns NULL when memory can't be allocated            */
/**********************************************************/
char *escape_str(char *str, char *unsafe)
{
  const char *p;
  char *rv, *r;

  /* in worst case every character gets escaped */
  rv = _malloc(2 * strlen(str) + 1);
  if(!rv)
    return NULL;

  for(p = str, r = rv; *p; p++)
  {
    if(strchr(unsafe, *p))
    {
      *r = '\\';
      r++;
    }
    *r = *p;
    r++;
  }
  *r = '\0';

  return rv;
}

/**************************************************************/
/* implementation of standard function strtok                 */
/* split string at occurrence of <chr> by setting \0-character*/
/* store next element (one behind \0) pointer for next call   */
/* calling with str equal to zero uses stored pointer         */
/**************************************************************/
char *strtokc_r(char *str, int chr, char **save)
{
  char *ret;

  if(str)
    *save = str;

  if((ret = *save))
  {
    char *pom;

    if((pom = strchr(*save, chr)))
    {
      *pom = '\0';
      *save = pom + 1;
    }
    else
      *save = NULL;
  }

  return ret;
}

/**********************************************************/
/* find position of the n-th occurrence of character      */
/* returned pointer points to the character which         */
/* follows the n-th occurrence of <chr>                   */
/* returns NULL when there are less than <n> occurrences  */
/**********************************************************/
char *strfindnchr(char *str, int chr, int n)
{
  int cnt;
  char *p;

  for(p = str, cnt = 0; *p && cnt < n; p++)
  {
    if(*p == chr)
      cnt++;
  }

  if(cnt != n)
    return NULL;
  else
    return p;
}

/********************************************/
/* search in list for string occurrence     */
/* (comparison is case insensitive)         */
/********************************************/
bool_t is_in_list(char *str, char **list)
{
  char **p = list;

  while(*p)
  {
    if(!strcasecmp(*p, str))
      return TRUE;
    p++;
  }
  return FALSE;
}

bool_t is_in_dllist(char *str, dllist * list)
{
  dllist *p;

  for(p = list; p; p = p->next)
  {
    if(!strcasecmp((char *) p->data, str))
      return TRUE;
  }
  return FALSE;
}

/****************************************************/
/* match string against list of wildcard patterns   */
/****************************************************/
bool_t is_in_pattern_list(char *str, char **list)
{
  char **p = list;

  while(*p)
  {
    if(!fnmatch(*p, str, 0))
      return TRUE;
    p++;
  }
  return FALSE;
}

bool_t is_in_pattern_dllist(char *str, dllist * list)
{
  dllist *p;

  for(p = list; p; p = p->next)
  {
    if(!fnmatch((char *) p->data, str, 0))
      return TRUE;
  }
  return FALSE;
}

/*******************************************************/
/* split string to NULL terminated array of strings    */
/* separated with some of characters in sep            */
/* returns NULL for NULL or empty string and when      */
/* memory for the array can't be allocated             */
/*******************************************************/
char **tl_str_split(const char *liststr, const char *sep)
{
  const char *p;
  int i = 0;
  char **ret_val = NULL;
  int ilen;

  if(!liststr || !*liststr)
    return NULL;

  ret_val = _malloc(sizeof(char *));
  if(!ret_val)
    return NULL;
  ret_val[0] = NULL;

  p = liststr;

  while(*p)
  {
    char **grown;

    ilen = strcspn(p, sep);

    /* keep the old array until we know that resize succeeded */
    grown = _realloc(ret_val, sizeof(char *) * (i + 2));
    if(!grown)
    {
      tl_strv_free(ret_val);
      return NULL;
    }
    ret_val = grown;

    ret_val[i] = tl_strndup(p, ilen);
    ret_val[i + 1] = NULL;
    p += ilen;
    /* skip the separator */
    if(*p)
      p++;
    i++;
  }

  return ret_val;
}

/*******************************************************/
/* split string to list of numbers                     */
/* returns empty list when any of elements is not      */
/* accepted by _atoi()                                 */
/*******************************************************/
dllist *tl_numlist_split(char *str, char *sep)
{
  dllist *rv = NULL;
  char **v;
  int i;

  v = tl_str_split(str, sep);

  for(i = 0; v && v[i]; i++)
  {
    long n = _atoi(v[i]);

    if(errno == ERANGE)
    {
      /* invalid number - throw away everything collected so far */
      while(rv)
        rv = dllist_remove_entry(rv, rv);
      break;
    }
    rv = dllist_append(rv, (dllist_t) n);
  }

  tl_strv_free(v);

  return rv;
}

/* free null terminated string vector */
void tl_strv_free(char **v)
{
  int i;

  for(i = 0; v && v[i]; i++)
    _free(v[i]);

  _free(v);
}

/* count length of null terminated string vector */
int tl_strv_length(char **v)
{
  int i;

  for(i = 0; v && v[i]; i++);

  return i;
}

/* find index of string in null terminated string vector, -1 when not found */
int tl_strv_find(char **v, char *s)
{
  int i;

  for(i = 0; v && v[i]; i++)
  {
    if(!strcmp(v[i], s))
      return i;
  }

  return -1;
}

/* qsort() comparator for elements of string vector */
static int sort_strcmp(const void *s1, const void *s2)
{
  return strcmp(*(const char *const *) s1, *(const char *const *) s2);
}

/* sort null terminated string vector in strcmp() order */
void tl_strv_sort(char **v)
{
  if(v)
  {
    qsort((void *) v, tl_strv_length(v), sizeof(char *), sort_strcmp);
  }
}

/*************************************************/
/* change all characters in string to upper case */
/*************************************************/
char *upperstr(char *s)
{
  char *p;

  for(p = s; *p != '\0'; p++)
    *p = tl_ascii_toupper(*p);

  return s;
}

/*************************************************/
/* change all characters in string to lower case */
/*************************************************/
char *lowerstr(char *s)
{
  char *p;

  for(p = s; *p != '\0'; p++)
    *p = tl_ascii_tolower(*p);

  return s;
}

/**********************/
/* duplicate string   */
/**********************/
char *tl_strdup(const char *s)
{
  char *p;

  if(s)
  {
    p = (char *) _malloc(strlen(s) + 1);
    strcpy(p, s);
  }
  else

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -