stats.c

来自「网络爬虫程序」· C语言 代码 · 共 559 行 · 第 1/2 页

C
559
字号
/***************************************************************************//*    This code is part of WWW grabber called pavuk                        *//*    Copyright (c) 1997 - 2001 Stefan Ondrejicka                          *//*    Distributed under GPL 2 or later                                     *//***************************************************************************/#include "config.h"#include <string.h>#include <stdio.h>#include "stats.h"#ifdef GTK_FACE#include "gui.h"#include "gui_api.h"#endifint stats_fill_spage(const char *filename, void *lst){  char *pom, *p, p2[8192];  int total = 0, redir = 0, nsredir = 0, dnld = 0, noproc = 0, moved = 0;  int brokennr = 0, dexistnr = 0, ferrnr = 0, nferrnr = 0, unsnr = 0;  int startnr = 0;  int i;  dllist *parptr;  dllist *broken = NULL, *dexist = NULL, *ferr = NULL;  dllist *nferr = NULL, *uns = NULL, *start = NULL;  dllist *ptr;  url *urlp;  pom = p2;  if(!cfg.mode_started)    return -1;  LOCK_CFG_URLHASH;  switch (cfg.mode)  {  case MODE_NORMAL:  case MODE_LNUPD:  case MODE_SYNC:  case MODE_MIRROR:  case MODE_SINGLE:  case MODE_SREGET:  case MODE_RESUME:  case MODE_NOSTORE:    {      for(i = 0; i < cfg.url_hash_tbl->size; i++)      {        ptr = cfg.url_hash_tbl->nodes[i];        while(ptr)        {          url *urlp = (url *) ptr->data;          total++;          if(urlp->status & URL_ISSTARTING)          {            startnr++;            start = dllist_append(start, (dllist_t) urlp);          }          if(urlp->status & URL_PROCESSED)          {            if(urlp->status & URL_INNSCACHE)              nsredir++;            else if(urlp->status & URL_REDIRECT)              redir++;            else if(urlp->status & URL_DOWNLOADED)              dnld++;            else if(urlp->status & URL_MOVED)              moved++;            else if(urlp->status & URL_TRUNCATED)            {              broken = dllist_append(broken, (dllist_t) urlp);              brokennr++;            }            else 
if(urlp->status & URL_NOT_FOUND)            {              dexist = dllist_append(dexist, (dllist_t) urlp);              dexistnr++;            }            else if(urlp->status & URL_ERR_UNREC)            {              ferr = dllist_append(ferr, (dllist_t) urlp);              ferrnr++;            }            else if(urlp->status & URL_ERR_REC)            {              nferr = dllist_append(nferr, (dllist_t) urlp);              nferrnr++;            }            else            {              uns = dllist_append(uns, (dllist_t) urlp);              unsnr++;            }          }          else            noproc++;          ptr = ptr->next;        }      }    }    if(filename)    {      FILE *f;#define DMP_URLLIST(lst)\      ptr = lst;\      while(ptr)\      {\        urlp = (url *)ptr->data;\        p = url_to_urlstr(urlp, FALSE);\        fprintf(f, "        %s\n", p);\        _free(p);\        LOCK_URL(urlp);\        for (parptr = urlp->parent_url; parptr ; parptr = parptr->next)\        {\          p = url_to_urlstr((url *)parptr->data, FALSE);\          fprintf(f, "                %s\n", p);\          _free(p);\        }\        UNLOCK_URL(urlp);\        ptr = ptr->next;\      }\      f = fopen(filename, "wb+");      if(!f)      {        UNLOCK_CFG_URLHASH;        xperror(filename);        return -1;      }      fprintf(f, gettext("Total number of URLs in queue: %d\n"), total);      fprintf(f, gettext("Starting urls: %d\n"), startnr);      DMP_URLLIST(start);      if(noproc)        fprintf(f, gettext("Not processed yet: %d (%3d%%)\n"),          noproc, (int) (((float) noproc / (float) total) * 100.0));      i = dnld + nsredir + redir;      if(i)        fprintf(f, gettext("Processed OK: %d (%3d%%)\n"), i,          (int) (((float) i / (float) total) * 100.0));      if(redir)        fprintf(f, gettext("Loaded from local tree: %d (%3d%%)\n"), redir,          (int) (((float) redir / (float) total) * 100.0));      if(nsredir)        fprintf(f,          
gettext("Loaded from Netscape browser cache dir: %d (%3d%%)\n"),          nsredir, (int) (((float) nsredir / (float) total) * 100.0));      if(dnld)        fprintf(f, gettext("Downloaded over network: %d (%3d%%)\n"), dnld,          (int) (((float) dnld / (float) total) * 100.0));      if(moved)        fprintf(f, gettext("Moved to another location: %d (%3d%%)\n"), moved,          (int) (((float) moved / (float) total) * 100.0));      if(broken)      {        fprintf(f, gettext("Downloaded truncated: %d (%3d%%)\n"), brokennr,          (int) (((float) brokennr / (float) total) * 100.0));        DMP_URLLIST(broken);      }      if(nferr)      {        fprintf(f, gettext("Non fatal errors: %d (%3d%%)\n"), nferrnr,          (int) (((float) nferrnr / (float) total) * 100.0));        DMP_URLLIST(nferr);      }      if(dexist)      {        fprintf(f, gettext("Not found documents: %d (%3d%%)\n"), dexistnr,          (int) (((float) dexistnr / (float) total) * 100.0));        DMP_URLLIST(dexist);      }      if(ferr)      {        fprintf(f, gettext("Documents with fatal errors: %d (%3d%%)\n"),          ferrnr, (int) (((float) ferrnr / (float) total) * 100.0));        DMP_URLLIST(ferr);      }      if(uns)      {        fprintf(f, gettext("Documents with unknown status: %d (%3d%%)\n"),          unsnr, (int) (((float) unsnr / (float) total) * 100.0));        DMP_URLLIST(uns);      }      fclose(f);#undef DMP_URLLIST    }#ifdef GTK_FACE/* here will be clist with active URL fields */    else if(cfg.xi_face)    {      GtkWidget *l = GTK_WIDGET(lst);#define DMP_URLLIST(lst, ajp)\      ptr = lst;\      while(ptr)\      {\        urlp = (url *)ptr->data;\        p = url_to_urlstr(urlp, FALSE);\        sprintf(pom, "        %s\n", p);\        _free(p);\        i = gtk_clist_append(GTK_CLIST(l), &pom);\        gtk_clist_set_row_data(GTK_CLIST(l), i, urlp);\        LOCK_URL(urlp);\        for (parptr = urlp->parent_url; ajp && parptr; parptr = parptr->next)\        {\          p = 
url_to_urlstr((url *)parptr->data, FALSE);\          sprintf(pom, "                %s\n", p);\          _free(p);\          gtk_clist_append(GTK_CLIST(l), &pom);\        }\        UNLOCK_URL(urlp);\        ptr = ptr->next;\      }\      sprintf(pom, gettext("Total number of URLs in queue: %d\n"), total);      gtk_clist_append(GTK_CLIST(l), &pom);      sprintf(pom, gettext("Starting urls: %d\n"), startnr);      gtk_clist_append(GTK_CLIST(l), &pom);      DMP_URLLIST(start, FALSE);      if(noproc)      {        sprintf(pom, gettext("Not processed yet: %d (%3d%%)\n"),          noproc, (int) (((float) noproc / (float) total) * 100.0));        gtk_clist_append(GTK_CLIST(l), &pom);      }      i = dnld + nsredir + redir;      if(i)      {        sprintf(pom, gettext("Processed OK: %d (%3d%%)\n"), i,          (int) (((float) i / (float) total) * 100.0));        gtk_clist_append(GTK_CLIST(l), &pom);      }      if(redir)      {        sprintf(pom, gettext("Loaded from local tree: %d (%3d%%)\n"), redir,          (int) (((float) redir / (float) total) * 100.0));        gtk_clist_append(GTK_CLIST(l), &pom);      }      if(nsredir)      {        sprintf(pom,          gettext("Loaded from Netscape browser cache dir: %d (%3d%%)\n"),          nsredir, (int) (((float) nsredir / (float) total) * 100.0));        gtk_clist_append(GTK_CLIST(l), &pom);      }      if(dnld)      {        sprintf(pom, gettext("Downloaded over network: %d (%3d%%)\n"), dnld,          (int) (((float) dnld / (float) total) * 100.0));        gtk_clist_append(GTK_CLIST(l), &pom);      }      if(moved)      {        sprintf(pom, gettext("Moved to another location: %d (%3d%%)\n"),          moved, (int) (((float) moved / (float) total) * 100.0));        gtk_clist_append(GTK_CLIST(l), &pom);      }      if(broken)      {        sprintf(pom, gettext("Downloaded truncated: %d (%3d%%)\n"), brokennr,          (int) (((float) brokennr / (float) total) * 100.0));        gtk_clist_append(GTK_CLIST(l), &pom);        
DMP_URLLIST(broken, TRUE);      }      if(nferr)      {        sprintf(pom, gettext("Non fatal errors: %d (%3d%%)\n"), nferrnr,          (int) (((float) nferrnr / (float) total) * 100.0));        gtk_clist_append(GTK_CLIST(l), &pom);        DMP_URLLIST(nferr, TRUE);      }      if(dexist)      {

⌨️ 快捷键说明

复制代码:Ctrl + C
搜索代码:Ctrl + F
全屏模式:F11
增大字号:Ctrl + =
减小字号:Ctrl + -
显示快捷键:?