stats.c
来自「网络爬虫程序」· C语言 代码 · 共 559 行 · 第 1/2 页
C
559 行
/***************************************************************************//* This code is part of WWW grabber called pavuk *//* Copyright (c) 1997 - 2001 Stefan Ondrejicka *//* Distributed under GPL 2 or later *//***************************************************************************/#include "config.h"#include <string.h>#include <stdio.h>#include "stats.h"#ifdef GTK_FACE#include "gui.h"#include "gui_api.h"#endifint stats_fill_spage(const char *filename, void *lst){ char *pom, *p, p2[8192]; int total = 0, redir = 0, nsredir = 0, dnld = 0, noproc = 0, moved = 0; int brokennr = 0, dexistnr = 0, ferrnr = 0, nferrnr = 0, unsnr = 0; int startnr = 0; int i; dllist *parptr; dllist *broken = NULL, *dexist = NULL, *ferr = NULL; dllist *nferr = NULL, *uns = NULL, *start = NULL; dllist *ptr; url *urlp; pom = p2; if(!cfg.mode_started) return -1; LOCK_CFG_URLHASH; switch (cfg.mode) { case MODE_NORMAL: case MODE_LNUPD: case MODE_SYNC: case MODE_MIRROR: case MODE_SINGLE: case MODE_SREGET: case MODE_RESUME: case MODE_NOSTORE: { for(i = 0; i < cfg.url_hash_tbl->size; i++) { ptr = cfg.url_hash_tbl->nodes[i]; while(ptr) { url *urlp = (url *) ptr->data; total++; if(urlp->status & URL_ISSTARTING) { startnr++; start = dllist_append(start, (dllist_t) urlp); } if(urlp->status & URL_PROCESSED) { if(urlp->status & URL_INNSCACHE) nsredir++; else if(urlp->status & URL_REDIRECT) redir++; else if(urlp->status & URL_DOWNLOADED) dnld++; else if(urlp->status & URL_MOVED) moved++; else if(urlp->status & URL_TRUNCATED) { broken = dllist_append(broken, (dllist_t) urlp); brokennr++; } else if(urlp->status & URL_NOT_FOUND) { dexist = dllist_append(dexist, (dllist_t) urlp); dexistnr++; } else if(urlp->status & URL_ERR_UNREC) { ferr = dllist_append(ferr, (dllist_t) urlp); ferrnr++; } else if(urlp->status & URL_ERR_REC) { nferr = dllist_append(nferr, (dllist_t) urlp); nferrnr++; } else { uns = dllist_append(uns, (dllist_t) urlp); unsnr++; } } else noproc++; ptr = ptr->next; } } } if(filename) { 
FILE *f;#define DMP_URLLIST(lst)\ ptr = lst;\ while(ptr)\ {\ urlp = (url *)ptr->data;\ p = url_to_urlstr(urlp, FALSE);\ fprintf(f, " %s\n", p);\ _free(p);\ LOCK_URL(urlp);\ for (parptr = urlp->parent_url; parptr ; parptr = parptr->next)\ {\ p = url_to_urlstr((url *)parptr->data, FALSE);\ fprintf(f, " %s\n", p);\ _free(p);\ }\ UNLOCK_URL(urlp);\ ptr = ptr->next;\ }\ f = fopen(filename, "wb+"); if(!f) { UNLOCK_CFG_URLHASH; xperror(filename); return -1; } fprintf(f, gettext("Total number of URLs in queue: %d\n"), total); fprintf(f, gettext("Starting urls: %d\n"), startnr); DMP_URLLIST(start); if(noproc) fprintf(f, gettext("Not processed yet: %d (%3d%%)\n"), noproc, (int) (((float) noproc / (float) total) * 100.0)); i = dnld + nsredir + redir; if(i) fprintf(f, gettext("Processed OK: %d (%3d%%)\n"), i, (int) (((float) i / (float) total) * 100.0)); if(redir) fprintf(f, gettext("Loaded from local tree: %d (%3d%%)\n"), redir, (int) (((float) redir / (float) total) * 100.0)); if(nsredir) fprintf(f, gettext("Loaded from Netscape browser cache dir: %d (%3d%%)\n"), nsredir, (int) (((float) nsredir / (float) total) * 100.0)); if(dnld) fprintf(f, gettext("Downloaded over network: %d (%3d%%)\n"), dnld, (int) (((float) dnld / (float) total) * 100.0)); if(moved) fprintf(f, gettext("Moved to another location: %d (%3d%%)\n"), moved, (int) (((float) moved / (float) total) * 100.0)); if(broken) { fprintf(f, gettext("Downloaded truncated: %d (%3d%%)\n"), brokennr, (int) (((float) brokennr / (float) total) * 100.0)); DMP_URLLIST(broken); } if(nferr) { fprintf(f, gettext("Non fatal errors: %d (%3d%%)\n"), nferrnr, (int) (((float) nferrnr / (float) total) * 100.0)); DMP_URLLIST(nferr); } if(dexist) { fprintf(f, gettext("Not found documents: %d (%3d%%)\n"), dexistnr, (int) (((float) dexistnr / (float) total) * 100.0)); DMP_URLLIST(dexist); } if(ferr) { fprintf(f, gettext("Documents with fatal errors: %d (%3d%%)\n"), ferrnr, (int) (((float) ferrnr / (float) total) * 100.0)); 
DMP_URLLIST(ferr); } if(uns) { fprintf(f, gettext("Documents with unknown status: %d (%3d%%)\n"), unsnr, (int) (((float) unsnr / (float) total) * 100.0)); DMP_URLLIST(uns); } fclose(f);#undef DMP_URLLIST }#ifdef GTK_FACE/* here will be clist with active URL fields */ else if(cfg.xi_face) { GtkWidget *l = GTK_WIDGET(lst);#define DMP_URLLIST(lst, ajp)\ ptr = lst;\ while(ptr)\ {\ urlp = (url *)ptr->data;\ p = url_to_urlstr(urlp, FALSE);\ sprintf(pom, " %s\n", p);\ _free(p);\ i = gtk_clist_append(GTK_CLIST(l), &pom);\ gtk_clist_set_row_data(GTK_CLIST(l), i, urlp);\ LOCK_URL(urlp);\ for (parptr = urlp->parent_url; ajp && parptr; parptr = parptr->next)\ {\ p = url_to_urlstr((url *)parptr->data, FALSE);\ sprintf(pom, " %s\n", p);\ _free(p);\ gtk_clist_append(GTK_CLIST(l), &pom);\ }\ UNLOCK_URL(urlp);\ ptr = ptr->next;\ }\ sprintf(pom, gettext("Total number of URLs in queue: %d\n"), total); gtk_clist_append(GTK_CLIST(l), &pom); sprintf(pom, gettext("Starting urls: %d\n"), startnr); gtk_clist_append(GTK_CLIST(l), &pom); DMP_URLLIST(start, FALSE); if(noproc) { sprintf(pom, gettext("Not processed yet: %d (%3d%%)\n"), noproc, (int) (((float) noproc / (float) total) * 100.0)); gtk_clist_append(GTK_CLIST(l), &pom); } i = dnld + nsredir + redir; if(i) { sprintf(pom, gettext("Processed OK: %d (%3d%%)\n"), i, (int) (((float) i / (float) total) * 100.0)); gtk_clist_append(GTK_CLIST(l), &pom); } if(redir) { sprintf(pom, gettext("Loaded from local tree: %d (%3d%%)\n"), redir, (int) (((float) redir / (float) total) * 100.0)); gtk_clist_append(GTK_CLIST(l), &pom); } if(nsredir) { sprintf(pom, gettext("Loaded from Netscape browser cache dir: %d (%3d%%)\n"), nsredir, (int) (((float) nsredir / (float) total) * 100.0)); gtk_clist_append(GTK_CLIST(l), &pom); } if(dnld) { sprintf(pom, gettext("Downloaded over network: %d (%3d%%)\n"), dnld, (int) (((float) dnld / (float) total) * 100.0)); gtk_clist_append(GTK_CLIST(l), &pom); } if(moved) { sprintf(pom, gettext("Moved to another location: %d 
(%3d%%)\n"), moved, (int) (((float) moved / (float) total) * 100.0)); gtk_clist_append(GTK_CLIST(l), &pom); } if(broken) { sprintf(pom, gettext("Downloaded truncated: %d (%3d%%)\n"), brokennr, (int) (((float) brokennr / (float) total) * 100.0)); gtk_clist_append(GTK_CLIST(l), &pom); DMP_URLLIST(broken, TRUE); } if(nferr) { sprintf(pom, gettext("Non fatal errors: %d (%3d%%)\n"), nferrnr, (int) (((float) nferrnr / (float) total) * 100.0)); gtk_clist_append(GTK_CLIST(l), &pom); DMP_URLLIST(nferr, TRUE); } if(dexist) {
⌨️ 快捷键说明
复制代码:Ctrl + C
搜索代码:Ctrl + F
全屏模式:F11
增大字号:Ctrl + =
减小字号:Ctrl + -
显示快捷键:?