log.c
来自「网络爬虫程序」· C语言 代码 · 共 481 行
C
481 行
/***************************************************************************//* This code is part of WWW grabber called pavuk *//* Copyright (c) 1997 - 2001 Stefan Ondrejicka *//* Distributed under GPL 2 or later *//***************************************************************************/#define _GNU_SOURCE#define __USE_GNU#include "config.h"#include <unistd.h>#include <time.h>#include <string.h>#include <sys/types.h>#include <sys/stat.h>#include <fcntl.h>#include <stdio.h>#include <time.h>#include <math.h>#include "url.h"#include "doc.h"#include "tools.h"#include "log.h"#include "errcode.h"static void _log_str(char *);#ifndef HAVE_DPRINTF#include <stdarg.h>static int dprintf(int fd, const char *format, ...){ int n; va_list ap; static char buffer[1024]; va_start(ap, format); n = vsnprintf(buffer, 1024, format, ap); va_end(ap); return write(fd, buffer, n);}#endifstatic char *errcodetype(int ecode){ switch (ecode) { case ERR_NOERROR: return "OK"; case ERR_STORE_DOC: case ERR_FILE_OPEN: case ERR_DIR_URL: case ERR_UNKNOWN: case ERR_PROXY_CONNECT: case ERR_FTP_UNKNOWN: case ERR_FTP_BUSER: case ERR_FTP_BPASS: case ERR_HTTP_UNKNOWN: case ERR_HTTP_AUTH: case ERR_HTTP_PAY: case ERR_HTTP_BADRQ: case ERR_HTTP_FORB: case ERR_HTTP_SERV: case ERR_GOPHER_UNKNOWN: return "FATAL"; case ERR_LOCKED: case ERR_BIGGER: case ERR_NOMIMET: case ERR_BREAK: case ERR_OUTTIME: case ERR_SCRIPT_DISABLED: case ERR_SMALLER: case ERR_ZERO_SIZE: case ERR_PROCESSED: case ERR_UDISABLED: case ERR_RDISABLED: case ERR_FTP_NOREGET: case ERR_FTP_ACTUAL: case ERR_FTP_NOTRANSFER: case ERR_FTP_NOMDTM: case ERR_FTP_DIRNO: case ERR_HTTP_NOREGET: case ERR_HTTP_REDIR: case ERR_HTTP_ACTUAL: return "WARN"; case ERR_READ: case ERR_FTP_BDIR: case ERR_FTP_CONNECT: case ERR_FTP_DATACON: case ERR_FTP_GET: case ERR_FTP_NODIR: case ERR_FTP_TRUNC: case ERR_HTTP_CONNECT: case ERR_HTTP_SNDREQ: case ERR_HTTP_TRUNC: case ERR_HTTP_CYCLIC: case ERR_HTTP_NFOUND: case ERR_GOPHER_CONNECT: case ERR_HTTPS_CONNECT: return "ERR"; default: return "UNKNOWN"; }}void short_log(doc * docp, url * urlp){ int fd; char pom[1024]; char *p, *p1; time_t t = time(NULL); if(!cfg.short_logfile) return; LOCK_SLOG; fd = open(cfg.short_logfile, O_BINARY | O_CREAT | O_APPEND | O_WRONLY, S_IRUSR | S_IWUSR); if(fd < 0) { xperror("shortlog"); UNLOCK_SLOG; return; } if(_flock(fd, cfg.short_logfile, O_BINARY | O_CREAT | O_APPEND | O_WRONLY, TRUE)) { close(fd); UNLOCK_SLOG; return; } p1 = ctime(&t); p = strchr(p1, '\n'); if(p) *p = '\0'; snprintf(pom, sizeof(pom), "%d %s %d/%ld %s %d ", (int) getpid(), p1, docp->doc_nr, cfg.total_cnt, errcodetype(docp->errcode), docp->errcode); write(fd, pom, strlen(pom)); p = url_to_urlstr(urlp, FALSE); write(fd, p, strlen(p)); _free(p); if(urlp->parent_url) { write(fd, " ", 1); LOCK_URL(urlp); p = url_to_urlstr((url *) urlp->parent_url->data, FALSE); UNLOCK_URL(urlp); write(fd, p, strlen(p)); _free(p); } else { write(fd, " [none]", 7); } p = url_to_filename(urlp, FALSE); write(fd, " ", 1); write(fd, p, strlen(p)); sprintf(pom, " %ld", (long) docp->size); write(fd, pom, strlen(pom)); t = doc_etime(docp, FALSE); sprintf(pom, " %ld.%03ld", t / 1000, t % 1000); write(fd, pom, strlen(pom)); if(docp->mime) { int l = strcspn(docp->mime, "\r\n"); write(fd, " ", 1); write(fd, docp->mime, l); } write(fd, "\n", 1); _funlock(fd); close(fd); UNLOCK_SLOG;}static int time_relative_object(void){ return cfg.time_relative && !strcmp(cfg.time_relative, "object");}static int time_relative_program(void){ return cfg.time_relative && !strcmp(cfg.time_relative, "program");}static int log_num(int fd, const char *name, int width, long num){ int rv = -1; /* we print space before the number, but decrease * width to ensure there is always at least * one space before the number */ rv = dprintf(fd, " %*ld", width - 1, num); return rv;}static int log_time(int fd, const char *name, int width, doc * docp, const struct timeval *end, const struct timeval *begin){ long time_diff = -1; const struct timeval *relative = begin; if(time_relative_object()) { relative = &docp->hr_start_time; } else if(time_relative_program()) { relative = &cfg.hr_start_time; } if(timerisset(end) && timerisset(begin) && timerisset(relative)) { time_diff = (end->tv_sec - relative->tv_sec) * 1000 + (end->tv_usec - relative->tv_usec) / 1000.0; return log_num(fd, name, width, time_diff); } else { if(cfg.sdemo_mode) { return log_num(fd, name, width, time_diff); } else { return dprintf(fd, " %*s", width - 1, "*"); } }}/*** Logs timings information. Modelled after short_log() above ***/void time_log(doc * docp){ int fd = 1; char *p; char pom[1024] = "\0"; static int header_printed = 0; /* The following are not enough in all cases, but the space * is just too precious in common case to care */ int time_width = 6; const int size_width = 8; const int result_width = strlen(" HTTP/1.1 302 Moved Temporarily"); if(!cfg.time_logfile) { return; } LOCK_TLOG; if(strcmp(cfg.time_logfile, "-")) { fd = open(cfg.time_logfile, O_BINARY | O_CREAT | O_APPEND | O_WRONLY, S_IRUSR | S_IWUSR); if(fd < 0) { xperror("timelog"); UNLOCK_TLOG; return; } if(_flock(fd, cfg.time_logfile, O_BINARY | O_CREAT | O_APPEND | O_WRONLY, TRUE)) { close(fd); UNLOCK_TLOG; return; } } if(time_relative_object()) { time_width = 6; } else if(time_relative_program()) { time_width = 7; } /* Log file is locked. No race here */ if(!header_printed) { dprintf(fd, "%*s", time_width, "START"); dprintf(fd, "%*s", time_width, "END"); dprintf(fd, "%*s", time_width, "DNS"); dprintf(fd, "%*s", time_width, "CONN"); dprintf(fd, "%*s", time_width, "FB"); dprintf(fd, "%*s", time_width, "LB"); dprintf(fd, "%*s", time_width, "TOTAL"); dprintf(fd, "%*s", size_width, "SIZE"); dprintf(fd, " %-*s", result_width - 1, "RESULT"); dprintf(fd, " %s\n", "URL"); header_printed = 1; } log_time(fd, "START", time_width, docp, &docp->hr_start_time, &cfg.hr_start_time); log_time(fd, "END", time_width, docp, &docp->end_time, &cfg.hr_start_time); log_time(fd, "DNS", time_width, docp, &docp->dns_time, &docp->hr_start_time); log_time(fd, "CONN", time_width, docp, &docp->connect_time, &docp->dns_time); log_time(fd, "FB", time_width, docp, &docp->first_byte_time, &docp->connect_time); log_time(fd, "LB", time_width, docp, &docp->end_time, &docp->first_byte_time); log_time(fd, "TOTAL", time_width, docp, &docp->end_time, &docp->hr_start_time); log_num(fd, "SIZE", size_width, docp->size); if(docp->mime) { int len = strcspn(docp->mime, "\r\n"); if(len >= result_width) len = result_width - 1; if(len) { strncpy(pom, docp->mime, len); /* FIXME: Security */ *(pom + len) = '\0'; } else { strcat(pom, errcodetype(docp->errcode)); /* FIXME: Security */ } dprintf(fd, " %-*s", result_width - 1, pom); } else { dprintf(fd, " %-*s", result_width - 1, errcodetype(docp->errcode)); } p = url_to_urlstr(docp->doc_url, FALSE); dprintf(fd, " %s\n", p); _free(p); if(fd != 1) { _funlock(fd); close(fd); } UNLOCK_TLOG;}static int log_fd = -1;int log_start(char *filename){ static char *log_filename = NULL; bool_t start_log = FALSE; bool_t stop_log = FALSE; LOCK_LOG; if(!filename) stop_log = TRUE; else { if(log_filename) { if(strcmp(log_filename, filename)) { start_log = TRUE; stop_log = TRUE; } } else start_log = TRUE; } if(stop_log) { if(log_fd >= 0) { char pom[1024]; time_t t = time(NULL); LOCK_TIME; strftime(pom, sizeof(pom), gettext("Ending log : %H:%M:%S %d.%m.%Y\n"), localtime(&t)); UNLOCK_TIME; _log_str(pom); _funlock(log_fd); close(log_fd); } } if(start_log) { int nr = 0; char nfn[PATH_MAX]; strncpy(nfn, filename, sizeof(nfn)); nfn[sizeof(nfn) - 1] = '\0'; while(TRUE) { log_fd = open(nfn, O_BINARY | O_CREAT | O_APPEND | O_WRONLY, 0644); if(log_fd < 0) { xperror(nfn); xprintf(0, gettext("Unable to open log file - disabling logging\n")); break; } if(_flock(log_fd, nfn, O_BINARY | O_CREAT | O_APPEND | O_WRONLY, FALSE)) { close(log_fd); log_fd = -1; xprintf(0, gettext("Log file is locked by another process - ")); if(cfg.gen_logname) { snprintf(nfn, sizeof(nfn), "%s.%04d", filename, nr); xprintf(0, gettext("generating new log filename\n")); nr++; } else { xprintf(0, gettext("disabling logging\n")); break; } } else { break; } } if(nr > 0) { filename = nfn; _free(cfg.logfile); cfg.logfile = tl_strdup(nfn); } if(log_fd >= 0) { char pom[1024]; time_t t = time(NULL); LOCK_TIME; strftime(pom, sizeof(pom), gettext("Starting log : %H:%M:%S %d.%m.%Y\n"), localtime(&t)); UNLOCK_TIME; _log_str(pom); } } _free(log_filename); log_filename = tl_strdup(filename); UNLOCK_LOG; return 0;}static void _log_str(char *str){ if(log_fd >= 0) write(log_fd, str, strlen(str));}void log_str(char *str){ LOCK_LOG; _log_str(str); UNLOCK_LOG;}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?