webalizer.c
来自「统计日志的工具」· C语言 代码 · 共 1,572 行 · 第 1/5 页
C
1,572 行
/* webalizer - a web server log analysis program Copyright (C) 1997-2001 Bradford L. Barrett (brad@mrunix.net) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version, and provided that the above copyright and permission notice is included with all distributed copies of this or derived software. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA This software uses the gd graphics library, which is copyright by Quest Protein Database Center, Cold Spring Harbor Labs. 
Please see the documentation supplied with the library for additional information and license terms, or visit www.boutell.com/gd/ for the most recent version of the library and supporting documentation.*//*********************************************//* STANDARD INCLUDES *//*********************************************/#include <time.h>#include <stdio.h>#include <stdlib.h>#include <string.h>#include <unistd.h> /* normal stuff */#include <ctype.h>#include <sys/utsname.h>#include <sys/times.h>#include <zlib.h>/* ensure getopt */#ifdef HAVE_GETOPT_H#include <getopt.h>#endif/* ensure sys/types */#ifndef _SYS_TYPES_H#include <sys/types.h>#endif/* some systems need this */#ifdef HAVE_MATH_H#include <math.h>#endif/* SunOS 4.x Fix */#ifndef CLK_TCK#define CLK_TCK _SC_CLK_TCK#endif#ifdef USE_DNS#include <netinet/in.h>#include <arpa/inet.h>#ifdef HAVE_DB_185_H#include <db_185.h>#else#include <db.h>#endif /* HAVE_DB_185_H */#endif /* USE_DNS */#include "webalizer.h" /* main header */#include "output.h"#include "parser.h"#include "preserve.h"#include "hashtab.h"#include "linklist.h"#include "webalizer_lang.h" /* lang. support */#ifdef USE_DNS#include "dns_resolv.h"#endif/* internal function prototypes */void clear_month(); /* clear monthly stuff */char *unescape(char *); /* unescape URL's */char from_hex(char); /* convert hex to dec */void print_opts(char *); /* print options */void print_version(); /* duhh... */int isurlchar(unsigned char); /* valid URL char fnc. 
*/
void get_config(char *);                       /* read a config file          */
static char *save_opt(char *);                 /* save a config option        */
void srch_string(char *);                      /* search string analysis      */
char *get_domain(char *);                      /* return domain name          */
char *our_gzgets(gzFile, char *, int);         /* our gzgets                  */

/*********************************************/
/* GLOBAL VARIABLES                          */
/*********************************************/

/*
 * Program-wide settings and run state.  The initializers below are the
 * built-in defaults; main() calls get_config() and then walks the
 * command line with getopt(), and many of these variables are assigned
 * there (the option letter is noted where main() shows it).
 */

/* --- program identification --- */
char *version = "2.01";                        /* program version             */
char *editlvl = "10";                          /* edit level                  */
char *moddate = "16-Apr-2002";                 /* modification date           */
char *copyright = "Copyright 1997-2001 by Bradford L. Barrett";

/* --- behaviour flags --- */
int verbose = 2;                               /* 2=verbose, 1=errors only,   */
                                               /* 0=none (-q sets 1, -Q 0)    */
int debug_mode = 0;                            /* debug mode flag (-d)        */
int time_me = 0;                               /* timing display flag (-T)    */
int local_time = 1;                            /* 1=localtime 0=GMT (UTC)     */
int ignore_hist = 0;                           /* history flag, 1=skip (-i)   */
int hourly_graph = 1;                          /* hourly graph display (-G    */
                                               /* turns it off)               */
int hourly_stats = 1;                          /* hourly stats table (-H      */
                                               /* turns it off)               */
int daily_graph = 1;                           /* daily graph display         */
int daily_stats = 1;                           /* daily stats table           */
int ctry_graph = 1;                            /* country graph display       */
int shade_groups = 1;                          /* group shading 0=no 1=yes    */
int hlite_groups = 1;                          /* group highlight 0=no 1=yes  */
int mangle_agent = 0;                          /* mangle user agents (-M)     */
int incremental = 0;                           /* incremental mode 1=yes (-p) */
int use_https = 0;                             /* use 'https://' on URL's     */
int visit_timeout= 1800;                       /* visit timeout, seconds (-m) */
int graph_legend = 1;                          /* graph legend, 1=yes (-L     */
                                               /* turns it off)               */
int graph_lines = 2;                           /* graph lines, 0=none (-l)    */
int fold_seq_err = 0;                          /* fold sequence errors,       */
                                               /* 0=no (-f sets 1)            */
int log_type = LOG_CLF;                        /* (0=clf, 1=ftp, 2=squid);    */
                                               /* -F picks LOG_FTP/LOG_SQUID, */
                                               /* anything else is LOG_CLF    */
int group_domains= 0;                          /* group domains, 0=none (-g)  */
int hide_sites = 0;                            /* hide individual sites       */
                                               /* (0=no)                      */

/* --- names, paths and file extensions --- */
char *hname = NULL;                            /* hostname for reports (-n)   */
char *state_fname = "webalizer.current";       /* run state file name         */
char *hist_fname = "webalizer.hist";           /* name of history file        */
char *html_ext = "html";                       /* HTML file extension (the    */
                                               /* original comment said       */
                                               /* "prefix" - TODO confirm)    */
char *dump_ext = "tab";                        /* dump file extension (the    */
                                               /* original comment said       */
                                               /* "prefix" - TODO confirm)    */
char *conf_fname = NULL;                       /* name of config file         */
char *log_fname = NULL;                        /* log file name, presumably   */
                                               /* (original comment said      */
                                               /* "pointer" - TODO confirm)   */
char *out_dir = NULL;                          /* output directory (-o)       */
char *blank_str = "";                          /* blank string                */
char *dns_cache = NULL;                        /* DNS cache file name (-D)    */
int dns_children = 0;                          /* DNS children, 0=don't do    */
                                               /* (-N)                        */

/* --- "top n" table sizes --- */
int ntop_sites = 30;                           /* top n sites to display (-S) */
int ntop_sitesK = 10;                          /* top n sites (by kbytes)     */
int ntop_urls = 30;                            /* top n url's to display (-U) */
int ntop_urlsK = 10;                           /* top n url's (by kbytes)     */
int ntop_entry = 10;                           /* top n entry url's (-e)      */
int ntop_exit = 10;                            /* top n exit url's (-E)       */
int ntop_refs = 30;                            /* top n referrers to display  */
                                               /* (-R)                        */
int ntop_agents = 15;                          /* top n user agents to        */
                                               /* display (-A)                */
int ntop_ctrys = 30;                           /* top n countries to display  */
                                               /* (-C)                        */
int ntop_search = 20;                          /* top n search strings        */
int ntop_users = 20;                           /* top n users to display      */

/* --- "list all" switches --- */
int all_sites = 0;                             /* list all sites (0=no)       */
int all_urls = 0;                              /* list all URL's (0=no)       */
int all_refs = 0;                              /* list all referrers          */
int all_agents = 0;                            /* list all user agents        */
int all_search = 0;                            /* list all search strings     */
int all_users = 0;                             /* list all usernames          */

/* --- tab delimited dump file switches --- */
int dump_sites = 0;                            /* dump tab delimited sites    */
int dump_urls = 0;                             /* ... URL's                   */
int dump_refs = 0;                             /* ... referrers               */
int dump_agents = 0;                           /* ... user agents             */
int dump_users = 0;                            /* ... usernames               */
int dump_search = 0;                           /* ... search strings          */
int dump_header = 0;                           /* dump header as first record */
char *dump_path = NULL;                        /* path for dump files         */

/* --- date/time tracking --- */
int cur_year=0, cur_month=0,                   /* year/month/day/hour         */
    cur_day=0, cur_hour=0,                     /* tracking variables          */
    cur_min=0, cur_sec=0;
u_long cur_tstamp=0;                           /* timestamp...                */
u_long rec_tstamp=0;                           /* NOTE(review): no comment in */
u_long req_tstamp=0;                           /* the original for these two; */
                                               /* names suggest record and    */
                                               /* request timestamps - TODO   */
                                               /* confirm against their users */
u_long epoch;                                  /* used for timestamp adjust-  */
                                               /* ment; main() sets it to     */
                                               /* jdate(1,1,1970)             */
int check_dup=0;                               /* check for duplicates flag   */
int gz_log=0; /* gziped log? 
(0=no) */double t_xfer=0.0; /* monthly total xfer value */u_long t_hit=0,t_file=0,t_site=0, /* monthly total vars */ t_url=0,t_ref=0,t_agent=0, t_page=0, t_visit=0, t_user=0;double tm_xfer[31]; /* daily transfer totals */u_long tm_hit[31], tm_file[31], /* daily total arrays */ tm_site[31], tm_page[31], tm_visit[31];u_long dt_site; /* daily 'sites' total */u_long ht_hit=0, mh_hit=0; /* hourly hits totals */u_long th_hit[24], th_file[24], /* hourly total arrays */ th_page[24];double th_xfer[24];int f_day,l_day; /* first/last day vars */struct utsname system_info; /* system info structure */u_long ul_bogus =0; /* Dummy counter for groups */struct log_struct log_rec; /* expanded log storage */time_t now; /* used by cur_time funct */struct tm *tp; /* to generate timestamp */char timestamp[32]; /* for the reports */gzFile gzlog_fp; /* gzip logfile pointer */FILE *log_fp; /* regular logfile pointer */char buffer[BUFSIZE]; /* log file record buffer */char tmp_buf[BUFSIZE]; /* used to temp save above */CLISTPTR *top_ctrys = NULL; /* Top countries table */#define GZ_BUFSIZE 16384 /* our_getfs buffer size */char f_buf[GZ_BUFSIZE]; /* our_getfs buffer */char *f_cp=f_buf+GZ_BUFSIZE; /* pointer into the buffer */int f_end; /* count to end of buffer */ /*********************************************//* MAIN - start here *//*********************************************/int main(int argc, char *argv[]){ int i; /* generic counter */ char *cp1, *cp2, *cp3, *str; /* generic char pointers */ NLISTPTR lptr; /* generic list pointer */ extern char *optarg; /* used for command line */ extern int optind; /* parsing routine 'getopt' */ extern int opterr; time_t start_time, end_time; /* program timers */ float temp_time; /* temporary time storage */ struct tms mytms; /* bogus tms structure */ int rec_year,rec_month=1,rec_day,rec_hour,rec_min,rec_sec; int good_rec =0; /* 1 if we had a good record */ u_long total_rec =0; /* Total Records Processed */ u_long total_ignore=0; /* Total Records 
Ignored */ u_long total_bad =0; /* Total Bad Records */ int max_ctry; /* max countries defined */ /* month names used for parsing logfile (shouldn't be lang specific) */ char *log_month[12]={ "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"}; /* initalize epoch */ epoch=jdate(1,1,1970); /* used for timestamp adj. */ /* add default index. alias */ add_nlist("index.",&index_alias); sprintf(tmp_buf,"%s/webalizer.conf",ETCDIR); /* check for default config file */ if (!access("webalizer.conf",F_OK)) get_config("webalizer.conf"); else if (!access(tmp_buf,F_OK)) get_config(tmp_buf); /* get command line options */ opterr = 0; /* disable parser errors */ while ((i=getopt(argc,argv,"a:A:c:C:dD:e:E:fF:g:GhHiI:l:Lm:M:n:N:o:pP:qQr:R:s:S:t:Tu:U:vVx:XY"))!=EOF) { switch (i) { case 'a': add_nlist(optarg,&hidden_agents); break; /* Hide agents */ case 'A': ntop_agents=atoi(optarg); break; /* Top agents */ case 'c': get_config(optarg); break; /* Config file */ case 'C': ntop_ctrys=atoi(optarg); break; /* Top countries */ case 'd': debug_mode=1; break; /* Debug */ case 'D': dns_cache=optarg; break; /* DNS Cache filename */ case 'e': ntop_entry=atoi(optarg); break; /* Top entry pages */ case 'E': ntop_exit=atoi(optarg); break; /* Top exit pages */ case 'f': fold_seq_err=1; break; /* Fold sequence errs */ case 'F': log_type=(optarg[0]=='f')? LOG_FTP:(optarg[0]=='s')? 
LOG_SQUID:LOG_CLF; break; /* define log type */ case 'g': group_domains=atoi(optarg); break; /* GroupDomains (0=no) */ case 'G': hourly_graph=0; break; /* no hourly graph */ case 'h': print_opts(argv[0]); break; /* help */ case 'H': hourly_stats=0; break; /* no hourly stats */ case 'i': ignore_hist=1; break; /* Ignore history */ case 'I': add_nlist(optarg,&index_alias); break; /* Index alias */ case 'l': graph_lines=atoi(optarg); break; /* Graph Lines */ case 'L': graph_legend=0; break; /* Graph Legends */ case 'm': visit_timeout=atoi(optarg); break; /* Visit Timeout */ case 'M': mangle_agent=atoi(optarg); break; /* mangle user agents */ case 'n': hname=optarg; break; /* Hostname */ case 'N': dns_children=atoi(optarg); break; /* # of DNS children */ case 'o': out_dir=optarg; break; /* Output directory */ case 'p': incremental=1; break; /* Incremental run */ case 'P': add_nlist(optarg,&page_type); break; /* page view types */ case 'q': verbose=1; break; /* Quiet (verbose=1) */ case 'Q': verbose=0; break; /* Really Quiet */ case 'r': add_nlist(optarg,&hidden_refs); break; /* Hide referrer */ case 'R': ntop_refs=atoi(optarg); break; /* Top referrers */ case 's': add_nlist(optarg,&hidden_sites); break; /* Hide site */ case 'S': ntop_sites=atoi(optarg); break; /* Top sites */ case 't': msg_title=optarg; break; /* Report title */ case 'T': time_me=1; break; /* TimeMe */ case 'u': add_nlist(optarg,&hidden_urls); break; /* hide URL */ case 'U': ntop_urls=atoi(optarg); break; /* Top urls */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?