webalizer.c

来自「统计日志的工具」· C语言 代码 · 共 1,572 行 · 第 1/5 页

C
1,572
字号
/*    webalizer - a web server log analysis program    Copyright (C) 1997-2001  Bradford L. Barrett (brad@mrunix.net)    This program is free software; you can redistribute it and/or modify    it under the terms of the GNU General Public License as published by    the Free Software Foundation; either version 2 of the License, or    (at your option) any later version, and provided that the above    copyright and permission notice is included with all distributed    copies of this or derived software.    This program is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    GNU General Public License for more details.    You should have received a copy of the GNU General Public License    along with this program; if not, write to the Free Software    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA    This software uses the gd graphics library, which is copyright by    Quest Protein Database Center, Cold Spring Harbor Labs.  Please    see the documentation supplied with the library for additional    information and license terms, or visit www.boutell.com/gd/ for the    most recent version of the library and supporting documentation.*//*********************************************//* STANDARD INCLUDES                         *//*********************************************/#include <time.h>#include <stdio.h>#include <stdlib.h>#include <string.h>#include <unistd.h>                           /* normal stuff             */#include <ctype.h>#include <sys/utsname.h>#include <sys/times.h>#include <zlib.h>/* ensure getopt */#ifdef HAVE_GETOPT_H#include <getopt.h>#endif/* ensure sys/types */#ifndef _SYS_TYPES_H#include <sys/types.h>#endif/* some systems need this */#ifdef HAVE_MATH_H#include <math.h>#endif/* SunOS 4.x Fix */#ifndef CLK_TCK#define CLK_TCK _SC_CLK_TCK#endif#ifdef USE_DNS#include <netinet/in.h>#include <arpa/inet.h>#ifdef HAVE_DB_185_H#include <db_185.h>#else#include <db.h>#endif  /* HAVE_DB_185_H */#endif  /* USE_DNS */#include "webalizer.h"                         /* main header              */#include "output.h"#include "parser.h"#include "preserve.h"#include "hashtab.h"#include "linklist.h"#include "webalizer_lang.h"                    /* lang. support            */#ifdef USE_DNS#include "dns_resolv.h"#endif/* internal function prototypes */void    clear_month();                              /* clear monthly stuff */char    *unescape(char *);                          /* unescape URL's      */char    from_hex(char);                             /* convert hex to dec  */void    print_opts(char *);                         /* print options       */void    print_version();                            /* duhh...             */int     isurlchar(unsigned char);                   /* valid URL char fnc. */void    get_config(char *);                         /* Read a config file  */static  char *save_opt(char *);                     /* save conf option    */void    srch_string(char *);                        /* srch str analysis   */char	*get_domain(char *);                        /* return domain name  */char    *our_gzgets(gzFile, char *, int);           /* our gzgets          *//*********************************************//* GLOBAL VARIABLES                          *//*********************************************/char    *version     = "2.01";                /* program version          */char    *editlvl     = "10";                  /* edit level               */char    *moddate     = "16-Apr-2002";         /* modification date        */char    *copyright   = "Copyright 1997-2001 by Bradford L. Barrett";int     verbose      = 2;                     /* 2=verbose,1=err, 0=none  */ int     debug_mode   = 0;                     /* debug mode flag          */int     time_me      = 0;                     /* timing display flag      */int     local_time   = 1;                     /* 1=localtime 0=GMT (UTC)  */int     ignore_hist  = 0;                     /* history flag (1=skip)    */int     hourly_graph = 1;                     /* hourly graph display     */int     hourly_stats = 1;                     /* hourly stats table       */int     daily_graph  = 1;                     /* daily graph display      */int     daily_stats  = 1;                     /* daily stats table        */int     ctry_graph   = 1;                     /* country graph display    */int     shade_groups = 1;                     /* Group shading 0=no 1=yes */int     hlite_groups = 1;                     /* Group hlite 0=no 1=yes   */int     mangle_agent = 0;                     /* mangle user agents       */int     incremental  = 0;                     /* incremental mode 1=yes   */int     use_https    = 0;                     /* use 'https://' on URL's  */int     visit_timeout= 1800;                  /* visit timeout (seconds)  */int     graph_legend = 1;                     /* graph legend (1=yes)     */int     graph_lines  = 2;                     /* graph lines (0=none)     */int     fold_seq_err = 0;                     /* fold seq err (0=no)      */int     log_type     = LOG_CLF;               /* (0=clf, 1=ftp, 2=squid)  */int     group_domains= 0;                     /* Group domains 0=none     */int     hide_sites   = 0;                     /* Hide ind. sites (0=no)   */char    *hname       = NULL;                  /* hostname for reports     */char    *state_fname = "webalizer.current";   /* run state file name      */char    *hist_fname  = "webalizer.hist";      /* name of history file     */char    *html_ext    = "html";                /* HTML file prefix         */char    *dump_ext    = "tab";                 /* Dump file prefix         */char    *conf_fname  = NULL;                  /* name of config file      */char    *log_fname   = NULL;                  /* log file pointer         */char    *out_dir     = NULL;                  /* output directory         */char    *blank_str   = "";                    /* blank string             */char    *dns_cache   = NULL;                  /* DNS cache file name      */int     dns_children = 0;                     /* DNS children (0=don't do)*/int     ntop_sites   = 30;                    /* top n sites to display   */int     ntop_sitesK  = 10;                    /* top n sites (by kbytes)  */int     ntop_urls    = 30;                    /* top n url's to display   */int     ntop_urlsK   = 10;                    /* top n url's (by kbytes)  */int     ntop_entry   = 10;                    /* top n entry url's        */int     ntop_exit    = 10;                    /* top n exit url's         */int     ntop_refs    = 30;                    /* top n referrers ""       */int     ntop_agents  = 15;                    /* top n user agents ""     */int     ntop_ctrys   = 30;                    /* top n countries   ""     */int     ntop_search  = 20;                    /* top n search strings     */int     ntop_users   = 20;                    /* top n users to display   */int     all_sites    = 0;                     /* List All sites (0=no)    */int     all_urls     = 0;                     /* List All URL's (0=no)    */int     all_refs     = 0;                     /* List All Referrers       */int     all_agents   = 0;                     /* List All User Agents     */int     all_search   = 0;                     /* List All Search Strings  */int     all_users    = 0;                     /* List All Usernames       */int     dump_sites   = 0;                     /* Dump tab delimited sites */int     dump_urls    = 0;                     /* URL's                    */int     dump_refs    = 0;                     /* Referrers                */int     dump_agents  = 0;                     /* User Agents              */int     dump_users   = 0;                     /* Usernames                */int     dump_search  = 0;                     /* Search strings           */int     dump_header  = 0;                     /* Dump header as first rec */char    *dump_path   = NULL;                  /* Path for dump files      */int     cur_year=0, cur_month=0,              /* year/month/day/hour      */        cur_day=0, cur_hour=0,                /* tracking variables       */        cur_min=0, cur_sec=0;u_long  cur_tstamp=0;                         /* Timestamp...             */u_long  rec_tstamp=0;  u_long  req_tstamp=0;u_long  epoch;                                /* used for timestamp adj.  */int     check_dup=0;                          /* check for dup flag       */int     gz_log=0;                             /* gziped log? (0=no)       */double  t_xfer=0.0;                           /* monthly total xfer value */u_long  t_hit=0,t_file=0,t_site=0,            /* monthly total vars       */        t_url=0,t_ref=0,t_agent=0,        t_page=0, t_visit=0, t_user=0;double  tm_xfer[31];                          /* daily transfer totals    */u_long  tm_hit[31], tm_file[31],              /* daily total arrays       */        tm_site[31], tm_page[31],        tm_visit[31];u_long  dt_site;                              /* daily 'sites' total      */u_long  ht_hit=0, mh_hit=0;                   /* hourly hits totals       */u_long  th_hit[24], th_file[24],              /* hourly total arrays      */        th_page[24];double  th_xfer[24];int     f_day,l_day;                          /* first/last day vars      */struct  utsname system_info;                  /* system info structure    */u_long  ul_bogus =0;                          /* Dummy counter for groups */struct  log_struct log_rec;                   /* expanded log storage     */time_t  now;                                  /* used by cur_time funct   */struct  tm *tp;                               /* to generate timestamp    */char    timestamp[32];                        /* for the reports          */gzFile  gzlog_fp;                             /* gzip logfile pointer     */FILE    *log_fp;                              /* regular logfile pointer  */char    buffer[BUFSIZE];                      /* log file record buffer   */char    tmp_buf[BUFSIZE];                     /* used to temp save above  */CLISTPTR *top_ctrys    = NULL;                /* Top countries table      */#define GZ_BUFSIZE 16384                      /* our_getfs buffer size    */char    f_buf[GZ_BUFSIZE];                    /* our_getfs buffer         */char    *f_cp=f_buf+GZ_BUFSIZE;               /* pointer into the buffer  */int     f_end;                                /* count to end of buffer   */ /*********************************************//* MAIN - start here                         *//*********************************************/int main(int argc, char *argv[]){   int      i;                           /* generic counter             */   char     *cp1, *cp2, *cp3, *str;      /* generic char pointers       */   NLISTPTR lptr;                        /* generic list pointer        */   extern char *optarg;                  /* used for command line       */   extern int optind;                    /* parsing routine 'getopt'    */   extern int opterr;   time_t start_time, end_time;          /* program timers              */   float  temp_time;                     /* temporary time storage      */   struct tms     mytms;                 /* bogus tms structure         */   int    rec_year,rec_month=1,rec_day,rec_hour,rec_min,rec_sec;   int    good_rec    =0;                /* 1 if we had a good record   */   u_long total_rec   =0;                /* Total Records Processed     */   u_long total_ignore=0;                /* Total Records Ignored       */   u_long total_bad   =0;                /* Total Bad Records           */   int    max_ctry;                      /* max countries defined       */   /* month names used for parsing logfile (shouldn't be lang specific) */   char *log_month[12]={ "jan", "feb", "mar",                         "apr", "may", "jun",                         "jul", "aug", "sep",                         "oct", "nov", "dec"};   /* initalize epoch */   epoch=jdate(1,1,1970);                /* used for timestamp adj.     */   /* add default index. alias */   add_nlist("index.",&index_alias);   sprintf(tmp_buf,"%s/webalizer.conf",ETCDIR);   /* check for default config file */   if (!access("webalizer.conf",F_OK))      get_config("webalizer.conf");   else if (!access(tmp_buf,F_OK))      get_config(tmp_buf);   /* get command line options */   opterr = 0;     /* disable parser errors */   while ((i=getopt(argc,argv,"a:A:c:C:dD:e:E:fF:g:GhHiI:l:Lm:M:n:N:o:pP:qQr:R:s:S:t:Tu:U:vVx:XY"))!=EOF)   {      switch (i)      {        case 'a': add_nlist(optarg,&hidden_agents); break; /* Hide agents   */        case 'A': ntop_agents=atoi(optarg);  break;  /* Top agents          */        case 'c': get_config(optarg);        break;  /* Config file         */        case 'C': ntop_ctrys=atoi(optarg);   break;  /* Top countries       */        case 'd': debug_mode=1;              break;  /* Debug               */	case 'D': dns_cache=optarg;          break;  /* DNS Cache filename  */        case 'e': ntop_entry=atoi(optarg);   break;  /* Top entry pages     */        case 'E': ntop_exit=atoi(optarg);    break;  /* Top exit pages      */        case 'f': fold_seq_err=1;            break;  /* Fold sequence errs  */        case 'F': log_type=(optarg[0]=='f')?                   LOG_FTP:(optarg[0]=='s')?                   LOG_SQUID:LOG_CLF;        break;  /* define log type     */	case 'g': group_domains=atoi(optarg); break; /* GroupDomains (0=no) */        case 'G': hourly_graph=0;            break;  /* no hourly graph     */        case 'h': print_opts(argv[0]);       break;  /* help                */        case 'H': hourly_stats=0;            break;  /* no hourly stats     */        case 'i': ignore_hist=1;             break;  /* Ignore history      */        case 'I': add_nlist(optarg,&index_alias); break; /* Index alias     */        case 'l': graph_lines=atoi(optarg);  break;  /* Graph Lines         */        case 'L': graph_legend=0;            break;  /* Graph Legends       */        case 'm': visit_timeout=atoi(optarg); break; /* Visit Timeout       */        case 'M': mangle_agent=atoi(optarg); break;  /* mangle user agents  */        case 'n': hname=optarg;              break;  /* Hostname            */        case 'N': dns_children=atoi(optarg); break;  /* # of DNS children   */        case 'o': out_dir=optarg;            break;  /* Output directory    */        case 'p': incremental=1;             break;  /* Incremental run     */        case 'P': add_nlist(optarg,&page_type); break; /* page view types   */        case 'q': verbose=1;                 break;  /* Quiet (verbose=1)   */        case 'Q': verbose=0;                 break;  /* Really Quiet        */        case 'r': add_nlist(optarg,&hidden_refs);   break; /* Hide referrer */        case 'R': ntop_refs=atoi(optarg);    break;  /* Top referrers       */        case 's': add_nlist(optarg,&hidden_sites);  break; /* Hide site     */        case 'S': ntop_sites=atoi(optarg);   break;  /* Top sites           */        case 't': msg_title=optarg;          break;  /* Report title        */        case 'T': time_me=1;                 break;  /* TimeMe              */        case 'u': add_nlist(optarg,&hidden_urls);   break; /* hide URL      */        case 'U': ntop_urls=atoi(optarg);    break;  /* Top urls            */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?