⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 visitors.c

📁 web服务器访问统计
💻 C
📖 第 1 页 / 共 5 页
字号:
/* visitors -- very fast web logs analyzer. * * Copyright (C) 2004-2006 Salvatore Sanfilippo <antirez@invece.org> * All Rights Reserved. * * This software is released under the terms of the GPL license version 2. * Read the COPYING file in this distribution for more details. */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <time.h>#include <stdarg.h>#include <errno.h>#include <locale.h>#include <ctype.h>#include "aht.h"#include "antigetopt.h"#include "sleep.h"#include "blacklist.h"/* Max length of an error stored in the visitors handle */#define VI_ERROR_MAX 1024/* Max length of a log line */#define VI_LINE_MAX 4096/* Max number of filenames in the command line */#define VI_FILENAMES_MAX 1024/* Max number of prefixes in the command line */#define VI_PREFIXES_MAX 1024/* Max number of --grep --exclude patterns in the command line */#define VI_GREP_PATTERNS_MAX 1024/* Abbreviation length for HTML outputs */#define VI_HTML_ABBR_LEN 100/* Version as a string */#define VI_DATE_MAX 64/* Max length of a log entry date */#define VI_VERSION_STR "0.7"/*------------------------------- data structures ----------------------------*//* visitors handle */struct vih {	int startt;	int endt;	int processed;	int invalid;        int blacklisted;	int hour[24];	int weekday[7];	int weekdayhour[7][24]; /* hour and weekday combined data */	int monthday[12][31]; /* month and day combined data */	struct hashtable visitors;	struct hashtable googlevisitors;	struct hashtable pages;	struct hashtable images;	struct hashtable error404;	struct hashtable pageviews;	struct hashtable pageviews_grouped;	struct hashtable referers;	struct hashtable referersage;	struct hashtable date;	struct hashtable googledate;        struct hashtable adsensed;	struct hashtable month;	struct hashtable googlemonth;	struct hashtable agents;	struct hashtable googled;	struct hashtable googlevisits;	struct hashtable googlekeyphrases;	struct hashtable googlekeyphrasesage;	struct hashtable trails;	struct hashtable tld;	struct hashtable os;	struct hashtable browsers;	struct hashtable robots;        struct hashtable googlehumanlanguage;        struct hashtable screenres;        struct hashtable screendepth;	char *error;};/* info associated with a line of log */struct logline {	char *host;	char *date;	char *hour;	char *timezone;	char *req;	char *ref;	char *agent;	time_t time;	struct tm tm;};/* output module structure. See below for the definition of * the text and html output modules. */struct outputmodule {	void (*print_header)(FILE *fp);	void (*print_footer)(FILE *fp);	void (*print_title)(FILE *fp, char *title);	void (*print_subtitle)(FILE *fp, char *title);	void (*print_numkey_info)(FILE *fp, char *key, int val);	void (*print_keykey_entry)(FILE *fp, char *key1, char *key2, int num);	void (*print_numkey_entry)(FILE *fp, char *key, int val, char *link,			int num);	void (*print_numkeybar_entry)(FILE *fp, char *key, int max, int tot,			int this);	void (*print_numkeycomparativebar_entry)(FILE *fp, char *key, int tot,			int this);	void (*print_bidimentional_map)(FILE *fp, int xlen, int ylen,			char **xlabel, char **ylabel, int *value);	void (*print_hline)(FILE *fp);	void (*print_credits)(FILE *fp);	void (*print_report_link)(FILE *fp, char *report);};/* Just a string with cached length */struct vistring {	char *str;	int len;};/* Grep pattern for --grep --exclude */#define VI_PATTERNTYPE_GREP 0#define VI_PATTERNTYPE_EXCLUDE 1struct greppat {    int type;    char *pattern;};/* ---------------------- global configuration parameters ------------------- */int Config_debug = 0;int Config_max_referers = 20;int Config_max_referers_age = 20;int Config_max_pages = 20;int Config_max_images = 20;int Config_max_error404 = 20;int Config_max_agents = 20;int Config_max_googled = 20;int Config_max_adsensed = 20;int Config_max_google_keyphrases = 20;int Config_max_google_keyphrases_age = 20;int Config_max_trails = 20;int Config_max_tld = 20;int Config_max_robots = 20;int Config_process_agents = 0;int Config_process_google = 0;int Config_process_google_keyphrases = 0;int Config_process_google_keyphrases_age = 0;int Config_process_google_human_language = 0;int Config_process_web_trails = 0;int Config_process_weekdayhour_map = 0;int Config_process_monthday_map = 0;int Config_process_referers_age = 0;int Config_process_tld = 0;int Config_process_os = 0;int Config_process_browsers = 0;int Config_process_error404 = 0;int Config_process_pageviews = 0;int Config_process_monthly_visitors = 1;int Config_process_robots = 0;int Config_process_screen_info = 0;int Config_graphviz_mode = 0;int Config_graphviz_ignorenode_google = 0;int Config_graphviz_ignorenode_external = 0;int Config_graphviz_ignorenode_noreferer = 0;int Config_tail_mode = 0;int Config_stream_mode = 0;int Config_update_every = 60*10; /* update every 10 minutes for default. */int Config_reset_every = 0;	/* never reset for default */int Config_time_delta = 0;	/* adjustable time difference */int Config_filter_spam = 0;int Config_ignore_404 = 0;char *Config_output_file = NULL; /* stdout if not set. */struct outputmodule *Output = NULL; /* intialized to 'text' in main() *//* Prefixes */int Config_prefix_num = 0;	/* number of set prefixes */struct vistring Config_prefix[VI_PREFIXES_MAX];/* Grep/Exclude array */struct greppat Config_grep_pattern[VI_GREP_PATTERNS_MAX];int Config_grep_pattern_num = 0;    /* number of set patterns *//*----------------------------------- Tables ---------------------------------*/static char *vi_wdname[7] = {"Mo", "Tu", "We", "Th", "Fr", "Sa", "Su"};#if 0static int vi_monthdays[12] = {31, 29, 31, 30, 31, 30 , 31, 31, 30, 31, 30, 31};#endif/* -------------------------------- prototypes ------------------------------ */void vi_clear_error(struct vih *vih);void vi_tail(int filec, char **filev);/*------------------- Options parsing help functions ------------------------ */void ConfigAddGrepPattern(char *pattern, int type){    char *s;    int len = strlen(pattern);    if (Config_grep_pattern_num == VI_GREP_PATTERNS_MAX) {        fprintf(stderr, "Too many grep/exclude options specified\n");        exit(1);    }    s = malloc(strlen(pattern)+3);    s[0] = '*';    memcpy(s+1, pattern, len);    s[len+1] = '*';    s[len+2] = '\0';    Config_grep_pattern[Config_grep_pattern_num].type = type;    Config_grep_pattern[Config_grep_pattern_num].pattern = s;    Config_grep_pattern_num++;}/*------------------------------ support functions -------------------------- *//* Returns non-zero if the link seems like a google link, zero otherwise. * Note that this function only checks for a prefix of www.google.<something>. * so may be fooled. */int vi_is_google_link(char *s){	return !strncmp(s, "http://www.google.", 18);}/* Returns non-zero if the user agent appears to be the GoogleBot. */int vi_is_googlebot_agent(char *agent) {	if (strstr(agent, "Googlebot") ||            strstr(agent, "googlebot")) return 1;        return 0;}/* Returns non-zero if the user agent appears to be the Mediapartners-Google. */int vi_is_adsensebot_agent(char *agent) {	if (strstr(agent, "Mediapartners-Google")) return 1;        return 0;}int vi_is_yahoobot_agent(char *agent) {        if (strstr(agent, "Yahoo! Slurp")) return 1;        return 0;}int vi_is_msbot_agent(char *agent) {        if (strstr(agent, "msn.com/msnbot.htm")) return 1;        return 0;}/* Try to guess if a given agent string is about a crawler/bot * of some time. This function MUST be conservative, because * false negatives are acceptable while false positives arent. */int vi_is_genericbot_agent(char *agent) {        if (strstr(agent, "crawler") ||            strstr(agent, "Crawler") ||            strstr(agent, "bot/") ||            strstr(agent, "Bot/") ||            strstr(agent, "bot.htm") ||            strstr(agent, "+http://")) return 1;        return 0;}int vi_is_bot_agent(char *agent) {    if (vi_is_googlebot_agent(agent) ||        vi_is_adsensebot_agent(agent) ||        vi_is_yahoobot_agent(agent) ||        vi_is_msbot_agent(agent)) return 1;    return 0;}/* Returns non-zero if the url matches some user-specified prefix. * being a link "internal" to the site. Otherwise zero is returned. * * When there is a match, the value returned is the length of * the matching prefix. */int vi_is_internal_link(char *url){	int i, l;	if (!Config_prefix_num) return 0; /* no prefixes set? */	l = strlen(url);	for (i = 0; i < Config_prefix_num; i++) {		if (Config_prefix[i].len <= l &&		    !strncasecmp(url, Config_prefix[i].str,			    Config_prefix[i].len))		{			return Config_prefix[i].len;		}	}	return 0;}/* returns non-zero if the URL 's' seems an image or a CSS file. */int vi_is_image(char *s){	int l = strlen(s);	char *end = s + l; /* point to the nul term */	if (l < 5) return 0;	if (!memcmp(end-4, ".css", 4) || 	    !memcmp(end-4, ".jpg", 4) || 	    !memcmp(end-4, ".gif", 4) ||	    !memcmp(end-4, ".png", 4) ||	    !memcmp(end-4, ".ico", 4) ||	    !memcmp(end-4, ".swf", 4) ||	    !memcmp(end-3, ".js", 3) ||	    !memcmp(end-5, ".jpeg", 5) ||	    !memcmp(end-4, ".CSS", 4) ||	    !memcmp(end-4, ".JPG", 4) ||	    !memcmp(end-4, ".GIF", 4) ||	    !memcmp(end-4, ".PNG", 4) ||	    !memcmp(end-4, ".ICO", 4) ||	    !memcmp(end-4, ".SWF", 4) ||	    !memcmp(end-3, ".JS", 3) ||	    !memcmp(end-5, ".JPEG", 5)) return 1;	return 0;}/* returns non-zero if the URL 's' seems a real page. */int vi_is_pageview(char *s){	int l = strlen(s);	char *end = s + l; /* point to the nul term */	char *dot, *slash;	if (s[l-1] == '/') return 1;	if (l >= 6 &&	    (!memcmp(end-5, ".html", 5) || 	    !memcmp(end-4, ".htm", 4) || 	    !memcmp(end-4, ".php", 4) ||	    !memcmp(end-4, ".asp", 4) ||	    !memcmp(end-4, ".jsp", 4) ||	    !memcmp(end-4, ".xdl", 4) ||	    !memcmp(end-5, ".xhtml", 5) ||	    !memcmp(end-4, ".xml", 4) ||	    !memcmp(end-4, ".cgi", 4) ||	    !memcmp(end-3, ".pl", 3) ||	    !memcmp(end-6, ".shtml", 6) ||	    !memcmp(end-5, ".HTML", 5) || 	    !memcmp(end-4, ".HTM", 4) || 	    !memcmp(end-4, ".PHP", 4) ||	    !memcmp(end-4, ".ASP", 4) ||	    !memcmp(end-4, ".JSP", 4) ||	    !memcmp(end-4, ".XDL", 4) ||	    !memcmp(end-6, ".XHTML", 6) ||	    !memcmp(end-4, ".XML", 4) ||	    !memcmp(end-4, ".CGI", 4) ||	    !memcmp(end-3, ".PL", 3) ||	    !memcmp(end-6, ".SHTML", 6))) return 1;	dot = strrchr(s, '.');	if (!dot) return 1;	slash = strrchr(s, '/');	if (slash && slash > dot) return 1;	return 0;}/* returns non-zero if 'ip' seems a string representing an IP address * like "1.2.3.4". Note that 'ip' is always an IP or an hostname * so this function actually test if the string pointed by 'ip' only * contains characters in the "[0-9.]" set */int vi_is_numeric_address(char *ip){	unsigned int l = strlen(ip);	return strspn(ip, "0123456789.") == l;}/* returns the time converted into a time_t value. * On error (time_t) -1 is returned. * Note that this function is specific for the following format: * "10/May/2004:04:15:33". Works if the month is not an abbreviation, or if the * year is abbreviated to only the last two digits. * The time can be omitted like in "10/May/2004". */time_t parse_date(char *s, struct tm *tmptr){	struct tm tm;	time_t t;	char *months[] = {		"jan", "feb", "mar", "apr", "may", "jun",		"jul", "aug", "sep", "oct", "nov", "dec",	};	char *day, *month, *year, *time = NULL;	char monthaux[32];	int i, len;	/* make a copy to mess with it */	len = strlen(s);	if (len >= 32) goto fmterr;	memcpy(monthaux, s, len);	monthaux[len] = '\0';	/* Inizialize the tm structure. We just fill three fields */	tm.tm_sec = 0;	tm.tm_min = 0;	tm.tm_hour = 0;	tm.tm_mday = 0;	tm.tm_mon = 0;	tm.tm_year = 0;	tm.tm_wday = 0;	tm.tm_yday = 0;	tm.tm_isdst = -1;	/* search delimiters */	day = monthaux;	if ((month = strchr(day, '/')) == NULL) goto fmterr;	*month++ = '\0';	if ((year = strchr(month, '/')) == NULL) goto fmterr;	*year++ = '\0';	/* time, optional for this parser. */	if ((time = strchr(year, ':')) != NULL) {		*time++ = '\0';	}	/* convert day */	tm.tm_mday = atoi(day);	if (tm.tm_mday < 1 || tm.tm_mday > 31) goto fmterr;	/* convert month */	if (strlen(month) < 3) goto fmterr;	month[0] = tolower(month[0]);	month[1] = tolower(month[1]);	month[2] = tolower(month[2]);	for (i = 0; i < 12; i++) {		if (memcmp(month, months[i], 3) == 0) break;	}	if (i == 12) goto fmterr;	tm.tm_mon = i;	/* convert year */	tm.tm_year = atoi(year);	if (tm.tm_year > 100) {		if (tm.tm_year < 1900 || tm.tm_year > 2500) goto fmterr;		tm.tm_year -= 1900;	} else {		/* if the year is in two-digits form, the 0 - 68 range		 * is converted to 2000 - 2068 */		if (tm.tm_year < 69)			tm.tm_year += 100;	}	/* convert time */	if (time) { /* format is HH:MM:SS */		if (strlen(time) < 8) goto fmterr;		tm.tm_hour = ((time[0]-'0')*10)+(time[1]-'0');		if (tm.tm_hour < 0 || tm.tm_hour > 23) goto fmterr;		tm.tm_min = ((time[3]-'0')*10)+(time[4]-'0');		if (tm.tm_min < 0 || tm.tm_min > 59) goto fmterr;		tm.tm_sec = ((time[6]-'0')*10)+(time[7]-'0');		if (tm.tm_sec < 0 || tm.tm_sec > 60) goto fmterr;	}	t = mktime(&tm);	if (t == (time_t)-1) goto fmterr;	t += (Config_time_delta*3600);	if (tmptr) {		struct tm *auxtm;		if ((auxtm = localtime(&t)) != NULL)			*tmptr = *auxtm;	}	return t;fmterr: /* format error */	return (time_t) -1;}/* returns 1 if the given date is Saturday or Sunday. * Zero is otherwise returned. */int vi_is_weekend(char *s){	struct tm tm;	if (parse_date(s, &tm) != (time_t)-1) {		if (tm.tm_wday == 0 || tm.tm_wday == 6)			return 1;	}	return 0;}#if 0

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -