⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 visitors.c

📁 web服务器访问统计
💻 C
📖 第 1 页 / 共 5 页
字号:
	r = ht_search(ht, key, &idx);	if (r == HT_NOTFOUND) {		return 0;	} else {		val = (long) ht_value(ht, idx);		return val;	}}/* Set a key/value pair inside the hash table with * a create-else-replace semantic. * * Return non-zero on out of memory. */int vi_replace(struct hashtable *ht, char *key, char *value){	char *k, *v;	k = strdup(key);	v = strdup(value);	if (!k || !v) goto err;	if (ht_replace(ht, k, v) != HT_OK)		goto err;	return 0;err:	if (k) free(k);	if (v) free(v);	return 1;}/* Replace the time value of the given key with the new one if this * is newer/older of the old one. If the key is new, it's just added * to the hash table with the specified time as value. * * If the 'ifolder' flag is set, values are replaced with older one, * otherwise with newer. * This function is only used by wrappers replace_if_older() and * replace_if_newer(). * * Return 0 on success, non-zero on out of memory. */int vi_replace_time(struct hashtable *ht, char *key, time_t time, int ifolder){	char *k = NULL;	unsigned int idx;	int r;	r = ht_search(ht, key, &idx);	if (r == HT_NOTFOUND) {		k = strdup(key);		if (!k) goto err;		if (ht_add(ht, k, (void*)time) != HT_OK) goto err;	} else {		time_t oldt = (time_t) ht_value(ht, idx);		/* Update the date if this one is older/nwer. */		if (ifolder) {			if (time < oldt)				ht_value(ht, idx) = (void*) time;		} else {			if (time > oldt)				ht_value(ht, idx) = (void*) time;		}	}	return 0;err:	if (k) free(k);	return 1;}/* see vi_replace_time */int vi_replace_if_older(struct hashtable *ht, char *key, time_t time){	return vi_replace_time(ht, key, time, 1);}/* see vi_replace_time */int vi_replace_if_newer(struct hashtable *ht, char *key, time_t time){	return vi_replace_time(ht, key, time, 0);}/* Set an error in the visitors handle */void vi_set_error(struct vih *vih, char *fmt, ...){	va_list ap;	char buf[VI_ERROR_MAX];	va_start(ap, fmt);	vsnprintf(buf, VI_ERROR_MAX, fmt, ap);	buf[VI_ERROR_MAX-1] = '\0';	free(vih->error);	vih->error = strdup(buf);	va_end(ap);}/* Get the error */char *vi_get_error(struct vih *vih){	if (!vih->error) {		return "No error";	}	return vih->error;}/* Clear the error */void vi_clear_error(struct vih *vih){	free(vih->error);	vih->error = NULL;}/*----------------------------------- parsing   ----------------------------- *//* Parse a line of log, and fill the logline structure with * appropriate values. On error (bad line format) non-zero is returned. */int vi_parse_line(struct logline *ll, char *l){	char *date, *hour, *timezone, *host, *agent, *req, *ref, *p;	char *agent_start = NULL, *req_end = NULL, *ref_end = NULL;        int agent_without_parens = 0;	/* Seek the start of the different components */	/* host */	host = l;	/* date */	if ((date = strchr(l, '[')) == NULL) return 1;	date++;	/* Identify user-agent start char. */	if ((agent = strchr(l, '(')) == NULL) {                /* Bad... user agent without (...) string, makes                 * the detection a bit slower and guessworkish. */                /* Check if the count of '"' chars in the string                 * is equal to six. If so, it's very likely that the                 * last field inside "" is the User Agent string, so                 * we get it. */                char *aux = l, *last = NULL;                int count = 0;                               /* Count '"' chars, save the last occurence found. */                while (*aux) {                    if (*aux == '"') {                        count++;                        last = aux;                    }                    aux++;                }                if (count == 6) {                    /* Ok! it seems like Combined log format.                     * Set a flag and get it later when the                     * rest of the log file is splitted. Now it's                     * too early to add \0 chars inside the line. */                    agent_without_parens = 1;                    agent_start = last-1;                    while(*agent_start != '"')                        agent_start--;                } else {                    /* No way... no user agent detected in this line. */		    agent = "";                }	} else {                /* User agent with () inside. Simple to detect, just                 * search the left and the right '"' chars enclosing                 * it. */		p = agent;		while (p >= l) {			if (*p == '"') {				agent_start = p;				break;			}			p--;		}	}	/* req */	if ((req = strstr(l, "\"GET")) != NULL ||	    (req = strstr(l, "\"POST")) != NULL ||	    (req = strstr(l, "\"HEAD")) != NULL ||	    (req = strstr(l, "\"get")) != NULL ||	    (req = strstr(l, "\"post")) != NULL ||	    (req = strstr(l, "\"head")) != NULL)	{		req++;	} else {		req = "";	}	/* ref */	if ((ref = strstr(l, "\"http")) != NULL ||	    (ref = strstr(l, "\"HTTP")) != NULL)	{		ref++;	} else {		ref = "";	}	/* Nul-term the components */	/* host */	if ((p = strchr(host, ' ')) == NULL) return 1;	*p = '\0';	/* date */	if ((p = strchr(date, ']')) == NULL) return 1;	*p = '\0';	ll->time = parse_date(date, &ll->tm);	if (ll->time == (time_t)-1) return 1;	/* hour */	if ((p = strchr(date, ':')) == NULL) return 1;	hour = p+1;	*p = '\0';	/* timezone */	if ((p = strchr(hour, ' ')) == NULL) return 1;	timezone = p+1;	*p = '\0';	/* req */	if ((p = strchr(req, '"')) == NULL) {		req = "";	} else {		req_end = p;		*p = '\0';		if ((p = strchr(req, ' ')) != NULL) {			req = p+1;			if ((p = strchr(req, ' ')) != NULL)				*p = '\0';		}	}	/* ref */	if ((p = strchr(ref, '"')) == NULL) {		ref = "";	} else {		ref_end = p;		*p = '\0';	}	/* agent */        if (agent_without_parens) {            /* User agent without (...) inside in a string with six '"' chars.             * Just search for the end. */            char *aux = strchr(agent_start+1, '"');            if (!aux) {                /* No way! */                agent = "";            } else {                *aux = '\0';                agent = agent_start+1;            }        } else if ((p = strchr(agent, ')')) == NULL) {		agent = "";	} else {		char *aux;		aux = strchr(p, '"');		if (aux)			*aux = '\0';		else			*(p+1) = '\0';		if (agent_start) {			if ((!req_end || (req_end != agent_start)) &&			    (!ref_end || (ref_end != agent_start))) {				agent = agent_start+1;			}		}	}	/* Fill the struture */	ll->host = host;	ll->date = date;	ll->hour = hour;	ll->timezone = timezone;	ll->agent = agent;	ll->req = req;	ll->ref = ref;	return 0;}/* process the weekday and hour information */void vi_process_date_and_hour(struct vih *vih, int weekday, int hour){	/* Note, the following sanity check is useless in theory. */	if (weekday < 0 || weekday > 6 || hour < 0 || hour > 23) return;	vih->weekday[weekday]++;	vih->hour[hour]++;	/* store the combined info. We always compute this information	 * even if the report is disabled because it's cheap. */	vih->weekdayhour[weekday][hour]++;}/* process the month and day information */void vi_process_month_and_day(struct vih *vih, int month, int day){	if (month < 0 || month > 11 || day < 0 || day > 30) return;	vih->monthday[month][day]++;}/* Process unique visitors populating the relative hash table. * Return non-zero on out of memory. This is also used to populate * the hashtable used for the "pageviews per user" statistics. * * Note that the last argument 'seen', is an integer passed by reference * that is set to '1' if this is not a new visit (otherwise it's set to zero) */int vi_process_visitors_per_day(struct vih *vih, char *host, char *agent, char *date, char *ref, char *req, int *seen){	char visday[VI_LINE_MAX], *p, *month = "fixme if I'm here!";        char buf[64];	int res, host_len, agent_len, date_len, hash_len;        unsigned long h;        /* Ignore visits from Bots */        if (vi_is_bot_agent(agent)) {            if (seen != NULL) seen = 0;            return 0;        }        /* Build an unique identifier for this visit         * adding together host, date and hash(user agent) */	host_len = strlen(host);	agent_len = strlen(agent);	date_len = strlen(date);        h = djb_hash((unsigned char*) agent, agent_len);        sprintf(buf, "%lu", h);        hash_len = strlen(buf);	if (host_len+agent_len+date_len+4 > VI_LINE_MAX)		return 0;	p = visday;	memcpy(p, host, host_len); p += host_len;	*p++ = '|';	memcpy(p, date, date_len); p += date_len;	*p++ = '|';	memcpy(p, buf, hash_len); p += hash_len;	*p = '\0';        /* fprintf(stderr, "%s\n", visday); */	if (Config_process_monthly_visitors) {		/* Skip the day number. */		month = strchr(date, '/');		if (!month) return 0; /* should never happen */		month++;	}	/* Visits with Google as referer are also stored in another hash	 * table. */	if (vi_is_google_link(ref)) {		res = vi_counter_incr(&vih->googlevisitors, visday);		if (res == 0) return 1; /* out of memory */		if (res == 1) { /* new visit! */			res = vi_counter_incr(&vih->googledate, date);			if (res == 0) return 1; /* out of memory */			if (Config_process_monthly_visitors) {				res = vi_counter_incr(&vih->googlemonth, month);				if (res == 0) return 1; /* out of memory */			}		}	}	/* Populate the 'pageviews per visitor' hash table */	if (Config_process_pageviews && vi_is_pageview(req)) {		res = vi_counter_incr(&vih->pageviews, visday);		if (res == 0) return 1; /* out of memory */	}	/* Mark the visit in the non-google-specific hashtable */	res = vi_counter_incr(&vih->visitors, visday);	if (res == 0) return 1; /* out of memory */	if (res > 1) {		if (seen) *seen = 1;		return 0; /* visit alredy seen. */	}	if (seen) *seen = 0; /* new visitor */	res = vi_counter_incr(&vih->date, date);	if (res == 0) return 1;	if (Config_process_monthly_visitors) {		res = vi_counter_incr(&vih->month, month);		if (res == 0) return 1;	}	return 0;}/* Process referers populating the relative hash tables. * Return non-zero on out of memory. */int vi_process_referer(struct vih *vih, char *ref, time_t age){	int res;        /* Check the url against the blacklist if needed         * this can be very slow... */        if (Config_filter_spam && vi_is_blacklisted_url(vih, ref))            return 0;	/* Don't count internal referer (specified by the user	 * using --prefix options), nor google referers. */	if (vi_is_internal_link(ref))		return !vi_counter_incr(&vih->referers, "Internal Link");	if (vi_is_google_link(ref))		return !vi_counter_incr(&vih->referers, "Google Search Engine");	res = vi_counter_incr(&vih->referers, ref);	if (res == 0) return 1;	/* Process the referers age if enabled */	if (Config_process_referers_age) {		if (vi_replace_if_older(&vih->referersage, ref, age)) return 1;	}	return 0;}/* Process requested URLs. Split the entries in two hash tables, * one for pages and one for images. * Return non-zero on out of memory. */int vi_process_page_request(struct vih *vih, char *url){	int res;	char urldecoded[VI_LINE_MAX];	vi_urldecode(urldecoded, url, VI_LINE_MAX);	if (vi_is_image(url))		res = vi_counter_incr(&vih->images, urldecoded);	else		res = vi_counter_incr(&vih->pages, urldecoded);	if (res == 0) return 1;	return 0;}/* Process log lines for 404 errors report. */int vi_process_error404(struct vih *vih, char *l, char *url, int *is404){	char urldecoded[VI_LINE_MAX];        if (is404) *is404 = 0;	vi_urldecode(urldecoded, url, VI_LINE_MAX);	if (strstr(l, " 404 ") && !strstr(l, " 200 ")) {                if (is404) *is404 = 1;		return !vi_counter_incr(&vih->error404, urldecoded);        }	return 0;}/* Process agents populating the relative hash table. * Return non-zero on out of memory. */int vi_process_agents(struct vih *vih, char *agent){	int res;	res = vi_counter_incr(&vih->agents, agent);	if (res == 0) return 1;	return 0;}/* Match the list of keywords 't' against the string 's', and if * a match is found increment the matching keyword in the hashtable. * Return zero on success, non-zero on out of memory . */int vi_counter_incr_matchtable(struct hashtable *ht, char *s, char **t){	while(*t) {		int res;		if ((*t)[0] == '\0' || strstr(s, *t) != NULL) {			char *key = *(t+1) ? *(t+1) : *t;			res = vi_counter_incr(ht, key);			if (res == 0) return 1;			return 0;		}		t += 2;	}	return 0;}/* Process Operating Systems populating the relative hash table. * Return non-zero on out of memory. */int vi_process_os(struct vih *vih, char *agent){	/* Order may matter. */	char *oslist[] = {		"Windows", NULL,		"Win98", "Windows",		"Win95", "Windows",		"WinNT", "Windows",		"Win32", "Windows",		"Linux", NULL,		"-linux-", "Linux",		"Macintosh", NULL,		"Mac_PowerPC", "Macintosh",		"SunOS", NULL,

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -