📄 visitors.c
字号:
r = ht_search(ht, key, &idx); if (r == HT_NOTFOUND) { return 0; } else { val = (long) ht_value(ht, idx); return val; }}/* Set a key/value pair inside the hash table with * a create-else-replace semantic. * * Return non-zero on out of memory. */int vi_replace(struct hashtable *ht, char *key, char *value){ char *k, *v; k = strdup(key); v = strdup(value); if (!k || !v) goto err; if (ht_replace(ht, k, v) != HT_OK) goto err; return 0;err: if (k) free(k); if (v) free(v); return 1;}/* Replace the time value of the given key with the new one if this * is newer/older of the old one. If the key is new, it's just added * to the hash table with the specified time as value. * * If the 'ifolder' flag is set, values are replaced with older one, * otherwise with newer. * This function is only used by wrappers replace_if_older() and * replace_if_newer(). * * Return 0 on success, non-zero on out of memory. */int vi_replace_time(struct hashtable *ht, char *key, time_t time, int ifolder){ char *k = NULL; unsigned int idx; int r; r = ht_search(ht, key, &idx); if (r == HT_NOTFOUND) { k = strdup(key); if (!k) goto err; if (ht_add(ht, k, (void*)time) != HT_OK) goto err; } else { time_t oldt = (time_t) ht_value(ht, idx); /* Update the date if this one is older/nwer. */ if (ifolder) { if (time < oldt) ht_value(ht, idx) = (void*) time; } else { if (time > oldt) ht_value(ht, idx) = (void*) time; } } return 0;err: if (k) free(k); return 1;}/* see vi_replace_time */int vi_replace_if_older(struct hashtable *ht, char *key, time_t time){ return vi_replace_time(ht, key, time, 1);}/* see vi_replace_time */int vi_replace_if_newer(struct hashtable *ht, char *key, time_t time){ return vi_replace_time(ht, key, time, 0);}/* Set an error in the visitors handle */void vi_set_error(struct vih *vih, char *fmt, ...){ va_list ap; char buf[VI_ERROR_MAX]; va_start(ap, fmt); vsnprintf(buf, VI_ERROR_MAX, fmt, ap); buf[VI_ERROR_MAX-1] = '\0'; free(vih->error); vih->error = strdup(buf); va_end(ap);}/* Get the error */char *vi_get_error(struct vih *vih){ if (!vih->error) { return "No error"; } return vih->error;}/* Clear the error */void vi_clear_error(struct vih *vih){ free(vih->error); vih->error = NULL;}/*----------------------------------- parsing ----------------------------- *//* Parse a line of log, and fill the logline structure with * appropriate values. On error (bad line format) non-zero is returned. */int vi_parse_line(struct logline *ll, char *l){ char *date, *hour, *timezone, *host, *agent, *req, *ref, *p; char *agent_start = NULL, *req_end = NULL, *ref_end = NULL; int agent_without_parens = 0; /* Seek the start of the different components */ /* host */ host = l; /* date */ if ((date = strchr(l, '[')) == NULL) return 1; date++; /* Identify user-agent start char. */ if ((agent = strchr(l, '(')) == NULL) { /* Bad... user agent without (...) string, makes * the detection a bit slower and guessworkish. */ /* Check if the count of '"' chars in the string * is equal to six. If so, it's very likely that the * last field inside "" is the User Agent string, so * we get it. */ char *aux = l, *last = NULL; int count = 0; /* Count '"' chars, save the last occurence found. */ while (*aux) { if (*aux == '"') { count++; last = aux; } aux++; } if (count == 6) { /* Ok! it seems like Combined log format. * Set a flag and get it later when the * rest of the log file is splitted. Now it's * too early to add \0 chars inside the line. */ agent_without_parens = 1; agent_start = last-1; while(*agent_start != '"') agent_start--; } else { /* No way... no user agent detected in this line. */ agent = ""; } } else { /* User agent with () inside. Simple to detect, just * search the left and the right '"' chars enclosing * it. */ p = agent; while (p >= l) { if (*p == '"') { agent_start = p; break; } p--; } } /* req */ if ((req = strstr(l, "\"GET")) != NULL || (req = strstr(l, "\"POST")) != NULL || (req = strstr(l, "\"HEAD")) != NULL || (req = strstr(l, "\"get")) != NULL || (req = strstr(l, "\"post")) != NULL || (req = strstr(l, "\"head")) != NULL) { req++; } else { req = ""; } /* ref */ if ((ref = strstr(l, "\"http")) != NULL || (ref = strstr(l, "\"HTTP")) != NULL) { ref++; } else { ref = ""; } /* Nul-term the components */ /* host */ if ((p = strchr(host, ' ')) == NULL) return 1; *p = '\0'; /* date */ if ((p = strchr(date, ']')) == NULL) return 1; *p = '\0'; ll->time = parse_date(date, &ll->tm); if (ll->time == (time_t)-1) return 1; /* hour */ if ((p = strchr(date, ':')) == NULL) return 1; hour = p+1; *p = '\0'; /* timezone */ if ((p = strchr(hour, ' ')) == NULL) return 1; timezone = p+1; *p = '\0'; /* req */ if ((p = strchr(req, '"')) == NULL) { req = ""; } else { req_end = p; *p = '\0'; if ((p = strchr(req, ' ')) != NULL) { req = p+1; if ((p = strchr(req, ' ')) != NULL) *p = '\0'; } } /* ref */ if ((p = strchr(ref, '"')) == NULL) { ref = ""; } else { ref_end = p; *p = '\0'; } /* agent */ if (agent_without_parens) { /* User agent without (...) inside in a string with six '"' chars. * Just search for the end. */ char *aux = strchr(agent_start+1, '"'); if (!aux) { /* No way! */ agent = ""; } else { *aux = '\0'; agent = agent_start+1; } } else if ((p = strchr(agent, ')')) == NULL) { agent = ""; } else { char *aux; aux = strchr(p, '"'); if (aux) *aux = '\0'; else *(p+1) = '\0'; if (agent_start) { if ((!req_end || (req_end != agent_start)) && (!ref_end || (ref_end != agent_start))) { agent = agent_start+1; } } } /* Fill the struture */ ll->host = host; ll->date = date; ll->hour = hour; ll->timezone = timezone; ll->agent = agent; ll->req = req; ll->ref = ref; return 0;}/* process the weekday and hour information */void vi_process_date_and_hour(struct vih *vih, int weekday, int hour){ /* Note, the following sanity check is useless in theory. */ if (weekday < 0 || weekday > 6 || hour < 0 || hour > 23) return; vih->weekday[weekday]++; vih->hour[hour]++; /* store the combined info. We always compute this information * even if the report is disabled because it's cheap. */ vih->weekdayhour[weekday][hour]++;}/* process the month and day information */void vi_process_month_and_day(struct vih *vih, int month, int day){ if (month < 0 || month > 11 || day < 0 || day > 30) return; vih->monthday[month][day]++;}/* Process unique visitors populating the relative hash table. * Return non-zero on out of memory. This is also used to populate * the hashtable used for the "pageviews per user" statistics. * * Note that the last argument 'seen', is an integer passed by reference * that is set to '1' if this is not a new visit (otherwise it's set to zero) */int vi_process_visitors_per_day(struct vih *vih, char *host, char *agent, char *date, char *ref, char *req, int *seen){ char visday[VI_LINE_MAX], *p, *month = "fixme if I'm here!"; char buf[64]; int res, host_len, agent_len, date_len, hash_len; unsigned long h; /* Ignore visits from Bots */ if (vi_is_bot_agent(agent)) { if (seen != NULL) seen = 0; return 0; } /* Build an unique identifier for this visit * adding together host, date and hash(user agent) */ host_len = strlen(host); agent_len = strlen(agent); date_len = strlen(date); h = djb_hash((unsigned char*) agent, agent_len); sprintf(buf, "%lu", h); hash_len = strlen(buf); if (host_len+agent_len+date_len+4 > VI_LINE_MAX) return 0; p = visday; memcpy(p, host, host_len); p += host_len; *p++ = '|'; memcpy(p, date, date_len); p += date_len; *p++ = '|'; memcpy(p, buf, hash_len); p += hash_len; *p = '\0'; /* fprintf(stderr, "%s\n", visday); */ if (Config_process_monthly_visitors) { /* Skip the day number. */ month = strchr(date, '/'); if (!month) return 0; /* should never happen */ month++; } /* Visits with Google as referer are also stored in another hash * table. */ if (vi_is_google_link(ref)) { res = vi_counter_incr(&vih->googlevisitors, visday); if (res == 0) return 1; /* out of memory */ if (res == 1) { /* new visit! */ res = vi_counter_incr(&vih->googledate, date); if (res == 0) return 1; /* out of memory */ if (Config_process_monthly_visitors) { res = vi_counter_incr(&vih->googlemonth, month); if (res == 0) return 1; /* out of memory */ } } } /* Populate the 'pageviews per visitor' hash table */ if (Config_process_pageviews && vi_is_pageview(req)) { res = vi_counter_incr(&vih->pageviews, visday); if (res == 0) return 1; /* out of memory */ } /* Mark the visit in the non-google-specific hashtable */ res = vi_counter_incr(&vih->visitors, visday); if (res == 0) return 1; /* out of memory */ if (res > 1) { if (seen) *seen = 1; return 0; /* visit alredy seen. */ } if (seen) *seen = 0; /* new visitor */ res = vi_counter_incr(&vih->date, date); if (res == 0) return 1; if (Config_process_monthly_visitors) { res = vi_counter_incr(&vih->month, month); if (res == 0) return 1; } return 0;}/* Process referers populating the relative hash tables. * Return non-zero on out of memory. */int vi_process_referer(struct vih *vih, char *ref, time_t age){ int res; /* Check the url against the blacklist if needed * this can be very slow... */ if (Config_filter_spam && vi_is_blacklisted_url(vih, ref)) return 0; /* Don't count internal referer (specified by the user * using --prefix options), nor google referers. */ if (vi_is_internal_link(ref)) return !vi_counter_incr(&vih->referers, "Internal Link"); if (vi_is_google_link(ref)) return !vi_counter_incr(&vih->referers, "Google Search Engine"); res = vi_counter_incr(&vih->referers, ref); if (res == 0) return 1; /* Process the referers age if enabled */ if (Config_process_referers_age) { if (vi_replace_if_older(&vih->referersage, ref, age)) return 1; } return 0;}/* Process requested URLs. Split the entries in two hash tables, * one for pages and one for images. * Return non-zero on out of memory. */int vi_process_page_request(struct vih *vih, char *url){ int res; char urldecoded[VI_LINE_MAX]; vi_urldecode(urldecoded, url, VI_LINE_MAX); if (vi_is_image(url)) res = vi_counter_incr(&vih->images, urldecoded); else res = vi_counter_incr(&vih->pages, urldecoded); if (res == 0) return 1; return 0;}/* Process log lines for 404 errors report. */int vi_process_error404(struct vih *vih, char *l, char *url, int *is404){ char urldecoded[VI_LINE_MAX]; if (is404) *is404 = 0; vi_urldecode(urldecoded, url, VI_LINE_MAX); if (strstr(l, " 404 ") && !strstr(l, " 200 ")) { if (is404) *is404 = 1; return !vi_counter_incr(&vih->error404, urldecoded); } return 0;}/* Process agents populating the relative hash table. * Return non-zero on out of memory. */int vi_process_agents(struct vih *vih, char *agent){ int res; res = vi_counter_incr(&vih->agents, agent); if (res == 0) return 1; return 0;}/* Match the list of keywords 't' against the string 's', and if * a match is found increment the matching keyword in the hashtable. * Return zero on success, non-zero on out of memory . */int vi_counter_incr_matchtable(struct hashtable *ht, char *s, char **t){ while(*t) { int res; if ((*t)[0] == '\0' || strstr(s, *t) != NULL) { char *key = *(t+1) ? *(t+1) : *t; res = vi_counter_incr(ht, key); if (res == 0) return 1; return 0; } t += 2; } return 0;}/* Process Operating Systems populating the relative hash table. * Return non-zero on out of memory. */int vi_process_os(struct vih *vih, char *agent){ /* Order may matter. */ char *oslist[] = { "Windows", NULL, "Win98", "Windows", "Win95", "Windows", "WinNT", "Windows", "Win32", "Windows", "Linux", NULL, "-linux-", "Linux", "Macintosh", NULL, "Mac_PowerPC", "Macintosh", "SunOS", NULL,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -