📄 process.c
字号:
if (!site_found && !strncmp(c1, search, strlen(search))) { data_StrInt *data; char *c4; mlist *p = ((data_SubList *)l->data)->sublist; if ((c4 = strchr(c1, '='))) { c4++; } else { c4 = c1; } key_found = 1; found_key = search; /* we got the searchkey, let's check the site now */ while (p) { data_Match *_data; int n; _data = p->data; n = pcre_exec(_data->match, NULL, record->ref_url, strlen(record->ref_url), 0, 0, NULL, 0); if (n < 0) { if (n != PCRE_ERROR_NOMATCH) { fprintf(stderr,"%s.%d: execution error while matching: %d\n", __FILE__, __LINE__, n); } } else { break; } p = p->next; } if (p) { data_Match *_data = p->data; if (_data && _data->string) { int key = strtol(_data->string,NULL,10); if (strlen(_data->string) > 2 && _data->string[0] == '"' && _data->string[strlen(_data->string)-1] == '"') { /* grouping of the urls */ char *grouped; char *s = malloc(strlen(_data->string)); strncpy(s, _data->string+1, strlen(_data->string)-1); s[strlen(_data->string)-2] = '\0'; if ((grouped = group_field(ext_conf, urlescape(c4), M_WEB_GROUP_SEARCHSTRINGS))) { data = createStr2Int(grouped, 1, M_GROUP); mhash_insert(staweb->searchstring, data); free(grouped); } else { data = createStr2Int(urlescape(c4), 1, M_PLAIN); mhash_insert(staweb->searchstring, data); } data = createStr2Int(s, 1, M_GROUP); mhash_insert(staweb->searchsite, data); free(s); } else if (key >= 0) { /* take the url and be happy */ char *grouped; if ((grouped = group_field(ext_conf, urlescape(c4), M_WEB_GROUP_SEARCHSTRINGS))) { data = createStr2Int(grouped, 1, M_GROUP); mhash_insert(staweb->searchstring, data); free(grouped); } else { data = createStr2Int(urlescape(c4), 1, M_PLAIN); mhash_insert(staweb->searchstring, data); } data = createStr2Int(record->ref_url, 1, M_PLAIN); mhash_insert(staweb->searchsite, data); } else if (key < 0) { /* this is FALSE detection, just ignore it */ } else { fprintf(stderr, "%s.%d: don't know how the handle searchstring-definition-action: %s\n",__FILE__, __LINE__, _data->string); } } else { char *grouped; if ((grouped = group_field(ext_conf, urlescape(c4), M_WEB_GROUP_SEARCHSTRINGS))) { data = createStr2Int(grouped, 1, M_GROUP); mhash_insert(staweb->searchstring, data); free(grouped); } else { data = createStr2Int(urlescape(c4), 1, M_PLAIN); mhash_insert(staweb->searchstring, data); } data = createStr2Int(record->ref_url, 1, M_PLAIN); mhash_insert(staweb->searchsite, data); } site_found = 1; } } l = l->next; } if (conf->debug_searchengines > 0 && !site_found && key_found) { char *k = malloc(strlen(found_key)); k[strlen(found_key)-1] = '\0'; strncpy(k, found_key, strlen(found_key)-1); fprintf(stderr, "o SK: ?? %s -> %s\n", record->ref_url, record->ref_getvars); fprintf(stderr, "o --: section [%s] string: %s\n", k, record->ref_url); free(k); } else if (conf->debug_searchengines > 1 && !site_found && !key_found) { fprintf(stderr, "o SE: ?? %s -> %s\n", record->ref_url, record->ref_getvars); fprintf(stderr, "o --: section [%s] string: %s\n", found_key, record->ref_url); } return site_found;}char *urltolower(char *str) { char *s; if (!str) return NULL; if (!(strncasecmp(str, "http://", strlen("http://")))) { s = str; while (*s && *s != '/') *s++ = tolower(*s); s = str + strlen("http://"); } else if (!(strncasecmp(str, "https://", strlen("https://")))) { s = str; while (*s && *s != '/') *s++ = tolower(*s); s = str + strlen("https://"); } else if (!(strncasecmp(str, "ftp://", strlen("ftp://")))) { s = str; while (*s && *s != '/') *s++ = tolower(*s); s = str + strlen("ftp://"); } else { s = str; } while (*s && *s != '/') *s++ = tolower(*s); return str;}mstate *splitter(mconfig *ext_conf, mlist *state_list, mlogrec *record) { config_processor *conf = ext_conf->processor; char *name = NULL; /* name if the state -> directory-name */ mstate *state = NULL; int split_enable = 0; /* record extension */ mlogrec_web *recweb = NULL; /* record web extensions */ mlogrec_web_extclf *recext = NULL; mlogrec_web_squid *recsquid = NULL; mlogrec_web_ftp *recftp = NULL; recweb = record->ext; switch (recweb->ext_type) { case M_RECORD_TYPE_WEB_EXTCLF: recext = recweb->ext; break; case M_RECORD_TYPE_WEB_FTP: recftp = recweb->ext; break; case M_RECORD_TYPE_WEB_SQUID: recsquid = recweb->ext; break; } if (conf->split_def) { mlist *l = conf->split_def; while (l) { data_Split *data = l->data; char *str = NULL; if (!data) break; split_enable = 1; /* decide which field we shall look at */ switch(data->type) { case M_SPLIT_FIELD_REQURL: str = recweb->req_url; break; case M_SPLIT_FIELD_REQUSER: str = recweb->req_user; break; case M_SPLIT_FIELD_SRVHOST: if (recext) str = recext->srv_host; break; case M_SPLIT_FIELD_SRVPORT: if (recext) str = recext->srv_port; break; case M_SPLIT_FIELD_REQHOST: str = recweb->req_host; break; case M_SPLIT_FIELD_REFURL: if (recext) str = recext->ref_url; break; case M_SPLIT_FIELD_DEFAULT: break; default: fprintf(stderr, "%s.%d: unknown type: %d\n", __FILE__, __LINE__, data->type); } if (ext_conf->debug_level > 2) fprintf(stderr, "%s.%d: -1- type: %d - %s\n", __FILE__, __LINE__, data->type, str); if (str != NULL) { /* do the test on the string */ name = substitute(ext_conf, data->match, data->string, str); } else if (data->type == M_SPLIT_FIELD_DEFAULT) { /* if a default is specified it is used when it occures */ name = malloc(strlen(data->string)+1); strcpy(name, data->string); if (ext_conf->debug_level > 2) fprintf(stderr, "%s.%d: (def) state-name: %s\n", __FILE__, __LINE__, name); } if (name) break; l = l->next; } } if (split_enable == 0) { /* splitter isn't enabled, take a default name */ name = malloc(1); *name = '\0'; } if (name) { /* we've got a name. try to find the list entry with this name */ mlist *l = state_list; while (l) { data_State *data = l->data; if (!data) break; if (!strcmp(name, data->string)) { state = data->state; break; } if (!l->next) break; l = l->next; } if (!state) { data_State *data = createState(name,NULL,NULL); mlist_insert(state_list, data); state = data->state; } free(name); } else { fprintf(stderr, "%s.%d: no match found by the splitter. isn't there a default ??\n", __FILE__, __LINE__); } return state;}int mplugins_processor_insert_record(mconfig *ext_conf, mlist *state_list, mlogrec *record) { config_processor *conf = ext_conf->processor; struct tm *tm; int isvisit = 0, isfile = 0, ispage = 0; /* record extension */ mlogrec_web *recweb = NULL; /* record web extensions */ mlogrec_web_extclf *recext = NULL; mlogrec_web_squid *recsquid = NULL; mlogrec_web_ftp *recftp = NULL; mstate_web *staweb = NULL; mstate *state = NULL; if (record->ext_type != M_RECORD_TYPE_WEB) return -1; if (record->ext == NULL) return -1; recweb = record->ext; state = splitter(ext_conf, state_list, record); if (state == NULL) return -1; switch (recweb->ext_type) { case M_RECORD_TYPE_WEB_EXTCLF: recext = recweb->ext; break; case M_RECORD_TYPE_WEB_FTP: recftp = recweb->ext; break; case M_RECORD_TYPE_WEB_SQUID: recsquid = recweb->ext; break; } if (state->ext) { switch(state->ext_type) { case M_STATE_TYPE_WEB: staweb = state->ext; break; default: fprintf(stderr, "%s.%d: unsupport state subtype\n", __FILE__, __LINE__); return -1; } } else { state->ext = mstate_init_web(); state->ext_type = M_STATE_TYPE_WEB; staweb = state->ext; } urltolower(recweb->req_url); urltolower(recweb->req_host); if (recext != NULL) urltolower(recext->ref_url);/* hourly/daily stats */ if ((tm = localtime(&(record->timestamp)))) { /* perhaps we have created a new state */ if (!state->timestamp) { state->year = tm->tm_year+1900; state->month = tm->tm_mon+1; } state->timestamp = record->timestamp; staweb->hours[tm->tm_hour].xfersize += recweb->xfersize; staweb->days[tm->tm_mday-1].xfersize += recweb->xfersize; staweb->hours[tm->tm_hour].hits++; staweb->days[tm->tm_mday-1].hits++; if (is_page(ext_conf, recweb)) { staweb->hours[tm->tm_hour].pages++; staweb->days[tm->tm_mday-1].pages++; ispage = 1; } stripindex(recweb->req_url); if (recext != NULL) stripindex(recext->ref_url); if (is_file(recweb)) { staweb->hours[tm->tm_hour].files++; staweb->days[tm->tm_mday-1].files++; isfile = 1; } if (is_visit(ext_conf, state, record)) { staweb->hours[tm->tm_hour].visits++; staweb->days[tm->tm_mday-1].visits++; isvisit = 1; } }/* Used Protocol for this query*/ if (recweb->req_protocol) { data_StrInt *data = createStrInt(recweb->req_protocol, 1); mhash_insert(staweb->req_prot_hash, data); }/* User Method for this query (GET, POST, PUT, HEAD, OPTIONS) */ if (recweb->req_method) { data_StrInt *data = createStrInt(recweb->req_method, 1); mhash_insert(staweb->req_meth_hash, data); } if (recweb->req_status) { char buf[4]; data_StrInt *data; sprintf(buf, "%3d", recweb->req_status); data = createStrInt(buf, 1); mhash_insert(staweb->status_hash, data);/* FIXME: specific to HTTP */ switch (recweb->req_status) { case 404: if (!hide_field(ext_conf, recweb->req_url, M_WEB_HIDE_BROKENLINK)) { char *grouped; data_BrokenLink *link; if ((grouped = group_field(ext_conf, recweb->req_url, M_WEB_GROUP_BROKENLINK))) { link = createBrokenLink(grouped, 1, M_GROUP, record->timestamp, recext != NULL ? recext->ref_url : NULL); mhash_insert(staweb->status_missing_file, link); free(grouped); } else { link = createBrokenLink(recweb->req_url, 1, M_PLAIN, record->timestamp, recext != NULL ? recext->ref_url : NULL); mhash_insert(staweb->status_missing_file, link); } } break; case 500: { data_BrokenLink *link; link = createBrokenLink(recweb->req_url, 1, M_PLAIN, record->timestamp, recext != NULL ? recext->ref_url : NULL); mhash_insert(staweb->status_internal_error, link); break; } } } if (recweb->req_host && !hide_field(ext_conf, recweb->req_host, M_WEB_HIDE_HOST)) { char *grouped; char *req_hostname = recweb->req_host; /* try to resolve the ip */ if ((grouped = strrchr(recweb->req_host, '.'))) { /* check if we have to resolve the IP */ if (isondx(grouped+1) == M_RESOLV_UNRESOLVED) { /* we have to ask he resolver for an FQDN */#ifdef HAVE_LIBADNS adns_answer *answer = NULL; data_Query *query; if (conf->debug_resolver) fprintf(stderr, "%s.%d: resolve %-15s -- ", __FILE__, __LINE__, recweb->req_host); if (!ext_conf->disable_resolver) { if ((query = mhash_get_data(ext_conf->query_hash, recweb->req_host))) { if (!query->resolved_ip) { adns_wait(*(ext_conf->adns), query->query, &answer, NULL); if (answer->status == adns_s_ok) { query->resolved_ip = malloc(strlen(*answer->rrs.str)+1); strcpy(query->resolved_ip, *answer->rrs.str); if (conf->debug_resolver) fprintf(stderr, "cache miss "); } else { if (conf->debug_resolver) fprintf(stderr, "error "); query->resolved_ip = malloc(strlen(recweb->req_host)+1); strcpy(query->resolved_ip, recweb->req_host); } free(answer); } else { if (conf->debug_resolver) fprintf(stderr, "cache hit "); } if (conf->debug_resolver) fprintf(stderr, "--> %s\n", query->resolved_ip); req_hostname = query->resolved_ip; } }#endif } } if ((grouped = group_field(ext_conf, req_hostname, M_WEB_GROUP_HOST))) { data_Str3Int *data = createStr3Int(grouped, 1, M_GROUP, isvisit); mhash_insert(staweb->host_hash, data); free(grouped); } else { data_Str3Int *data = createStr3Int(req_hostname, 1, M_PLAIN, isvisit); mhash_insert(staweb->host_hash, data); } /* splitting the TLD from the FQDN */ if ((grouped = strrchr(req_hostname, '.'))) { if (misoname(grouped+1)) { data_Str3Int *data = createStr3Int(misoname(grouped+1), 1, M_PLAIN, isvisit); mhash_insert(staweb->country_hash, data); } } } if (recweb->req_url) { data_StrInt *data; char *c1, c2; if ((c1 = strstr(recweb->req_url, "favicon.ico"))) { c2 = *c1; *c1 = '\0'; data = createStrInt(recweb->req_url, 1); mhash_insert(staweb->bookmarks, data); *c1 = c2; } else if (is_robot(recweb->req_url)) { if (recext == NULL || recext->req_useragent == NULL) { if (ext_conf->debug_level > 1) fprintf(stderr, "%s requested w/o useragent set - ignored.\n",recweb->req_url); } else { data = createStrInt(recext->req_useragent, 1); mhash_insert(staweb->robots, data); } } /* hide url */ if (!hide_field(ext_conf, recweb->req_url, M_WEB_HIDE_REQ_URL)) { char *grouped; if ((grouped = group_field(ext_conf, recweb->req_url, M_WEB_GROUP_REQ_URL))) { data = createStr2Int(grouped, 1, M_GROUP); mhash_insert(staweb->req_url_hash, data); free(grouped); } else { data = createStr2Int(recweb->req_url, 1, M_PLAIN); mhash_insert(staweb->req_url_hash, data); } } }/*** Extensions*//* User Operating System */ if (recext != NULL) { if (recext->req_useros) { char *grouped; if ((grouped = group_field(ext_conf, recext->req_useros, M_WEB_GROUP_OS))) { data_Str3Int *data = createStr3Int(grouped, 1, M_GROUP, isvisit); mhash_insert(staweb->os_hash, data); free(grouped); } else { data_Str3Int *data = createStr3Int(recext->req_useros, 1, M_PLAIN, isvisit); mhash_insert(staweb->os_hash, data); } }/* User Agent */ if (recext->req_useragent) { char *grouped; if ((grouped = group_field(ext_conf, recext->req_useragent, M_WEB_GROUP_UA))) { data_Str3Int *data = createStr3Int(grouped, 1, M_GROUP, isvisit); mhash_insert(staweb->ua_hash, data); free(grouped); } else { data_Str3Int *data = createStr3Int(recext->req_useragent, 1, M_PLAIN, isvisit); mhash_insert(staweb->ua_hash, data); } } if (recext->ref_url) { if (!hide_field(ext_conf, recext->ref_url, M_WEB_HIDE_REFERRER)) { if (!is_searchengine(ext_conf, state, recext)) { char *grouped; data_StrInt *data; if ((grouped = group_field(ext_conf, recext->ref_url,M_WEB_GROUP_REFERRER))) { data = createStr2Int(grouped, 1, M_GROUP); mhash_insert(staweb->ref_url_hash, data); free(grouped); } else { data = createStr2Int(recext->ref_url, 1, M_PLAIN); mhash_insert(staweb->ref_url_hash, data); } } } } } return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -