⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 process.c

📁 100 病毒源碼,原始碼,無毒 ......
💻 C
📖 第 1 页 / 共 2 页
字号:
/*** Modular Logfile Analyzer** Copyright 2000 Jan Kneschke <jan@kneschke.de>**** Homepage: http://www.kneschke.de/projekte/modlogan**    This program is free software; you can redistribute it and/or modify    it under the terms of the GNU General Public License as published by    the Free Software Foundation; either version 2 of the License, or    (at your option) any later version, and provided that the above    copyright and permission notice is included with all distributed    copies of this or derived software.    This program is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    GNU General Public License for more details.    You should have received a copy of the GNU General Public License    along with this program; if not, write to the Free Software    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA**** $Id: process.c,v 1.13 2001/01/11 22:26:25 jk Exp $*/#include <libintl.h>#include <locale.h>#include <stdlib.h>#include <stdio.h>#include <time.h>#include <string.h>#include <ctype.h>#include <pcre.h>#include "config.h"#include "mrecord.h"#include "mlocale.h"#include "mconfig.h"#include "mplugins.h"#include "mstate.h"#include "mdatatypes.h"#include "misc.h"#include "plugin_config.h"#include "webdatatypes.h"char *substitute(mconfig *ext_conf, pcre *match, char *subst, const char *str) {	config_processor *conf = ext_conf->processor;	char *name = NULL;	/* name if the state -> directory-name */	#define N 20	int ovector[3 * N], n;/* the dawn string */	if ((n = pcre_exec(match, NULL, str, strlen(str), 0, 0, ovector, 3 * N)) < 0) {		if (n != PCRE_ERROR_NOMATCH) {			fprintf(stderr, "%s.%d: execution error while matching: %d\n", __FILE__, __LINE__, n);			return NULL;		}	}/* the string has matched somehow */	if (n >= 0) {		char *s;	/* */		int sl = 0;	/* calculated string length */		int _err = 0;	/* error flag */		const char **list;				pcre_get_substring_list(str, ovector, n, &list);				s = subst;		/* check if we can safely create the state-name */		while (*s && _err == 0) {			if (*s == '$') {				if (isdigit(*(s+1))) {					int d = *(s+1) - '0';										if (d > n || d < 0) {						_err = 1;						fprintf(stderr, "%s.%d: match-index out of range: %s - %d\n", __FILE__, __LINE__, subst, d);					} else {						sl += strlen(list[d]);						s += 2;					}				} else {					fprintf(stderr, "%s.%d: $ isn't followed by a digit: %s\n", __FILE__, __LINE__, subst);					_err = 1;				}			} else {				sl++;				s++;			}		}				if (_err == 0) {			char *dst;		/* the string is ok. copy the string accordingly to the def */			s = subst;						name = malloc(sl+1);			*name = '\0';									dst = name;						while (*s) {				if (*s == '$' && isdigit(*(s+1))) {					int d = *(s+1) - '0';										if (ext_conf->debug_level > 2)						fprintf(stderr, "%s.%d: catting (%d): %s\n", __FILE__, __LINE__, d, list[d]);											if ( d <= n ) {						strcpy(dst, list[d]);											dst += strlen(list[d]);						s += 2;					} else {				/* just a paranoid check */						fprintf(stderr, "%s.%d: ALARM !!! REPORT ME: (should have already been already detected)\n", __FILE__, __LINE__);						fprintf(stderr, "%s.%d: requested index '$%d' is put of range (0-%d). check your splitby definition", __FILE__, __LINE__, d, n);						fprintf(stderr, "%s.%d: using $%d as of the directoryname instead.", __FILE__, __LINE__, d);						*dst++ = *s++;						*dst = '\0';					}				} else {					*dst++ = *s++;					*dst = '\0';				}			}					/* just a paranoid check */			if (strlen(name) > sl)				fprintf(stderr, "%s.%d: ALARM !!! REPORT ME: strlen(name) > sl [ %d > %d ]. possible SEGV\n", __FILE__, __LINE__, strlen(name), sl);					}		pcre_free(list);	}		return name;}char *stripindex(char *str) {	char *c1;		if(!str) return NULL;		if ((c1 = strstr(str, "/index."))) {		*(c1+1) = '\0';	}		if (strlen(str) > 1 && (str[strlen(str)-1] == '/')) {		str[strlen(str)-1] = '\0';	}		return str;}#define M_WEB_HIDE_HOST		1#define M_WEB_HIDE_REQ_URL	2#define M_WEB_HIDE_REFERRER	3#define M_WEB_HIDE_BROKENLINK	4#define M_WEB_GROUP_REFERRER	1#define M_WEB_GROUP_HOST	2#define M_WEB_GROUP_OS		3#define M_WEB_GROUP_UA		4#define M_WEB_GROUP_REQ_URL	5#define M_WEB_GROUP_BROKENLINK	6#define M_WEB_GROUP_SEARCHSTRINGS 7int is_hidden(mlist *l, const char *url) {	if (!url || !l) return 0;		while (l) {		data_Match *data = (data_Match *)l->data;				if (data && strmatch(data->match, url)) {			return 1;		}				l = l->next;	}		return 0;}int hide_field(mconfig *ext_conf, const char *url, int field) {	config_processor *conf = ext_conf->processor;	mlist *l = NULL;		switch (field) {		case M_WEB_HIDE_REFERRER:			l = conf->hide_referrer;			break;		case M_WEB_HIDE_BROKENLINK:			l = conf->hide_brokenlinks;			break;		case M_WEB_HIDE_REQ_URL:			l = conf->hide_url;			break;		case M_WEB_HIDE_HOST:			l = conf->hide_host;			break;		default:			fprintf(stderr, "%s.%d: Unknown group field: %d\n", __FILE__, __LINE__, field);			break;	}				if (!url || !l) return 0;		return is_hidden(l, url);}char * is_grouped (mconfig *ext_conf, mlist *l, const char *str) {		if (!str || !l) return NULL;		while (l) {		data_Match *data = (data_Match *)l->data;				if (data && strmatch(data->match, str)) {			return substitute(ext_conf, data->match, data->string, str);		}				l = l->next;	}		return NULL;}char * group_field (mconfig *ext_conf, const char *str, int field) {	config_processor *conf = ext_conf->processor;	mlist *l = NULL;		switch (field) {		case M_WEB_GROUP_REFERRER:			l = conf->group_referrer;			break;		case M_WEB_GROUP_HOST:			l = conf->group_hosts;			break;		case M_WEB_GROUP_OS:			l = conf->group_os;			break;		case M_WEB_GROUP_UA:			l = conf->group_ua;			break;		case M_WEB_GROUP_REQ_URL:			l = conf->group_url;			break;		case M_WEB_GROUP_BROKENLINK:			l = conf->group_brokenlinks;			break;		case M_WEB_GROUP_SEARCHSTRINGS:			l = conf->group_searchstrings;			break;		default:			fprintf(stderr, "%s.%d: Unknown group field: %d\n", __FILE__, __LINE__, field);			break;	}		if (!str || !l) return NULL;		return is_grouped (ext_conf, l, str);		}int is_file(mlogrec_web *record) {	if (record->req_status == 200) {		return 1;	}	return 0;}int is_page(mconfig *ext_conf, mlogrec_web *record) {		config_processor *conf = ext_conf->processor;	mlist *l = conf->page_type;	char *url = record->req_url;		if (!url || !l) return 0;		if (!is_file(record)) return 0;		while (l) {		data_Match *data = (data_Match *)l->data;				if (data && strmatch(data->match, url)) {#if 0			fprintf(stderr, "%s.%d: is a page: %s\n", __FILE__, __LINE__, url);#endif			return 1;		}				l = l->next;	}		return 0;}int is_robot(const char *url) {		if (!url) return 0;		return !strcmp(url, "/robots.txt");}int is_visit(mconfig *ext_conf, mstate *state,mlogrec *record) {	config_processor *conf = ext_conf->processor;	mstate_web *staweb = state->ext;	mlist *l = staweb->visit_list;	int visit_timeout = conf->visit_timeout;	int visited = 0;	int new_entry = 1;	int debug_me = conf->debug_visits;		mlogrec_web *recweb = record->ext;	mlogrec_web_extclf *recext = NULL;		if (recweb == NULL)  return -1;	if (recweb->req_url == NULL)  return -1;		/* can be NULL, but that handled below */	recext = recweb->ext;		while(l) {		data_Visit *data = (data_Visit *)l->data;				if (!data) break;				/* exit-page */		if (record->timestamp - data->timestamp > visit_timeout) {			mlist *act;			data_StrInt *_data;						if (data->type == MVISIT_ROBOT) {				_data = createStrInt(data->lasturl, 1);				mhash_insert(staweb->indexed_pages, _data);			}						if (debug_me) {				fprintf(stderr, "process.is_visit: <- %20s (%20s), time: %ld - %ld\n",data->string, data->useragent, data->timestamp, record->timestamp - data->timestamp);			}						/* writing last url to exitpage */			_data = createStrInt(data->lasturl, 1);			mhash_insert(staweb->exit_pages, _data);						staweb->allvisittime += data->timediff;			staweb->allvisitlength += data->count;							act = l;						if (l->next) {				l = l->next;								if (act->next)					act->next->prev = act->prev;				if (act->prev) {					act->prev->next = act->next;				} else {					staweb->visit_list = l;				}								mlist_free_entry(act);			} else {				data->destructor(data);								l->data = NULL;								break;			}		/* another hit for the same visit */		} else if (!strcmp(recweb->req_host, data->string) && 			(!recext || !(recext->req_useragent) || !(data->useragent) || 				!strcmp(recext->req_useragent, data->useragent))) {			data->timediff += record->timestamp - data->timestamp;						if (debug_me) {				fprintf(stderr, "process.is_visit: -- %20s (%20s), time: %ld - %ld\n",data->string, data->useragent, record->timestamp, record->timestamp - data->timestamp);			}						data->timestamp = record->timestamp;						/* set last visited page */			if (!hide_field(ext_conf,recweb->req_url, M_WEB_HIDE_REQ_URL)) {				if (data->type == MVISIT_ROBOT) {					data_StrInt *_data;										_data = createStrInt(data->lasturl, 1);					mhash_insert(staweb->indexed_pages, _data);				}				free(data->lasturl);				data->lasturl = malloc(strlen(recweb->req_url)+1);				strcpy(data->lasturl, recweb->req_url);								data->count++;			}			new_entry = 0;							break;		} else {			if (!l->next) break;						l = l->next;		}	}		/* insert the a new visit if the URL should be hidden from the user */	if (new_entry && !hide_field(ext_conf,recweb->req_url, M_WEB_HIDE_REQ_URL)) {		int type = is_robot(recweb->req_url);		data_StrInt *data = createStrInt(recweb->req_url, 1);		mhash_insert(staweb->entry_pages, data);		visited = 1;				if (debug_me) {			fprintf(stderr, "process.is_visit: -> %20s (%20s), time: %ld\n",recweb->req_host, recext ? recext->req_useragent : NULL, record->timestamp);		}				if (l->data) {			mlist *n = mlist_init();			n->data = createVisit(recweb->req_host, recweb->req_url, recext ? recext->req_useragent : NULL, 1, 0, record->timestamp, type);			n->prev = l;			l->next = n;		} else {			l->data = createVisit(recweb->req_host, recweb->req_url, recext ? recext->req_useragent : NULL, 1, 0, record->timestamp, type);		}	}	return visited;}int is_searchengine(mconfig *ext_conf, mstate *state, mlogrec_web_extclf *record) {	config_processor *conf = ext_conf->processor;	int site_found = 0;	int key_found = 0;	char *found_key = NULL;		mlist *l = conf->searchengines;	mstate_web *staweb = state->ext;		if (!l) return 0;		if (!(record->ref_getvars && record->ref_url)) return 0;		while (l && !site_found) {		char *search;		char *c1, *c2, c3;				if (!(l->data)) {			l = l->next;			continue;		}				search = ((data_SubList *)l->data)->string;				c1 = record->ref_getvars;	/* check every /&(.*?)&/ part for a known string */				while (!site_found && (c2 = strchr(c1, '&'))) {			c3 = *c2;			*c2 = '\0';						if (!strncmp(c1, search, strlen(search))) {				data_StrInt *data;				mlist *p = ((data_SubList *)l->data)->sublist;				char *c4;								if ((c4 = strchr(c1, '='))) {					c4++;				} else {					c4 = c1;				}								key_found = 1;				found_key = search;								/* we got the searchkey, let's check the site now */								while (p) {					data_Match *_data;					int n;									_data = p->data;									n = pcre_exec(_data->match, NULL, record->ref_url, strlen(record->ref_url), 0, 0, NULL, 0);										if (n < 0) {						if (n != PCRE_ERROR_NOMATCH) {							fprintf(stderr,"%s.%d: execution error while matching: %d\n", __FILE__, __LINE__, n);						}					} else {						break;					}					p = p->next;				}				if (p) {					data_Match *_data = p->data;									if (_data && _data->string) {						int key = strtol(_data->string,NULL,10);												if (strlen(_data->string) > 2 && 							_data->string[0] == '"' && 							_data->string[strlen(_data->string)-1] == '"') {					/* grouping of the urls */												char *s = malloc(strlen(_data->string));							char *grouped;														strncpy(s, _data->string+1, strlen(_data->string)-1);							s[strlen(_data->string)-2] = '\0';																				if ((grouped = group_field(ext_conf, urlescape(c4), M_WEB_GROUP_SEARCHSTRINGS))) {								data = createStr2Int(grouped, 1, M_GROUP);								mhash_insert(staweb->searchstring, data);								free(grouped);							} else {								data = createStr2Int(urlescape(c4), 1, M_PLAIN);								mhash_insert(staweb->searchstring, data);							}													data = createStr2Int(s, 1, M_GROUP);							mhash_insert(staweb->searchsite, data);													free(s);						} else if (key >= 0) {					/* take the url and be happy */							char *grouped;							if ((grouped = group_field(ext_conf, urlescape(c4), M_WEB_GROUP_SEARCHSTRINGS))) {								data = createStr2Int(grouped, 1, M_GROUP);								mhash_insert(staweb->searchstring, data);								free(grouped);							} else {								data = createStr2Int(urlescape(c4), 1, M_PLAIN);								mhash_insert(staweb->searchstring, data);							}							data = createStr2Int(record->ref_url, 1, M_PLAIN);							mhash_insert(staweb->searchsite, data);						} else if (key < 0) {					/* this is FALSE detection, just ignore it */											} else {							fprintf(stderr, "%s.%d: don't know how the handle searchstring-definition-action: %s\n",__FILE__, __LINE__, _data->string);						}					} else {						char *grouped;						if ((grouped = group_field(ext_conf, urlescape(c4), M_WEB_GROUP_SEARCHSTRINGS))) {							data = createStr2Int(grouped, 1, M_GROUP);							mhash_insert(staweb->searchstring, data);							free(grouped);						} else {							data = createStr2Int(urlescape(c4), 1, M_PLAIN);							mhash_insert(staweb->searchstring, data);						}											data = createStr2Int(record->ref_url, 1, M_PLAIN);						mhash_insert(staweb->searchsite, data);					}					site_found = 1;				} 			}			*c2 = c3;			c1 = ++c2;		}	/* check the last string */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -