⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 analyze-broker-log.pl

📁 harvest是一个下载html网页的机器人
💻 PL
字号:
: # *-*-perl-*-*
    eval 'exec perl -S $0 "$@"'
    if $running_under_some_shell;
# analyze-broker-log.pl
#
# usage:
#
#      % analyze-broker-log.pl [-start yyyymmdd] [-stop yyyymmdd] [file [file]]
#      % analyze-broker-log.pl [-ndays n] [file [file]]
#
# example:
#
#      % analyze-broker-log.pl broker.out
#      % analyze-broker-log.pl -start 19991010 broker.out
#      % analyze-broker-log.pl -start 19991010 -stop 19991020 broker.out
#      % analyze-broker-log.pl -ndays 10 broker.out
#
# $Id: analyze-broker-log.pl,v 2.2 2000/02/03 12:45:54 sxw Exp $
#
################################################################################
#
#  Harvest Indexer http://harvest.sourceforge.net/
#  -----------------------------------------------
#
#  The Harvest Indexer is a continued development of code developed by
#  the Harvest Project. Development is carried out by numerous individuals
#  in the Internet community, and is not officially connected with the
#  original Harvest Project or its funding sources.
#
#  Please mail lee@arco.de if you are interested in participating
#  in the development effort.
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# Overview:
#
#   Reads broker log lines of the form "<who> <yyyymmdd> <hh:mm:ss...> <text>"
#   from the named files (or standard input), keeps the lines whose day lies
#   in [-start, -stop], and prints:
#     * totals of query requests, results, invalid, aborted and
#       lifetime-exceeded queries,
#     * a histogram of query response times (10 second bins),
#     * a histogram of the number of matched objects (logarithmic bins),
#     * counts of the "#..." query options and of query term statistics.
#

use strict;
use warnings;
# $running_under_some_shell above is deliberately referenced only once.
no warnings 'once';

use Time::Local qw(timelocal);

# Inclusive day range (yyyymmdd, compared numerically) to analyze.
my $start = 0;
my $stop  = 99999999;

my @path_parts = split (/\//, $0);
my $me = pop @path_parts;
my $USAGE = "$me -start day -stop day files ...\n";

#
# Command line parsing.  Anything not starting with '-' is a log file.
# "defined" is tested explicitly so that a file named "0" does not end the
# loop early and cause the remaining arguments to be dropped.
#
my @files = ();
while (defined (my $arg = shift @ARGV)) {
    unless ($arg =~ /^-/) {
        push (@files, $arg);
        next;
    }
    if ($arg eq '-start') {
        $start = shift @ARGV;
        die $USAGE unless (defined $start && $start =~ /^\d+$/);
    }
    elsif ($arg eq '-stop') {
        $stop = shift @ARGV;
        die $USAGE unless (defined $stop && $stop =~ /^\d+$/);
    }
    elsif ($arg eq '-ndays') {
        my $n = shift @ARGV;
        die $USAGE unless (defined $n && $n =~ /^\d+$/);
        $n--;                           # today counts as the first day
        my $now = time;                 # one timestamp for both ends
        my @t = localtime ($now);
        $stop = sprintf ("%04d%02d%02d", $t[5]+1900, $t[4]+1, $t[3]);
        @t = localtime ($now - 86400*$n);
        $start = sprintf ("%04d%02d%02d", $t[5]+1900, $t[4]+1, $t[3]);
    }
    else {
        die $USAGE;
    }
}
@ARGV = @files;

# Epoch seconds of the first and last accepted log line.
my ($start_time, $stop_time);
# Epoch seconds of the most recent "Processing Query" line.
my $query_start;

my $query_request_count  = 0;
my $query_result_count   = 0;
my $query_invalid_count  = 0;
my $query_aborted_count  = 0;
my $query_lifetime_count = 0;

my $hash_desc            = 0;
my $hash_opaq            = 0;
my $hash_index_matchword = 0;
my (%hash_index_error, %hash_index_maxresult, %hash_index_case);

my $quoted_query   = 0;
my $unquoted_query = 0;
my (%n_query_terms, %query_op);

# Histograms: bin number (or 'zero') => count.
my (%QueryTime, %NumResults);

#
# Scan the log.
#
while (<>) {
    chomp;                              # chomp, not chop: the last line of a
                                        # file may lack a newline
    next unless (/^\s*\S+\s+(\d+)\s+([\d:]+)/);
    my ($day, $hms) = ($1, $2);

    # Validate the timestamp fields before using them, so a malformed line
    # neither reuses values from an earlier line nor triggers the "last"
    # below with a number that is not a date.
    next unless ($day =~ /^(\d\d\d\d)(\d\d)(\d\d)$/);
    my ($yr, $mo, $da) = ($1, $2, $3);
    next unless ($hms =~ /^(\d\d):(\d\d):(\d\d)/);
    my ($hr, $mi, $se) = ($1, $2, $3);

    next if ($day < $start);
    # Stops reading entirely: assumes the input is in chronological order.
    last if ($day > $stop);

    # timelocal croaks on out-of-range fields; skip such lines.
    my $time = eval { timelocal ($se, $mi, $hr, $da, $mo-1, $yr) };
    next unless defined $time;

    $start_time = $time unless defined $start_time;
    $stop_time  = $time;

    if (/Processing Query/i) {
        $query_start = $time;
        $query_request_count++;
        $hash_desc++                    if (/#desc/);
        $hash_opaq++                    if (/#opaque/);
        $hash_index_error{$1}++         if (/#index error (\d+)/);
        $hash_index_maxresult{$1}++     if (/#index maxresult (\d+)/);
        $hash_index_case{$1}++          if (/#index case (\w+)/);
        $hash_index_matchword++         if (/#index matchword/);
        if (/#END (.*)$/) {
            # Copy the capture first, then strip the carriage return: a
            # successful substitution resets $1.
            my $query = $1;
            $query =~ s/\r//;
            if ($query =~ /^".*"$/) {
                $quoted_query++;
            } else {
                $unquoted_query++;
                # split ' ' ignores leading whitespace, so it does not
                # produce an empty first term.
                my @terms = split (' ', $query);
                $n_query_terms{scalar @terms}++;
            }
            $query_op{'AND'}++  if ($query =~ /\s+and\s+/i);
            $query_op{'OR'}++   if ($query =~ /\s+or\s+/i);
        }
    }
    elsif (/Query returned (\d+)/i) {
        my $num_results = $1;
        $query_result_count++;

        # Response time is measured from the most recent request line; it
        # is unknown when no request has been seen in the analyzed range.
        if (defined $query_start) {
            my $bin = int (($time - $query_start) / 10);
            $bin = 0  if ($bin < 0);
            $bin = 14 if ($bin > 14);
            $QueryTime{$bin}++;
        }

        my $bin;
        if ($num_results > 0) {
            # Five bins per decade of result count.
            $bin = int (5 * log ($num_results) / log (10));
            $bin = 13 if ($bin > 13);
        } else {
            $bin = 'zero';
        }
        $NumResults{$bin}++;
    }
    elsif (/query is invalid/i) {
        $query_invalid_count++;
    }
    elsif (/Client is gone/i) {
        $query_aborted_count++;
    }
    elsif (/exceeded lifetime/i) {
        $query_lifetime_count++;
    }
}

if ($query_request_count == 0) {
    print "No search requests logged.\n";
    exit 0;
}

#
# Report.
#

# localtime in scalar context has no trailing newline, so nothing is chopped.
my $T1 = localtime ($start_time);
my $T2 = localtime ($stop_time);
my $elapsed = $stop_time - $start_time;

printf ("Broker Logfile Analysis for the %4.1f day period, starting\n",
    $elapsed / 86400);
printf ("%s, and ending %s\n", $T1, $T2);
print "\n";
printf ("%20s: %5d\n", 'Query Requests', $query_request_count);
foreach my $row (
    [ 'Query Results',     $query_result_count   ],
    [ 'Invalid Queries',   $query_invalid_count  ],
    [ 'Aborted Queries',   $query_aborted_count  ],
    [ 'Lifetime Exceeded', $query_lifetime_count ],
) {
    my ($label, $count) = @$row;
    printf ("%20s: %5d (%2d%%)\n",
        $label,
        $count,
        100 * $count / $query_request_count + 0.5);
}
print "\n";
if ($elapsed > 0) {
    printf ("Averaged %.1f queries received per day\n",
        86400 * $query_request_count / $elapsed);
} else {
    # All accepted lines share one timestamp; a daily rate is undefined.
    print "Averaged n/a queries received per day\n";
}
print "\n";

print <<EOF;
seconds                     QUERY RESPONSE TIME                  percent
------- ------------------------------------------------------------ ---
EOF

my ($time_max, $time_sum) = (0, 0);
foreach my $v ( values %QueryTime ) {
    $time_max = $v if ($time_max < $v);
    $time_sum += $v;
}
foreach my $i (0 .. 14) {
    print_bar (sprintf ("%3d-%3d ", $i*10, ($i+1)*10-1),
        $QueryTime{$i} || 0, $time_max, $time_sum, 60);
}
print "\n";

print <<EOF;
count                     NUMBER OF MATCHED OBJECTS              percent
------- ------------------------------------------------------------ ---
EOF

my ($results_max, $results_sum) = (0, 0);
foreach my $v ( values %NumResults ) {
    $results_max = $v if ($results_max < $v);
    $results_sum += $v;
}
foreach my $i ('zero', 0 .. 13) {
    my $label;
    if ($i eq 'zero') {
        $label = "      0 ";
    } else {
        my $lo = ciel (10**($i/5));
        my $hi = ciel (10**(($i+1)/5)) - 1;
        $label = sprintf ("%3d-%3d ", $lo, $hi);
    }
    # NOTE(review): bars here are scaled to 40 columns although the bar area
    # is 60 wide (the response-time histogram uses 60) -- kept as found;
    # confirm whether 40 is intended.
    print_bar ($label, $NumResults{$i} || 0, $results_max, $results_sum, 40);
}

print "\nQuery Options ($query_request_count total queries):\n";
print_share ('description', $hash_desc,            $query_request_count);
print_share ('opaque',      $hash_opaq,            $query_request_count);
print_share ('matchword',   $hash_index_matchword, $query_request_count);
foreach my $x ( sort numcmp keys %hash_index_error ) {
    print_share ('error ' . $x, $hash_index_error{$x}, $query_request_count);
}
foreach my $x ( sort numcmp keys %hash_index_maxresult ) {
    print_share ('maxresult ' . $x, $hash_index_maxresult{$x},
        $query_request_count);
}
foreach my $x ( sort keys %hash_index_case ) {
    print_share ('case ' . $x, $hash_index_case{$x}, $query_request_count);
}

print "\nQuery Terms ($query_request_count total queries):\n";
print_share ('quoted query',   $quoted_query,   $query_request_count);
print_share ('unquoted query', $unquoted_query, $query_request_count);
foreach my $x ( sort numcmp keys %n_query_terms ) {
    print_share ($x . ' query terms  ', $n_query_terms{$x},
        $query_request_count);
}
# The keys are the strings 'AND' and 'OR', so sort them as strings.
foreach my $x ( sort keys %query_op ) {
    print_share ("queries with '$x'", $query_op{$x}, $query_request_count);
}

print <<EOF;
NOTE: count of query terms includes operations, so "this AND that"
counts as three query terms.
EOF

exit 0;

# Print one histogram row: the label, a bar of '*' in which $max corresponds
# to $scale columns, blank padding up to 60 columns, and $count as a rounded
# percentage of $sum.  A zero $max or $sum yields an empty bar / 0%.
sub print_bar {
    my ($label, $count, $max, $sum, $scale) = @_;
    my $len = $max > 0 ? int ($scale * $count / $max) : 0;
    print $label;
    print '*' x $len;
    print ' ' x (60 - $len);
    printf (" %2d%% ", $sum > 0 ? 100 * $count / $sum + 0.5 : 0);
    print "\n";
}

# Print "label: count percent%" where the percentage is $count relative to
# $total (truncated by %d).  $total must be non-zero.
sub print_share {
    my ($label, $count, $total) = @_;
    printf ("%20s: %5d %2d%%\n", $label, $count, 100 * $count / $total);
}

# Round up to the next integer (int() truncates, so this is a ceiling only
# for non-negative values, which is all this script passes in).
sub ciel {
    my $x = shift;
    $x += 1.0 unless ($x == int ($x));
    int ($x);
}

# Numeric comparison for sort.
sub numcmp {
    $a <=> $b;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -