📄 analyze-broker-log.pl
字号:
: # *-*-perl-*-*
    eval 'exec perl -S $0 "$@"'
    if 0;    # only a shell executes the line above; perl itself skips it
#  analyze-broker-log.pl
#
#  Summarize a Harvest broker log: counts of query requests, results,
#  invalid/aborted/expired queries, histograms of response time and of
#  the number of matched objects, and statistics on query options/terms.
#
#  usage:
#
#  % analyze-broker-log.pl [-start yyyymmdd] [-stop yyyymmdd] [file [file]]
#  % analyze-broker-log.pl [-ndays n] [file [file]]
#
#  example:
#
#  % analyze-broker-log.pl broker.out
#  % analyze-broker-log.pl -start 19991010 broker.out
#  % analyze-broker-log.pl -start 19991010 -stop 19991020 broker.out
#  % analyze-broker-log.pl -ndays 10 broker.out
#
#  $Id: analyze-broker-log.pl,v 2.2 2000/02/03 12:45:54 sxw Exp $
#
###############################################################################
#  Harvest Indexer http://harvest.sourceforge.net/
#  -----------------------------------------------
#
#  The Harvest Indexer is a continued development of code developed by
#  the Harvest Project. Development is carried out by numerous individuals
#  in the Internet community, and is not officially connected with the
#  original Harvest Project or its funding sources.
#
#  Please mail lee@arco.de if you are interested in participating
#  in the development effort.
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#

use strict;
use warnings;

use Time::Local qw(timelocal);

# Inclusive range of days (yyyymmdd, compared numerically) to analyze.
my $start = 0;
my $stop  = 99999999;

my @path_parts = split(/\//, $0);
my $me         = pop @path_parts;
my $USAGE      = "$me [-start yyyymmdd] [-stop yyyymmdd] [-ndays n] files ...\n";

# ---------------------------------------------------------------------------
# Command line: options start with '-', everything else is a log file name.
# ---------------------------------------------------------------------------
my @files = ();

# Test definedness, not truth, so an argument of "0" does not end parsing.
while (defined(my $arg = shift @ARGV)) {
    unless ($arg =~ /^-/) {
        push @files, $arg;
        next;
    }

    die $USAGE
        unless $arg eq '-start' || $arg eq '-stop' || $arg eq '-ndays';

    # Every option takes one numeric value.
    my $value = shift @ARGV;
    die $USAGE unless defined $value && $value =~ /^\d+$/;

    if ($arg eq '-start') {
        $start = $value;
    }
    elsif ($arg eq '-stop') {
        $stop = $value;
    }
    else {
        # -ndays n: the n-day window that ends today (today counts as day 1).
        my $now = time;
        $stop  = yyyymmdd($now);
        $start = yyyymmdd($now - 86400 * ($value - 1));
    }
}
@ARGV = @files;    # let <> read the named files (or STDIN when none given)

# ---------------------------------------------------------------------------
# Counters filled while scanning the log.
# ---------------------------------------------------------------------------
my ($start_time, $stop_time);    # epoch seconds of first/last line analyzed
my $query_start;                 # epoch seconds of the latest "Processing Query"

my $query_request_count  = 0;
my $query_result_count   = 0;
my $query_invalid_count  = 0;
my $query_aborted_count  = 0;
my $query_lifetime_count = 0;

my $hash_desc            = 0;
my $hash_opaq            = 0;
my $hash_index_matchword = 0;
my (%hash_index_error, %hash_index_maxresult, %hash_index_case);

my $quoted_query   = 0;
my $unquoted_query = 0;
my (%n_query_terms, %query_op);

my %QueryTime;     # response-time bin (0..14, 10 s wide) -> count
my %NumResults;    # 'zero' or logarithmic bin (0..13)    -> count

# ---------------------------------------------------------------------------
# Scan the log.  A line of interest looks like "<who> <yyyymmdd> <hh:mm:ss>...".
# ---------------------------------------------------------------------------
while (my $line = <>) {
    chomp $line;

    my ($day, $hms) = $line =~ /^\s*\S+\s+(\d+)\s+([\d:]+)/
        or next;

    next if $day < $start;
    last if $day > $stop;    # stops at the first line past the requested range

    # Skip lines whose timestamp cannot be parsed instead of reusing the
    # values left over from a previous line.  The time pattern is unanchored,
    # so a trailing ':' after the seconds is tolerated.
    my ($yr, $mo, $da) = $day =~ /(\d\d\d\d)(\d\d)(\d\d)/   or next;
    my ($hr, $mi, $se) = $hms =~ /(\d\d):(\d\d):(\d\d)/     or next;

    # timelocal croaks on out-of-range fields; treat that as an unparsable line.
    my $time = eval { timelocal($se, $mi, $hr, $da, $mo - 1, $yr) };
    next unless defined $time;

    $start_time = $time unless defined $start_time;
    $stop_time  = $time;

    if ($line =~ /Processing Query/i) {
        $query_start = $time;
        $query_request_count++;

        $hash_desc++                if $line =~ /#desc/;
        $hash_opaq++                if $line =~ /#opaque/;
        $hash_index_error{$1}++     if $line =~ /#index error (\d+)/;
        $hash_index_maxresult{$1}++ if $line =~ /#index maxresult (\d+)/;
        $hash_index_case{$1}++      if $line =~ /#index case (\w+)/;
        $hash_index_matchword++     if $line =~ /#index matchword/;

        if ($line =~ /#END (.*)$/) {
            # Capture first, then strip the CR, so the quoted-query test
            # below is not defeated by a CRLF line ending.
            my $query = $1;
            $query =~ s/\r//;

            if ($query =~ /^".*"$/) {
                $quoted_query++;
            }
            else {
                $unquoted_query++;
                my @terms = split(/\s+/, $query);
                $n_query_terms{ scalar @terms }++;
            }
            $query_op{'AND'}++ if $query =~ /\s+and\s+/i;
            $query_op{'OR'}++  if $query =~ /\s+or\s+/i;
        }
    }
    elsif ($line =~ /Query returned (\d+)/i) {
        my $num_results = $1;
        $query_result_count++;

        # Response time is only meaningful once a query start has been seen.
        if (defined $query_start) {
            my $bin = int(($time - $query_start) / 10);
            $bin = 0  if $bin < 0;
            $bin = 14 if $bin > 14;
            $QueryTime{$bin}++;
        }

        # Matched-object counts use five logarithmic bins per decade.
        my $bin;
        if ($num_results > 0) {
            $bin = int(5 * log($num_results) / log(10));
            $bin = 13 if $bin > 13;
        }
        else {
            $bin = 'zero';
        }
        $NumResults{$bin}++;
    }
    elsif ($line =~ /query is invalid/i) {
        $query_invalid_count++;
    }
    elsif ($line =~ /Client is gone/i) {
        $query_aborted_count++;
    }
    elsif ($line =~ /exceeded lifetime/i) {
        $query_lifetime_count++;
    }
}

if ($query_request_count == 0) {
    print "No search requests logged.\n";
    exit 0;
}

# ---------------------------------------------------------------------------
# Report.
# ---------------------------------------------------------------------------

# Scalar localtime() yields a string with no trailing newline, so it must
# not be chopped (that would cut the last digit of the year).
my $T1      = localtime($start_time);
my $T2      = localtime($stop_time);
my $elapsed = $stop_time - $start_time;

printf("Broker Logfile Analysis for the %4.1f day period, starting\n",
    $elapsed / 86400);
printf("%s, and ending %s\n", $T1, $T2);
print "\n";

printf("%20s: %5d\n", 'Query Requests', $query_request_count);
foreach my $row (
    [ 'Query Results',     $query_result_count ],
    [ 'Invalid Queries',   $query_invalid_count ],
    [ 'Aborted Queries',   $query_aborted_count ],
    [ 'Lifetime Exceeded', $query_lifetime_count ],
    )
{
    my ($label, $count) = @$row;
    printf("%20s: %5d (%2d%%)\n", $label, $count,
        100 * $count / $query_request_count + 0.5);
}
print "\n";

if ($elapsed > 0) {
    printf("Averaged %.1f queries received per day\n",
        86400 * $query_request_count / $elapsed);
}
else {
    # All analyzed lines carry the same timestamp; a daily rate is undefined.
    print "Average queries per day not available (log spans less than one second)\n";
}
print "\n";

print <<EOF;
seconds QUERY RESPONSE TIME percent
------- ------------------------------------------------------------ ---
EOF

{
    my ($max, $sum) = histogram_totals(\%QueryTime);
    for my $i (0 .. 14) {
        my $label = sprintf("%3d-%3d ", $i * 10, ($i + 1) * 10 - 1);
        print_histogram_row($label, $QueryTime{$i} || 0, $max, $sum, 60);
    }
}
print "\n";

print <<EOF;
count NUMBER OF MATCHED OBJECTS percent
------- ------------------------------------------------------------ ---
EOF

{
    my ($max, $sum) = histogram_totals(\%NumResults);
    foreach my $i ('zero', 0 .. 13) {
        my $label;
        if ($i eq 'zero') {
            $label = " 0 ";
        }
        else {
            my $lo = ciel(10**($i / 5));
            my $hi = ciel(10**(($i + 1) / 5)) - 1;
            $label = sprintf("%3d-%3d ", $lo, $hi);
        }
        # Bars are scaled to 40 columns but padded to the 60-column field.
        print_histogram_row($label, $NumResults{$i} || 0, $max, $sum, 40);
    }
}

print "\nQuery Options ($query_request_count total queries):\n";
print_share('description', $hash_desc,            $query_request_count);
print_share('opaque',      $hash_opaq,            $query_request_count);
print_share('matchword',   $hash_index_matchword, $query_request_count);
foreach my $x (sort numcmp keys %hash_index_error) {
    print_share('error ' . $x, $hash_index_error{$x}, $query_request_count);
}
foreach my $x (sort numcmp keys %hash_index_maxresult) {
    print_share('maxresult ' . $x, $hash_index_maxresult{$x},
        $query_request_count);
}
foreach my $x (sort keys %hash_index_case) {
    print_share('case ' . $x, $hash_index_case{$x}, $query_request_count);
}

print "\nQuery Terms ($query_request_count total queries):\n";
print_share('quoted query',   $quoted_query,   $query_request_count);
print_share('unquoted query', $unquoted_query, $query_request_count);
foreach my $x (sort numcmp keys %n_query_terms) {
    print_share($x . ' query terms ', $n_query_terms{$x}, $query_request_count);
}

# Operator names are words, not numbers, so sort them as strings.
foreach my $x (sort keys %query_op) {
    print_share("queries with '$x'", $query_op{$x}, $query_request_count);
}

print <<EOF;
NOTE: count of query terms includes operations, so "this AND that"
counts as three query terms.
EOF

exit 0;

# ---------------------------------------------------------------------------
# Helpers.
# ---------------------------------------------------------------------------

# yyyymmdd($epoch): local-time date of $epoch formatted as "yyyymmdd".
sub yyyymmdd {
    my ($epoch) = @_;
    my @t = localtime($epoch);
    return sprintf("%04d%02d%02d", $t[5] + 1900, $t[4] + 1, $t[3]);
}

# histogram_totals(\%hist): returns (largest count, sum of counts);
# both are 0 for an empty histogram.
sub histogram_totals {
    my ($hist) = @_;
    my ($max, $sum) = (0, 0);
    foreach my $v (values %$hist) {
        $max = $v if $max < $v;
        $sum += $v;
    }
    return ($max, $sum);
}

# print_histogram_row($label, $count, $max, $sum, $bar_width):
# prints $label, a bar of '*' scaled so that $max fills $bar_width columns,
# blank padding up to 60 columns, and $count as a rounded percentage of $sum.
# An empty histogram ($max or $sum of 0) prints an empty bar and 0%.
sub print_histogram_row {
    my ($label, $count, $max, $sum, $bar_width) = @_;
    my $len = $max > 0 ? int($bar_width * $count / $max) : 0;
    print $label;
    print '*' x $len;
    print ' ' x (60 - $len);
    printf(" %2d%% ", $sum > 0 ? 100 * $count / $sum + 0.5 : 0);
    print "\n";
}

# print_share($label, $count, $total): prints one "label: count percent"
# line; the percentage of $total is truncated, not rounded.
sub print_share {
    my ($label, $count, $total) = @_;
    printf("%20s: %5d %2d%%\n", $label, $count, 100 * $count / $total);
}

#sub floor {
#    my $x = shift;
#    int($x);
#}

# ciel($x): smallest integer not less than $x (ceiling).
sub ciel {
    my $x = shift;
    my $whole = int($x);          # int() truncates toward zero
    $whole++ if $x > $whole;      # only positive fractions need rounding up
    return $whole;
}

# numcmp: numeric comparator for sort().
sub numcmp {
    $a <=> $b;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -