⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zquery.pl.in

📁 harvest是一个下载html网页的机器人
💻 IN
📖 第 1 页 / 共 2 页
字号:
#!@PERL@ -w
#
# Query a z39.50 server with zoomsh.
# 5dec1999/kjl
#
# Usage:
#   for production:
#     put it into your /cgi-bin/ and use it via your browser as
#	http://localhost/cgi-bin/zquery.pl
#   for limited debugging:
#     zquery.pl [querystring]
#
# Installation:
# Put this script to your cgi-bin directory of the http daemon.
# Edit $::HTML_BASE to point to the directory where the supplemental html
# pages are.
# Additionally, you can modify print_html_* () to customize the appearance
# of the html pages.
#
# Request parameters read by this script:
#   query     search string (or taken from the command line arguments)
#   page      zero-based result page to show
#   per_page  number of hits per page
#   n_lines   number of lines per hit
#   op        boolean operator used to combine the query words
#   DEBUG     enables additional diagnostic output
#
# NOTE(review): "@PERL@" and "@prefix@" look like placeholders that are
# substituted when this template is installed -- confirm against the build.
#

use CGI 'param';
use CGI::Carp;
use CGI::Carp qw(fatalsToBrowser);
use URI::Escape;
use Time::HiRes qw(gettimeofday tv_interval);
use integer;
use strict;

# DEBUG is echoed into generated HTML and URLs, so reduce it to a plain
# number: 0 when absent or false, the given digits when numeric, 1 for any
# other true value.
$::DEBUG = param ('DEBUG');
if (! $::DEBUG) {
    $::DEBUG = 0;
} elsif ($::DEBUG !~ /^\d+$/) {
    $::DEBUG = 1;
}

#############################
# configuration starts here #
#############################

# where am I installed?
# this script will find out its location when started as cgi script.
# If you want to use it under mod_perl, move this script to your perl
# modules directory and change the path.
##########################################
$::ME = $ENV{'SCRIPT_NAME'};
$::ME = "/Harvest/cgi-bin/zquery.pl" unless $::ME;

# prefix to supplemental html pages
$::HTML_BASE = "../brokers";

# base URL to XML objects
$::DATA_URL = "../brokers/tengu.local/objects-xml" if ($::DEBUG);

# where is the zebrasrv and the database to use
$::ZURL = "localhost:9999/harvest";	 # how to contact z39.50 server

# some default values
# you can override these with the values of your query form
$::OP	    = "and";	# default boolean operator for queries ("and", "or")
$::N_LINES  = 3;	# default number of lines per hit to show
$::PER_PAGE = 10;	# default number of hits per page to show

# end of configuration
##############################################################################

# where is zoomsh
$::ZOOMSH = "@prefix@/bin/zoomsh";

# do some sanity checks
test_zebra ();

# parse arguments
# without command line arguments we must be running as a cgi script;
# otherwise the arguments themselves are the query
$::query = @ARGV ? join (" ", @ARGV) : param ('query');

if (! $::query) {
    print_html_header ("Harvest Search");
    print_html_query_form ();
} else {
    # The values below end up in a shell command line, in HTML and in URLs,
    # so anything that is not of the expected shape falls back to the
    # configured default instead of being passed through.

    # page to show
    $::page = param ('page');
    $::page = 0 unless (defined $::page && $::page =~ /^\d+$/);

    # number of hits to show per page (must be positive: it is used as a
    # divisor when the page index is computed)
    $::per_page = param ('per_page');
    $::per_page = $::PER_PAGE
	unless (defined $::per_page && $::per_page =~ /^\d+$/
		&& $::per_page > 0);

    # number of lines per entry to show
    $::n_lines = param ('n_lines');
    $::n_lines = $::N_LINES
	unless (defined $::n_lines && $::n_lines =~ /^\d+$/
		&& $::n_lines > 0);

    # how to combine query words
    $::op = param ('op');
    $::op = $::OP unless (defined $::op && $::op =~ /^\w+$/);

    @::terms = split_query ($::query);

    # build query string in zoomsh notation
    $::z_query = build_query ($::query, $::op, @::terms);
    $::start = $::page * $::per_page;

    # NOTE(review): $::z_query is placed inside a double-quoted shell word.
    # Whether build_query () already neutralises shell metacharacters
    # (", `, $, \) cannot be seen from here -- confirm, or run zoomsh
    # without a shell.
    $::zoomq =  "$::ZOOMSH \"connect $::ZURL\"";
    $::zoomq .= " \"set preferredRecordSyntax XML\"";
    $::zoomq .= " \"search $::z_query\"";
    $::zoomq .= " \"show $::start $::per_page\" quit";

    # User supplied text is only echoed after HTML-escaping it.
    my $as_html = sub {
	my $text = shift;
	$text = "" unless (defined $text);
	$text =~ s/&/&amp;/g;
	$text =~ s/</&lt;/g;
	$text =~ s/>/&gt;/g;
	$text =~ s/"/&quot;/g;
	return $text;
    };

    # NOTE(review): the raw query is handed over as part of the page title;
    # confirm that print_html_header () escapes it.
    print_html_header ("Search result for \"$::query\"");
    print_html_query_form ($::query);

    print "Query sent to Zebra server:<br>\n<b>"
	. $as_html->($::z_query) . "</b>\n<p>\n"
	if ($::DEBUG);

    print "<b>" . $as_html->($::query) . ":</b>";

    $::t0 = [gettimeofday];
    #($::num_hit, $::aref) = read_hash_zoomsh ();
    ($::num_hit, $::aref) = read_hash_zoomsh_xml ();

    if (! $::num_hit || $::num_hit <= 0) {
	print_html_nomatch ();
    } else {
	@::hits = $::aref ? @{$::aref} : ();
	$::elapsed = tv_interval ($::t0);

	# output
	print_num_hit ($::num_hit);
	printf (" in %.3f seconds\n", $::elapsed);
	print_hit ($::page, $::per_page, \@::hits, @::terms);

	# print pager navigation links
	print_html_page_index (uri_escape ($::query), $::num_hit,
			       $::page, $::per_page)
	    if ($::num_hit > $::per_page);
    }
}
print_html_footer ();

############################################
# execution ends here
############################################

############################################
# subroutines
############################################

############################################
# html header
	    #   args: title
############################################
# Prints the CGI "Content-type" header, the empty line that terminates the
# header section and the opening HTML up to and including <body>.
# The title is HTML-escaped here because the search string typed by the
# user is passed in as part of it.
sub print_html_header {
    my $title = _html_escape (shift);

    print <<"EOF";
Content-type: text/html

<html>
<head>
<title>$title</title>
<style>
<!--
div.pager A { color: black; }
//-->
</style>
</head>
<body bgcolor="#ffffff" text="#000000" link="#0000ff" vlink="#000080" alink="#ff0000">
EOF
}

############################################
# html end tags
############################################
# Prints the closing part of every page: rule, project link and end tags.
sub print_html_footer {
    print <<"EOF";
<hr size=3 width=90% noshade>
<center>
<a href="http://harvest.sourceforge.net/"><b>About Harvest</b></a>
</center>
</body>
</html>
EOF
}

############################################
# escape text for use in html
#   args: text (may be undefined)
#   returns: text with & < > " replaced by
#            their character entities
############################################
sub _html_escape {
    my $text = shift;

    $text = "" unless (defined $text);
    $text =~ s/&/&amp;/g;	# must come first, the others insert "&"
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

############################################
# query form
#   args: default querystring value
############################################
# Prints the search form.  $::op, $::n_lines and $::per_page are set to
# their configured defaults when they are still empty, and are carried along
# as hidden fields (as is DEBUG when it is enabled).  Every value is escaped
# and quoted because it may originate from the request.
sub print_html_query_form {
    my $query = shift;

    $::op       = $::OP       unless ($::op);
    $::n_lines  = $::N_LINES  unless ($::n_lines);
    $::per_page = $::PER_PAGE unless ($::per_page);

    my $op       = _html_escape ($::op);
    my $n_lines  = _html_escape ($::n_lines);
    my $per_page = _html_escape ($::per_page);

    # pre-fill the input field when a query was given
    my $value_attr = "";
    $value_attr = " value=\"" . _html_escape ($query) . "\""
	if (defined $query && length ($query));

    my $debug_field = "";
    $debug_field = "<input type=hidden name=DEBUG value=\""
	. _html_escape ($::DEBUG) . "\">\n"
	if ($::DEBUG);

    print <<"EOF";
<table>
<tr>
<td><a href="$::HTML_BASE/"><h1>Harvest</h1></a></td>
<td>&nbsp;&nbsp;&nbsp;</td>
<td>
<form action="$::ME" method=get>
<nobr>
<input type=text name=query$value_attr size=40 maxsize=200>
<input type=submit name=button value="Harvest Search">
<a href="$::HTML_BASE/help.html">Help</a>
</nobr>
<input type=hidden name=op value="$op">
<input type=hidden name=n_lines value="$n_lines">
<input type=hidden name=per_page value="$per_page">
$debug_field</form>
</td>
</tr>
</table>
EOF
}

############################################
# print number of hits for a query
#   args: number_of_hits
############################################
# Prints " N match found" for exactly one hit, " N matches found" otherwise.
sub print_num_hit {
    my $num_hit = shift;
    my $noun    = ($num_hit == 1) ? "match" : "matches";

    print " $num_hit $noun found";
}

##################################################################
# split string to words or group of words when enclosed in quotes
# args: string
# returns: array with words, quoted words are kept in groups
#          (the enclosing quotes are removed)
##################################################################
sub split_query {
    my $query = shift;
    my @res;

    return @res unless (defined $query);

    $query =~ s/^\s+//;		# remove leading whitespaces
    $query =~ s/\s+$//;		# remove trailing whitespaces

    my $quoted = 0;		# true while inside a "quoted group"
    my $i      = 0;		# index of the entry being assembled
    foreach my $word (split (/\s+/, $query)) {
	if ($quoted) {
	    $res[$i] .= " $word";
	} else {
	    $res[$i] = $word;
	}

	# a leading quote opens a group, a trailing quote (after at least
	# one other character) closes it again
	$quoted = 1 if ($word =~ /^\"/);
	$quoted = 0 if ($quoted && $word =~ /\S+\"$/);

	$i++ unless ($quoted);
    }

    # strip the enclosing quotes; a trailing quote is only removed when a
    # leading one was present
    foreach my $term (@res) {
	$term =~ s/\"$// if ($term =~ s/^\"//);
    }
    return @res;
}

########################################
# highlight search terms
# args:
#   line to highlight
#   array with search terms
# returns:
#   line with every whole-word, case-insensitive occurrence of a term
#   wrapped in <b>...</b>
########################################
# Terms starting with "-" or containing one of . * + ? are not highlighted.
# All remaining terms are matched literally and in a single pass, so that a
# term can neither break the regular expression nor match inside the <b>
# tags that were inserted for another term.
sub highlight {
    my $line  = shift;
    my @terms = @_;

    $line = "" unless (defined $line);

    my @literal = grep {
	defined ($_) && length ($_) && ! /^\-|[\.\*\+\?]/
    } @terms;
    return $line unless (@literal);

    # longest term first, so that it wins over a shorter term it contains
    my $pattern = join ("|",
			map  { quotemeta ($_) }
			sort { length ($b) <=> length ($a) } @literal);

    $line =~ s/\b($pattern)\b/<b>$1<\/b>/ig;
    return $line;
}

########################################
# print hits
# arguments:
#   number of page
#   hits per page
#   reference to an array of hits, each hit being a hash with the keys
#     url, title, full-text, file-size, last-modification-time
#     (and score, filename when DEBUG is set)
#   array of search terms
########################################
# NOTE(review): url, title and full-text are printed as delivered.  Whether
# they are already HTML-safe depends on the code that builds the hit list,
# which is not visible here -- confirm before adding or omitting escaping.
sub print_hit {
    my $page     = shift;
    my $per_page = shift;
    my $aref     = shift;
    my @terms    = @_;

    # running number of the hit over all pages (shown in DEBUG mode only)
    my $entry = $page * $per_page;

    foreach my $hit (@{$aref}) {
	$entry++;

	my $url       = defined $hit->{'url'} ? $hit->{'url'} : "";
	my $title     = $hit->{'title'};
	my $full_text = highlight ($hit->{'full-text'}, @terms);

	# a hit without title would otherwise produce an invisible link
	$title = $url unless (defined $title && $title =~ /\S/);

	# size in whole KB, but never show "0KB"
	my $file_size = int (($hit->{'file-size'} || 0) / 1024);
	$file_size = 1 if ($file_size == 0);

	my @date = gmtime ($hit->{'last-modification-time'} || 0);
	my $last_modification_time = sprintf ("%d-%02d-%02d",
					      $date[5] + 1900, $date[4] + 1,
					      $date[3]);

	print "<p>\n";
	print "<b>$entry. </b>" if ($::DEBUG);
	print <<"EOF";
<a href="$url"><b>$title</b></a><br>
$full_text<br>
EOF

	# show the address without the scheme
	$url =~ s/^http\:\/\///;
	print "$url <b>$file_size" . "KB $last_modification_time</b>\n";

	if ($::DEBUG) {
	    my $score    = defined $hit->{'score'} ? $hit->{'score'} : "";
	    my $filename = defined $hit->{'filename'} ? $hit->{'filename'} : "";

	    # keep only the part of the path below objects-xml/
	    $filename =~ s/\S+\/objects-xml\///g;
	    print "<br>\n";
	    print "Relevance score: $score\n<br>\n";
	    print "<a href=\"$::DATA_URL/$filename\">xsoif file: $filename</a>\n";
	}
    }
}

##############################
# print page-navigation links
# args:
#   querystring (already URI-escaped by the caller)
#   number of hits
#   page number (zero-based)
#   number of items per page
##############################
# Prints "Previous", up to eleven numbered page links around the current
# page, and "Next".  The current page is shown in red and is not a link.
sub print_html_page_index {
    my $query    = shift;
    my $num_hit  = shift;
    my $page     = shift;
    my $per_page = shift;

    # an empty or zero page size would divide by zero below
    $per_page = 1 if (! $per_page || $per_page < 1);

    my $url = "$::ME?query=$query";
    $url .= "&op="       . uri_escape (defined $::op ? $::op : "");
    $url .= "&n_lines="  . uri_escape (defined $::n_lines ? $::n_lines : "");
    $url .= "&per_page=" . uri_escape ($per_page);
    $url .= "&DEBUG="    . uri_escape ($::DEBUG) if ($::DEBUG);

    # inside an attribute value "&" has to be written as "&amp;"
    my $href = _html_escape ($url);
    my $link = sub {
	my ($target, $label) = @_;
	return "<a href=\"$href&amp;page=$target\">$label</a>";
    };

    # number of pages, rounded up
    my $num_pages = int ($num_hit / $per_page);
    $num_pages++ if ($num_hit % $per_page);

    # first page number of the window: five pages before the current one,
    # moved further back near the end so that the window stays filled
    my $start = ($page > 5) ? $page - 5 : 0;
    $start -= ($page + 6 - $num_pages) if ($num_pages <= $page + 5);
    $start = 0 if ($start < 0);

    print "<center>\n";
    print "<div class=pager>\n";

    print $link->($page - 1, "Previous") . "&nbsp;\n" if ($page > 0);

    for (my $i = $start; $i < $page; $i++) {
	print $link->($i, $i + 1) . "&nbsp;\n";
    }

    print "<font color=red><b>";
    print $page + 1;
    print "</b></font>&nbsp;\n";

    for (my $i = $page + 1; $i < $num_pages && $i < $start + 11; $i++) {
	print $link->($i, $i + 1) . "&nbsp;\n";
    }

    print $link->($page + 1, "Next") . "\n" if ($page < $num_pages - 1);

    print "</div>\n";
    print "</center>\n";
}

############################################
# print error page and exit
# args: error message
############################################

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -