⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zquery-gils.pl.in

📁 Harvest 是一个下载 HTML 网页的机器人
💻 IN
📖 第 1 页 / 共 2 页
字号:
#!@PERL@ -w
#
# Query a z39.50 server with zoomsh.
# 5dec1999/kjl
#
# Usage:
#   for production:
#     put it into your /cgi-bin/ and use it via your browser as
#       http://localhost/cgi-bin/zquery.pl
#   for limited debugging:
#     zquery.pl [querystring]
#
# Installation:
# Edit path to point to the directory where your zoomsh binary is and
# put this script to your cgi-bin directory of the http daemon.
# Additionally, you can modify print_html_* () to customize the appearance
# of the html pages.
#
# Some notes about parsing output from zoomsh:
# read_hash_zoomsh () reads the output from zoomsh and builds a hash
# containing data received from z39.50 server. It uses a simple and
# inefficient parse mechanism without validity check of the returned data.
# It works well enough to experiment with the search system.
#
# If you want to roll your own parser, here are some non special cases:
#
# - lines are continued at next line with leading whitespaces like this:
#   title: This is a very long title line which does not fit in one line and
#     it will be continued at next line, like this
#
# - long lines can look like this:
#   linkage: http://do.main/path/page.html
#              or
#   linkage:
#     http://do.main/path/page.html
#
# - lines longer than 77 characters are broken into two lines by putting a
#   "=" at position 77 and continued at next line.
#
# Security notes:
# - Every value taken from the request is validated before it is used to
#   build the zoomsh command line, and HTML-escaped before it is echoed
#   back into a page.
#

use CGI 'param';
use CGI::Carp;
use CGI::Carp qw(fatalsToBrowser);
use URI::Escape;
use integer;
use strict;

# DEBUG is echoed back into the query form, so never keep it as free text:
# numeric levels are kept as they are, any other true value becomes 1.
$::DEBUG = param ('DEBUG');
$::DEBUG = 0 unless ($::DEBUG);
$::DEBUG = 1 unless ($::DEBUG =~ /\A\d{1,9}\z/);

#############################
# configuration starts here #
#############################

#
# where am I installed?
# this script will find out its location when started as cgi script.
# If you want to use it under mod_perl, move this script to your perl
# modules directory and change the path.
#
#########################################
$::ME = $ENV { 'SCRIPT_NAME' };
$::ME = "/Harvest/cgi-bin/zquery.pl" unless $::ME;

# path to raw zebra data
#$::DATA_STORE = "http://tengu.arco.de/Harvest/brokers/tengu.arco.de/objects/";

# prefix to supplemental html pages
$::HTML_BASE = "/Harvest/brokers/zquery";

# make sure zoomsh is in path
$ENV{'PATH'} = "@prefix@/bin";

# where is the zebrasrv and the database to use
$::ZURL = "localhost:9999/harvest";      # how to contact z39.50 server

# some default values
# you can override these with the values of your query form
$::OP       = "and";    # default boolean operator for queries
#$::OP       = "or";    # "and" and "or" allowed
$::N_LINES  = 3;        # default number of lines per hit to show
$::PER_PAGE = 10;       # default number of hits per page to show

# end of basic configuration

# exit after this number of seconds
alarm (60);

# parse arguments

# no arguments
# we must be called as a cgi script
$::query = param ('query') if ($#ARGV == -1);

# we found some arguments
$::query = join (" ", @ARGV) if ($#ARGV >= 0);

if ($::query) {
    # page to show (0 is the first page)
    $::page = _uint_param ('page', 0);

    # number of hits to show per page
    $::per_page = _uint_param ('per_page', $::PER_PAGE);

    # number of lines per entry to show
    $::n_lines = _uint_param ('n_lines', $::N_LINES);

    # how to combine query words; only "and" and "or" are allowed,
    # everything else falls back to the configured default
    $::op = param ('op');
    $::op = (defined $::op && $::op =~ /\A(and|or)\z/i) ? lc ($1) : $::OP;

    # build query string in zoomsh notation
    $::z_query = build_query ($::query, $::op);

    # $::start and $::per_page are plain integers at this point, so they
    # cannot smuggle shell metacharacters into the command line.
    # NOTE(review): $::zoomsh is presumably executed through a shell by
    # read_hash_zoomsh () (not visible here) -- confirm that build_query ()
    # neutralizes quotes and shell metacharacters in $::z_query.
    $::start = $::page * $::per_page;
    $::zoomsh =  "zoomsh \"connect $::ZURL\" \"search $::z_query\"";
    $::zoomsh .= " \"show $::start $::per_page\" quit";

    ($::num_hit, $::aref) = read_hash_zoomsh ();
    # do not die under "strict refs" when no hit list came back
    @::hits = $::aref ? @{$::aref} : ();

    # output
    print_html_header ("Search result for \"$::query\"");
    print_html_query_form ($::query);
    print_z3950_query ($::z_query)                if ($::DEBUG);
    print_num_hit ($::query, $::num_hit);
    print_hit ($::page, $::per_page, \@::hits)    if ($::num_hit && $::num_hit > 0);

    # print pager navigation links
    print_html_page_index (uri_escape ($::query), $::num_hit,
                           $::page, $::per_page)
        if ($::num_hit && $::num_hit > $::per_page);
} else { # if ($query)
    print_html_header ("Z39.50 Search");
    print_html_query_form ();
}
print_html_footer ();

#############################################
# execution ends here                       #
#############################################

#############################################
# subroutines                               #
#############################################

#############################################
# escape text for use in html               #
#   args: plain text (may be undef)         #
#   returns: text with & < > " replaced by  #
#            entities, "" for undef         #
#############################################
sub _html_escape {
    my $text = shift;

    return "" unless (defined $text);

    # "&" has to be replaced first, otherwise the entities produced by
    # the following substitutions would be escaped a second time
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

#############################################
# read a positive integer cgi parameter     #
#   args: parameter name, default value     #
#   returns: the parameter as a number if   #
#            it consists of 1 to 9 digits   #
#            and is greater than zero, the  #
#            default otherwise              #
#############################################
sub _uint_param {
    my ($name, $default) = @_;
    my $value = param ($name);

    # the length limit keeps the value within integer range ("use integer")
    return $default unless (defined $value && $value =~ /\A\d{1,9}\z/);
    return $default unless ($value > 0);
    return $value + 0;
}

#############################################
# html header                               #
#   args: title (plain text, is escaped     #
#         here before it is printed)        #
#############################################
sub print_html_header {
    my $title = _html_escape (shift);

    # the empty line after the Content-type line ends the http header
    print <<"EOF";
Content-type: text/html

<html>
<head>
<title>$title</title>
<style>
<!--
div.pager A { color: black; }
//-->
</style>
</head>
<body bgcolor="#ffffff" text="#000000" link="#0000ff" vlink="#000080" alink="#ff0000">
EOF
}

#############################################
# html end tags                             #
#############################################
sub print_html_footer {
    print <<"EOF";
<hr size=3 width=90% noshade>
<center><b>
<a href="$::HTML_BASE/help.html"><nobr>[Help formulating queries]</nobr></a>
<a href="$::HTML_BASE/register.html"><nobr>[Register new pages]</nobr></a>
<a href="$::HTML_BASE/contact.html"><nobr>[Contact information]</nobr></a>
<a href="$::HTML_BASE/about.html"><nobr>[About this search system]</nobr></a>
</b></center>
<br><br>
</body>
</html>
EOF
}

#############################################
# query form                                #
#   args: default querystring value         #
#         (plain text, optional)            #
#############################################
sub print_html_query_form {
    my $query = shift;

    # fall back to the configured defaults when no query was processed
    $::op = $::OP unless ($::op);
    $::n_lines = $::N_LINES unless ($::n_lines);
    $::per_page = $::PER_PAGE unless ($::per_page);

    # everything interpolated into an attribute is escaped and quoted
    my $action   = _html_escape ($::ME);
    my $op       = _html_escape ($::op);
    my $n_lines  = _html_escape ($::n_lines);
    my $per_page = _html_escape ($::per_page);
    my $debug    = _html_escape ($::DEBUG);

    print <<"EOF";
<h1>Harvest Search</h1>
<form action="$action" method=get>
<center><nobr>
<input type=text name=query
EOF
    if (defined $query && $query ne "") {
        print " value=\"" . _html_escape ($query) . "\"";
    }
    # "maxlength" is the attribute which limits the input length
    print <<"EOF";
 size=40 maxlength=200>
<input type=submit name=what value=Search>
</nobr></center>
<input type=hidden name=op value="$op">
<input type=hidden name=n_lines value="$n_lines">
<input type=hidden name=per_page value="$per_page">
<input type=hidden name=DEBUG value="$debug">
</form>
EOF
}

#############################################
# print z39.50 query sent to server         #
#   args: query in zoomsh notation          #
#############################################
sub print_z3950_query {
    my $query = shift;

    if ($query) {
        # the query is derived from user input, escape it
        print "Query sent to Z39.50 server: <b>"
            . _html_escape ($query) . "</b><br>\n";
    } else {
        print "No query sent to Z39.50 server.<br>\n";
    }
}

#############################################
# print number of hits for a query          #
# args: querystring, number_of_hits         #
#############################################
sub print_num_hit {
    my $query = shift;
    my $num_hit = shift;

    # the querystring is user input, escape it
    print "<b>" . _html_escape ($query) . ":</b> ";
    if (! $num_hit) {
        print "no match";
    } elsif ($num_hit == 1) {
        print "$num_hit match";
    } else {
        print "$num_hit matches";
    }
    print " found\n";
    print "<br><br>\n";
}

#############################################
# print hits                                #
# arguments:                                #
#   number of page                          #
#   hits per page                           #
#   hash of hit arrays                      #
#############################################
sub print_hit {
    my ($page, $per_page, $aref, @hits, $i, $entry, $key); #, $obj_data);
    my ($rank, $title, $dateOfLastModification, $bytes, $linkage, $sampleText);
    my (@date);

    $page = shift;
    $per_page = shift;
    $aref = shift;
    @hits = @{$aref};

    $entry = $page * $per_page;
    for $i (0 .. $#hits) {
        $entry++;
        $rank                   = $hits [$i] { 'rank' };
        $linkage                = $hits [$i] { 'linkage' };
        $dateOfLastModification = $hits [$i] { 'dateOfLastModification' };
        $bytes                  = $hits [$i] { 'bytes' };
        $title                  = $hits [$i] { 'title' };
        $sampleText             = $hits [$i] { 'sampleText' };

        @date = gmtime ($dateOfLastModification);
        $date[4]++;
        $date[5] += 1900;
        $dateOfLastModification = $date[5] . "-" . $date[4] . "-" . $date[3];

        print << "EOF";
<b>$entry. <a href="$linkage">$title</a></b><br>
$sampleText<br>
$linkage<br>
Document size: <b>$bytes Bytes</b>,

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -