📄 zquery.pl.in
字号:
sub print_html_error {
    # Print a complete "service not available" page and terminate the script.
    # args: reason (text inserted verbatim into the page)
    my $reason = shift;

    print_html_header ("Service temporarily not available");
    print "<h1 align=center>Service temporarily not available</h1>\n";
    print "<center>Reason: <b>$reason</b>.</center>\n";
    print_html_footer ();
    exit;
}

#############################################
# print timeout message and exit            #
#############################################
# Installed as the SIGALRM handler by read_hash_zoomsh and
# read_hash_zoomsh_xml. Hangs up the zoomsh child recorded in $::zoompid,
# prints a hint plus the page footer and exits with status 1.
sub print_html_timeout {
    # Only signal a real child: an undefined pid would be treated as pid 0,
    # which addresses the whole process group of this script.
    kill 'HUP', $::zoompid if ($::zoompid);

    print <<"EOF";
<p>Query timed out.
<p>Please:
<ul>
<li>Choose a more specific search term.
<li>Choose more search terms.
</ul>
<p>
EOF
    print_html_footer ();
    exit 1;
}

#############################################
# print brief query hint                    #
#############################################
# Prints the "no documents found" hint; does not print the footer and
# does not exit.
sub print_html_nomatch {
    print <<"EOF";
<p>No documents were found matching your query.
<p>Please:
<ul>
<li>Verify that the spelling of the search terms were correct.
<li>Choose fewer search terms.
<li>Choose more general search terms.
</ul>
<p>
EOF
}

##########################################################
# test that everything necessary is in place and running #
# args: none                                             #
##########################################################
# Calls print_html_error (which exits) when $::ZOOMSH is not executable,
# cannot be started, or prints anything while connecting to $::ZURL.
sub test_zebra {
    # can't find zoomsh
    print_html_error ("Can't run $::ZOOMSH") if (!-x "$::ZOOMSH");

    # can't connect to zebrasrv: any output of the connect/quit run is
    # treated as a failure report
    my $pid = open (my $zoomsh, '-|', "$::ZOOMSH \"connect $::ZURL\" quit");
    print_html_error ("Can't run $::ZOOMSH") if (!$pid);

    my $complaint = <$zoomsh>;
    close ($zoomsh);
    print_html_error ("Can't connect to zebrasrv at $::ZURL")
        if (defined $complaint);
}

#
# This reads the output from zoomsh and builds a hash containing data received
# from z39.50 server. It uses a simple parser without validity check of the
# returned data, but works well enough to experiment with the search system.
#
# Here are some caveats when parsing the results produced by zoomsh:
#
# - lines are continued at next line with leading whitespaces like this:
#   title: This is a very long title line which does not fit in one line and
#       it will be continued at next line, like this
#
# - long lines can look like this:
#   linkage: http://do.main/path/page.html
#   or
#   linkage:
#       http://do.main/path/page.html
#
# - lines longer than 77 characters are broken into two lines by putting a
#   "=" at position 77 and continued at next line.
#

###################################################
# collect the continuation lines of one field     #
# (private helper of read_hash_zoomsh)            #
# args:                                           #
#   input handle                                  #
#   value found on the field's own line or undef  #
#   text put between two joined lines             #
#   max. number of continuation lines (optional,  #
#   undef means no limit)                         #
# returns:                                        #
#   the joined value                              #
#   the first line not belonging to the field     #
#   (undef at end of input)                       #
###################################################
sub _read_continued_value {
    my ($in, $value, $glue, $max_lines) = @_;
    my $taken = 0;
    my $line  = <$in>;

    # continuation lines start with at least four whitespace characters
    while (defined $line && $line =~ /^\s{4}.+$/) {
        last if (defined $max_lines && $taken >= $max_lines);

        if ($value) {
            if ($value =~ /\S=$/) {
                # a trailing "=" marks a long line broken in two: remove the
                # marker and join without any separator
                chop $value;
            }
            else {
                $value .= $glue;
            }
        }
        $line =~ s/^\s+//;
        chomp $line;
        $value = (defined $value ? $value : "") . $line;

        $line = <$in>;
        $taken++;
    }
    return ($value, $line);
}

###################################################
# read search results from zoomsh (text records)  #
# args: none                                      #
# returns:                                        #
#   number of total hits                          #
#   reference to a list of hashes, one per hit    #
###################################################
# Runs the command in $::zoomq and parses its output. The first line is
# expected to end in ": <n> hits"; each "xsoif:" line starts a new hit.
# Keys filled per hit: rank, last-modification-time, file-size, title,
# full-text (at most $::n_lines continuation lines) and url.
# The run is aborted by print_html_timeout after 60 seconds.
sub read_hash_zoomsh {
    my ($num_hit, @hits);
    my $i          = -1;    # index of the hit being filled, -1 = none yet
    my @one_liners = ("rank", "last-modification-time", "file-size");

    # exit after this number of seconds; the handler is installed first so
    # the alarm can never fire without it
    $SIG{ALRM} = \&print_html_timeout;
    alarm (60);

    # remember the child so that print_html_timeout can hang it up
    $::zoompid = open (my $in, '-|', $::zoomq);
    if (!$::zoompid) {
        alarm (0);
        return $num_hit, \@hits;
    }

    # parse number of hits
    my $line = <$in>;
    ($num_hit) = $line =~ /^.*: (\d+) hits$/ if (defined $line);

    # build a list of hashes with hits
    $line = <$in>;
    LINE: while (defined $line) {
        # start of a new record
        if ($line =~ /^xsoif:$/) {
        # if ($line =~ /^local-control-number: \d+$/) {
            $i++;
            $line = <$in>;
            next LINE;
        }

        # ignore everything in front of the first record; there is no hit
        # to store it in
        if ($i < 0) {
            $line = <$in>;
            next LINE;
        }

        # one liners
        foreach my $field (@one_liners) {
            if ($line =~ /^\s+\Q$field\E: (\d+)$/) {
                $hits[$i]{$field} = $1;
                $line = <$in>;
                next LINE;
            }
        }

        # multi liners
        if ($line =~ /^\s+title:/) {
            my ($first) = $line =~ /^\s+title: (.+)$/;
            ($hits[$i]{'title'}, $line) =
                _read_continued_value ($in, $first, " ");
            $hits[$i]{'title'} = remove_quirks ($hits[$i]{'title'});
            next LINE;
        }
        if ($line =~ /^\s+full-text:/) {
            my ($first) = $line =~ /^\s+full-text: (.+)$/;
            ($hits[$i]{'full-text'}, $line) =
                _read_continued_value ($in, $first, " ", $::n_lines || 0);
            $hits[$i]{'full-text'} = remove_quirks ($hits[$i]{'full-text'});
            next LINE;
        }
        if ($line =~ /^\s+url:/) {
            my ($first) = $line =~ /^\s+url: (.+)$/;
            # the parts of a url are joined without a separator
            ($hits[$i]{'url'}, $line) =
                _read_continued_value ($in, $first, "");
            next LINE;
        }

        # anything else is ignored
        $line = <$in>;
    }
    close ($in);
    alarm (0);

    # print all parsed lines for debugging
    # keys used to print search results
    #@::keys = ("rank", "url", "last-modification-time",
    #           "file-size", "title", "full-text");
    #foreach $i (0 .. $#hits) {
    #    foreach my $key (@::keys) {
    #        print $i, " ->> ", $key, " ->> ", $hits [$i] { $key }, "\n";
    #    }
    #    print "---------------------\n";
    #}

    #######################################################################
    # Postprocessing                                                      #
    # we have to filter this after a full build of the hash because       #
    # we don't know when both, title and sample fields are initialized.   #
    #######################################################################
    # title is sometimes repeated at the beginning of sampletext and some #
    # documents don't have any title at all.                              #
    # to do search and replace, meta characters must be escaped           #
    #######################################################################
    foreach my $n (0 .. $#hits) {
        if ($hits[$n]{'title'}) {
            my $title_re = quotemeta ($hits[$n]{'title'});
            $hits[$n]{'full-text'} =~ s/^$title_re //
                if (defined $hits[$n]{'full-text'});
        }
        else {
            $hits[$n]{'title'} = $hits[$n]{'url'};
        }
        if (defined $hits[$n]{'title'} && length ($hits[$n]{'title'}) > 60) {
            $hits[$n]{'title'} = substr ($hits[$n]{'title'}, 0, 60) . "...";
        }
    }
    return $num_hit, \@hits;
}

###################################################
# read search results from zoomsh (XML records)   #
# args: none                                      #
# returns:                                        #
#   number of total hits                          #
#   reference to a list of hashes, one per hit    #
###################################################
# Runs the command in $::zoomq and parses its output line by line. The first
# line is expected to end in ": <n> hits", the second line is skipped.
# Each line containing <xsoif> starts a new hit. Text lines inside the
# elements url, title, full-text, file-size and last-modification-time are
# collected; <filename> and <score> are taken from inside <idzebra...>.
# The run is aborted by print_html_timeout after 60 seconds.
sub read_hash_zoomsh_xml {
    my ($num_hit, @hits);
    my $i         = -1;    # index of the hit being filled, -1 = none yet
    my $one_line  = 0;     # true while inside an <idzebra...> element
    my $curr_elem = "";    # last opened element, "" after a closing tag
    my $skip      = 0;     # true once the field being filled is long enough
    my %wanted    = map { $_ => 1 }
        ("url", "title", "full-text", "file-size", "last-modification-time");

    # exit after this number of seconds; the handler is installed first so
    # the alarm can never fire without it
    $SIG{ALRM} = \&print_html_timeout;
    alarm (60);

    # remember the child so that print_html_timeout can hang it up
    $::zoompid = open (my $in, '-|', $::zoomq);
    if (!$::zoompid) {
        alarm (0);
        return $num_hit, \@hits;
    }

    # parse number of hits
    my $line = <$in>;
    ($num_hit) = $line =~ /^.*: (\d+) hits$/ if (defined $line);

    # the line following the hit count is not evaluated
    $line = <$in>;

    while (defined ($line = <$in>)) {
        my ($elem) = $line =~ /<(.*?)>/;

        if ($elem) {
            # line carries a tag
            if ($elem eq "xsoif") {
                $i++;
                next;
            }
            if ($elem eq "/idzebra") {
                $one_line = 0;
                next;
            }
            if ($elem =~ /^idzebra/) {
                $one_line = 1;
                next;
            }
            if (!$one_line) {
                $curr_elem = ($elem =~ /^\//) ? "" : $elem;
            }
            elsif ($i >= 0) {
                # inside <idzebra...> the values share the line with the tags
                ($hits[$i]{'filename'}) =
                    $line =~ /<filename>(\S+)<\/filename>/
                    if ($elem eq "filename");
                ($hits[$i]{'score'}) = $line =~ /<score>(\d+)<\/score>/
                    if ($elem eq "score");
            }
            next;
        }

        # plain text line: keep it only inside a wanted element of a record
        next if ($i < 0 || !$wanted{$curr_elem});

        chomp $line;
        if (!$hits[$i]{$curr_elem}) {
            $skip = 0;
            $hits[$i]{$curr_elem} = $line;
        }
        elsif (!$skip) {
            # stop collecting once the field exceeds what will be displayed
            $skip = 1
                if (length ($hits[$i]{$curr_elem}) > ($::n_lines + 1) * 80);
            $hits[$i]{$curr_elem} .= " $line";
        }
    }
    close ($in);
    alarm (0);

    # print all parsed lines for debugging
    # keys used to print search results
    #@::keys = ("rank", "url", "last-modification-time",
    #           "file-size", "title", "full-text");
    #foreach $i (0 .. $#hits) {
    #    foreach my $key (@::keys) {
    #        print $i, " ->> ", $key, " ->> ", $hits [$i] { $key }, "\n";
    #    }
    #    print "---------------------\n";
    #}

    #######################################################################
    # Postprocessing                                                      #
    # we have to filter this after a full build of the hash because       #
    # we don't know when both, title and sample fields are initialized.   #
    #######################################################################
    # title is sometimes repeated at the beginning of sampletext and some #
    # documents don't have any title at all.                              #
    # to do search and replace, meta characters must be escaped           #
    #######################################################################
    foreach my $n (0 .. $#hits) {
        if ($hits[$n]{'title'}) {
            my $title = $hits[$n]{'title'};
            # \Q...\E: the title is data, not a pattern
            $hits[$n]{'full-text'} =~ s/^\Q$title\E\s*//
                if (defined $hits[$n]{'full-text'});
        }
        else {
            $hits[$n]{'title'} = $hits[$n]{'url'};
        }
        $hits[$n]{'title'} = trim_line ($hits[$n]{'title'}, 80);
        $hits[$n]{'full-text'} =
            trim_line ($hits[$n]{'full-text'}, $::n_lines * 80);
    }
    return $num_hit, \@hits;
}

##################################################
# build query string from input                  #
# args:                                          #
#   query string                                 #
#   boolean operation                            #
#   array of search terms                        #
# returns: query string in Zebra's client        #
#          notation, "" without search terms     #
##################################################
# Every term is prefixed with the ranking attribute. Terms are combined
# left to right with "@<op>"; a term starting with "-" (other than the
# first one) is combined with "@not" instead. Terms containing whitespace
# are wrapped in backslash-escaped double quotes.
# NOTE(review): the terms end up on a shell command line and are not
# sanitized here - confirm that the caller filters shell meta characters.
sub build_query {
    my $query = shift;
    my $op    = shift;
    my @token = @_;
    my $z_query;

    return "" if (!@token);

    # request ranking
    my $attr = "\@attr 2=102 ";
    #$attr .= "\@attr 1=/xsoif ";
    #$attr .= "\@attr 1=/xsoif/url ";
    #$attr .= "\@attr 1=/xsoif/author ";
    #$attr .= "\@attr 1=/xsoif/title ";
    #$attr .= "\@attr 1=/xsoif/abstract ";
    #$attr .= "\@attr 1=/xsoif/headings ";
    #$attr .= "\@attr 1=/xsoif/keywords ";
    #$attr .= "\@attr 1=/xsoif/description ";
    #$attr .= "\@attr 1=/xsoif/full-text ";

    # make it a regexp search, if any of these special characters are found
    # in query string: ".[]*+?|"
    $attr .= "\@attr 5=102 " if ($query =~ /[.\[\]*+?|]/);

    foreach my $term (@token) {
        $term = "\\\"$term\\\"" if ($term =~ /\s/);

        if (!$z_query) {
            $z_query = $attr . $term;
        }
        elsif ($term =~ s/^-//) {
            $z_query = "\@not $z_query " . $attr . $term;
        }
        else {
            $z_query = "\@$op $z_query " . $attr . $term;
        }
    }
    return $z_query;
}

#############################################
# neutralize html tags still left in        #
# summarized data by mapping the dangerous  #
# letters "<" and ">" to entities           #
# args: text to strip                       #
# returns: stripped text (undef for undef)  #
#############################################
sub remove_quirks {
    my $line = shift;

    return $line if (!defined $line);
    $line =~ s/</&lt;/g;
    $line =~ s/>/&gt;/g;
    return $line;
}

##################################################
# truncate a line reasonably                     #
# args: string, length                           #
# returns: the string itself if it is not longer #
#          than length (undef for undef),        #
#          otherwise its first length characters #
#          cut back to the last whitespace, with #
#          " ..." appended unless the rest ends  #
#          in "." (so the result may exceed      #
#          length by the appended marker)        #
##################################################
sub trim_line {
    my $str = shift;
    my $len = shift;

    return $str if (!defined $str || length ($str) <= $len);

    $str = substr ($str, 0, $len);
    # drop the (probably incomplete) last word
    $str =~ s/(.*)\s.*/$1/g;
    $str .= " ..." if ($str !~ /\.$/);
    return $str;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -