📄 session.pm

📁 harvest是一个下载html网页得机器人
💻 PM
📖 第 1 页 / 共 3 页
字号:
# -----------------------------------------------------------------------------# Init/compact# -----------------------------------------------------------------------------sub init {    my ($self) = @_;    $self->checkzh;    return(IDZebra::init($self->{zh}));}sub compact {    my ($self) = @_;    $self->checkzh;    return(IDZebra::compact($self->{zh}));}sub update {    my ($self, %args) = @_;    $self->checkzh;    my $rg = $self->_update_args(%args);    $self->_selectRecordGroup($rg);    $self->begin_trans;    IDZebra::repository_update($self->{zh});    $self->_selectRecordGroup($self->{rg});    $self->end_trans;}sub delete {    my ($self, %args) = @_;    $self->checkzh;    my $rg = $self->_update_args(%args);    $self->_selectRecordGroup($rg);    $self->begin_trans;    IDZebra::repository_delete($self->{zh});    $self->_selectRecordGroup($self->{rg});    $self->end_trans;}sub show {    my ($self, %args) = @_;    $self->checkzh;    my $rg = $self->_update_args(%args);    $self->_selectRecordGroup($rg);    $self->begin_trans;    IDZebra::repository_show($self->{zh});    $self->_selectRecordGroup($self->{rg});    $self->end_trans;}sub _update_args {    my ($self, %args) = @_;    my $rg = $self->_makeRecordGroup(%args);    $self->_selectRecordGroup($rg);    return ($rg);}# -----------------------------------------------------------------------------# Per record update# -----------------------------------------------------------------------------sub insert_record {    my ($self, %args) = @_;    $self->checkzh;    my @args = $self->_record_update_args(%args);    my $stat = IDZebra::insert_record($self->{zh}, @args);    my $sysno = $args[2]; $stat = -1 * $stat if ($stat > 0);    return $stat ? $stat : $$sysno;    if ($stat) { return ($stat); } else { return $sysno};}sub update_record {    my ($self, %args) = @_;    $self->checkzh;    my @args = $self->_record_update_args(%args);    my $stat = IDZebra::update_record($self->{zh}, @args);    my $sysno = $args[2]; $stat = -1 * $stat if ($stat > 0);    return $stat ? $stat : $$sysno;    if ($stat) { return ($stat); } else { return $$sysno};}sub delete_record {    my ($self, %args) = @_;    $self->checkzh;    my @args = $self->_record_update_args(%args);    my $stat = IDZebra::delete_record($self->{zh}, @args);    my $sysno = $args[2]; $stat = -1 * $stat if ($stat > 0);    return $stat ? $stat : $$sysno;}sub _record_update_args {    my ($self, %args) = @_;    my $sysno   = $args{sysno}      ? $args{sysno}      : 0;    my $match   = $args{match}      ? $args{match}      : "";    my $rectype = $args{recordType} ? $args{recordType} : "";    my $fname   = $args{file}       ? $args{file}       : "<no file>";    my $force   = $args{force}      ? $args{force}      : 0;    my $buff;    if ($args{data}) {	$buff = $args{data};    }     elsif ($args{file}) {	CORE::open (F, $args{file}) || warn ("Cannot open $args{file}");	$buff = join('',(<F>));	CORE::close (F);    }    my $len = length($buff);    delete ($args{sysno});    delete ($args{match});    delete ($args{recordType});    delete ($args{file});    delete ($args{data});    delete ($args{force});    my $rg = $self->_makeRecordGroup(%args);    # If no record type is given, then try to find it out from the    # file extension;    unless ($rectype) {	if (my ($ext) = $fname =~ /\.(\w+)$/) {	    my $rg2 = $self->_getRecordGroup($rg->{groupName},$ext);	    $rectype = $rg2->{recordType};	}     }    $rg->{databaseName} = "Default" unless ($rg->{databaseName});    unless ($rectype) {	$rectype="";    }    return ($rg, $rectype, \$sysno, $match, $fname, $buff, $len, $force);}# -----------------------------------------------------------------------------# CQL stuffsub cqlmap {    my ($self,$mapfile) = @_;    if ($#_ > 0) {	if ($self->{cql_mapfile} ne $mapfile) {	    unless (-f $mapfile) {		croak("Cannot find $mapfile");	    }	    if (defined ($self->{cql_ct})) {	      IDZebra::cql_transform_close($self->{cql_ct});	    }	    $self->{cql_ct} = IDZebra::cql_transform_open_fname($mapfile);	    $self->{cql_mapfile} = $mapfile;	}    }    return ($self->{cql_mapfile});}sub cql2pqf {    my ($self, $cqlquery) = @_;    unless (defined($self->{cql_ct})) {	croak("CQL map file is not specified yet.");    }    my $res = "\0" x 2048;    my $r = IDZebra::cql2pqf($self->{cql_ct}, $cqlquery, $res, 2048);    if ($r) {#	carp ("Error transforming CQL query: '$cqlquery', status:$r");    }    $res=~s/\0.+$//g;    return ($res,$r); }# -----------------------------------------------------------------------------# Search # -----------------------------------------------------------------------------sub search {    my ($self, %args) = @_;    $self->checkzh;    if ($args{cqlmap}) { $self->cqlmap($args{cqlmap}); }    my $query;    if ($args{pqf}) {	$query = $args{pqf};    }    elsif ($args{cql}) {	my $cqlstat;	($query, $cqlstat) =  $self->cql2pqf($args{cql});	unless ($query) {	    croak ("Failed to transform query: '$args{cql}', ".		   "status: ($cqlstat)");	}    }    unless ($query) {	croak ("No query given to search");    }    my @origdbs;    if ($args{databases}) {	@origdbs = $self->databases;	$self->databases(@{$args{databases}});    }    my $rsname = $args{rsname} ? $args{rsname} : $self->_new_setname;    my $rs = $self->_search_pqf($query, $rsname);    if ($args{databases}) {	$self->databases(@origdbs);    }    if ($args{sort}) {	if ($rs->errCode) {	    carp("Sort skipped due to search error: ".		 $rs->errCode);	} else {	    $rs->sort($args{sort});	}    }    return ($rs);}sub _new_setname {    my ($self) = @_;    return ("set_".$self->{rscount}++);}sub _search_pqf {    my ($self, $query, $setname) = @_;    my $hits = 0;    my $res = IDZebra::search_PQF($self->{zh},				   $query,				   $setname,				   \$hits);    my $rs  = IDZebra::Resultset->new($self,				      name        => $setname,				      query       => $query,				      recordCount => $hits,				      errCode     => $self->errCode,				      errString   => $self->errString);    return($rs);}# -----------------------------------------------------------------------------# Sort## Sorting of multiple result sets is not supported by zebra...# -----------------------------------------------------------------------------sub sortResultsets {    my ($self, $sortspec, $setname, @sets) = @_;    $self->checkzh;    if ($#sets > 0) {	croak ("Sorting/merging of multiple resultsets is not supported now");    }    my @setnames;    my $count = 0;    foreach my $rs (@sets) {	push (@setnames, $rs->{name});	$count += $rs->{recordCount};  # is this really sure ??? It doesn't 	                               # matter now...    }    my $status = IDZebra::sort($self->{zh},			       $self->{odr_output},			       $sortspec,			       $setname,			       \@setnames);    my $errCode = $self->errCode;    my $errString = $self->errString;    logf (LOG_LOG, "Sort status $setname: %d, errCode: %d, errString: %s", 	  $status, $errCode, $errString);    if ($status || $errCode) {$count = 0;}    my $rs  = IDZebra::Resultset->new($self,				      name        => $setname,				      recordCount => $count,				      errCode     => $errCode,				      errString   => $errString);        return ($rs);}# -----------------------------------------------------------------------------# Scan# -----------------------------------------------------------------------------sub scan {    my ($self, %args) = @_;    $self->checkzh;    unless ($args{expression}) {	croak ("No scan expression given");    }    my $sl = IDZebra::ScanList->new($self,%args);    return ($sl);}# ============================================================================__END__=head1 NAMEIDZebra::Session - A Zebra database server session for update and retrieval=head1 SYNOPSIS  $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');  $sess->open();  $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',				 groupName  => 'demo1');  $sess->group(groupName => 'demo2');  $sess->init();  $sess->begin_trans;  $sess->update(path      =>  'lib');  my $s1=$sess->update_record(data       => $rec1,	 		      recordType => 'grs.perl.pod',			      groupName  => "demo1",			      );  my $stat = $sess->end_trans;  $sess->databases('demo1','demo2');  my $rs1 = $sess->search(cqlmap    => 'demo/cql.map',			  cql       => 'dc.title=IDZebra',			  databases => [qw(demo1 demo2)]);  $sess->close;=head1 DESCRIPTIONZebra is a high-performance, general-purpose structured text indexing and retrieval engine. It reads structured records in a variety of input formats (eg. email, XML, MARC) and allows access to them through exact boolean search expressions and relevance-ranked free-text queries. Zebra supports large databases (more than ten gigabytes of data, tens of millions of records). It supports incremental, safe database updates on live systems. You can access data stored in Zebra using a variety of Index Data tools (eg. YAZ and PHP/YAZ) as well as commercial and freeware Z39.50 clients and toolkits. =head1 OPENING AND CLOSING A ZEBRA SESSIONSFor the time beeing only local database services are supported, the same way as calling zebraidx or zebrasrv from the command shell. In order to open a local Zebra database, with a specific configuration file, use  $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');  $sess->open();or  $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg');where $sess is going to be the object representing a Zebra Session. Whenever this variable gets out of scope, the session is closed, together with all active transactions, etc... Anyway, if you'd like to close the session, just say:  $sess->close();This will  - close all transactions  - destroy all result sets and scan lists   - close the sessionNote, that if I<shadow registers> are enabled, the changes will not be committed automatically.In the future different database access methods are going to be available, like:  $sess = IDZebra::Session->open(server => 'ostrich.technomat.hu:9999');You can also use the B<record group> arguments described below directly when calling the constructor, or the open method:  $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',                                 groupName  => 'demo');=head1 RECORD GROUPS If you manage different sets of records that share common characteristics, you can organize the configuration settings for each type into "groups". See the Zebra manual on the configuration file (zebra.cfg). For each open session a default record group is assigned. You can configure it in the constructor, or by the B<group> method:  $sess->group(groupName => ..., ...)
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -