📄 npg.pm

📁 一个论文管理系统
💻 PM
字号:
# $Id: NPG.pm,v 1.11 2006/01/10 22:48:43 martin Exp $## Copyright 2005 Nature Publishing Group# This program is free software; you can redistribute it and/or# modify it under the terms of the GNU General Public License# as published by the Free Software Foundation; either version 2# of the License, or (at your option) any later version.## The Bibliotech::CitationSource::NPG class retrieves citation data for articles# on Nature.com.use strict;use Bibliotech::CitationSource;package Bibliotech::CitationSource::NPG;use base 'Bibliotech::CitationSource';use URI;use URI::QueryParam;sub api_version {  1;}sub name {  'Nature Publishing Group';}sub cfgname {  'NPG';}sub version {  '$Revision: 1.11 $';}sub understands {  my ($self, $uri) = @_;  return 0 unless $uri->scheme eq 'http';  #check the host  return 0 unless ($uri->host =~ /^(www\.)?nature.com$/);  #old-style links  return 1 if ($uri->path eq '/cgi-taf/DynaPage.taf' && $uri->query_param('file'));  #new-style links  return 1 if ($uri->path =~ m!^/[a-z]+?/journal/v(?:\d+|aop)/n(?:\d+|current)/(?:full|abs)/.+\.html!i);  return 0;}sub filter {  my ($self, $uri) = @_;  $uri->query_param_delete('_UserReference');  # always drop  $uri->query_param_delete('filetype') unless $uri->query_param('filetype');  # drop if empty  return $uri;}sub citations {  my ($self, $article_uri) = @_;  my $ris;  eval {    die "do not understand URI\n" unless $self->understands($article_uri);    my $file;    #old-style link    if(my $temp = $article_uri->query_param('file')) {      $file = $temp;    }    #new-style link    else {	$file = $article_uri->path;        #strip fragments or queries        $file =~ s/\.html(?:#|\?).*/.html/;    }    die "no file name seen in URI\n" unless $file;    my ($abr, $vol, $iss, $uid) = ($file =~ m!^/([a-z]+)/journal/v(\d+|(?:aop))/n(\d+|(?:current))/.+?/(.+?)(?:_[a-z]+)?\.html!i);    die "no abbreviated journal name\n" unless $abr;    die "no volume\n" unless $vol;    die "no issue\n" unless $iss;    die "no UID\n" unless $uid;    my $query_uri = new URI ("http://www.nature.com/$abr/journal/v$vol/n$iss/ris/$uid.ris");    my $ris_raw = $self->get($query_uri);    $ris = new Bibliotech::CitationSource::NPG::RIS ($ris_raw);    if (!$ris->has_data) {      # give it one more try because nature.com is flakey      # the NPG servers occasionally report 404 or 501 for no reason      # additionally I think they sometimes return no data with a 200      sleep 2;      $ris_raw = $self->get($query_uri);      $ris = new Bibliotech::CitationSource::NPG::RIS ($ris_raw);    }    die "RIS obj false\n" unless $ris;    die "RIS file contained no data\n" unless $ris->has_data;  };      die $@ if $@ =~ /at .* line \d+/;  $self->errstr($@), return undef if $@;  return bless [bless $ris, 'Bibliotech::CitationSource::NPG::Result'], 'Bibliotech::CitationSource::ResultList';}package Bibliotech::CitationSource::NPG::RIS;use base 'Class::Accessor::Fast';# used for spec: http://www.refman.com/support/risformat_intro.asp# read a RIS file and provide back an object that is a hashref of the tags,# using arrayrefs for tags with multiple valuesour %TYPES = (ABST  => 'Abstract',	      ADVS  => 'Audiovisual material',	      ART   => 'Art Work',	      BILL  => 'Bill/Resolution',	      BOOK  => 'Book, Whole',	      CASE  => 'Case',	      CHAP  => 'Book chapter',	      COMP  => 'Computer program',	      CONF  => 'Conference proceeding',	      CTLG  => 'Catalog',	      DATA  => 'Data file',	      ELEC  => 'Electronic Citation',	      GEN   => 'Generic',	      HEAR  => 'Hearing',	      ICOMM => 'Internet Communication',	      INPR  => 'In Press',	      JFULL => 'Journal (full)',	      JOUR  => 'Journal',	      MAP   => 'Map',	      MGZN  => 'Magazine article',	      MPCT  => 'Motion picture',	      MUSIC => 'Music score',	      NEWS  => 'Newspaper',	      PAMP  => 'Pamphlet',	      PAT   => 'Patent',	      PCOMM => 'Personal communication',	      RPRT  => 'Report',	      SER   => 'Serial (Book, Monograph)',	      SLIDE => 'Slide',	      SOUND => 'Sound recording',	      STAT  => 'Statute',	      THES  => 'Thesis/Dissertation',	      UNBIL => 'Unenacted bill/resolution',	      UNPB  => 'Unpublished work',	      VIDEO => 'Video recording'	      );__PACKAGE__->mk_accessors(qw/TY ID T1 TI CT BT T2 BT T3 A1 AU A2 ED A3 Y1 PY Y2 N1 AB N2 KW RP JF JO JA J1 J2			  VL IS SP EP CP CY PB SN AD AV M1 M2 M3 U1 U2 U3 U4 U5 UR L1 L2 L3 L4 ER			  has_data inceq/);sub new {  my ($class, $data) = @_;  my $self = {};  bless $self, ref $class || $class;  $self->has_data(0);  $self->inceq(0);  # "include equivalents" - when calling title() do we return just T1 or all of T1, TI, CT, BT  $self->parse($data) if $data;  return $self;}sub parse {  my ($self, $data) = @_;  my %values;  {    my @lines;    {      my @data = ref $data ? map { s/\r?\n$//; $_; } @{$data} : split(/\r?\n/, $data);      my $in_data = 0;      my $double_newlines = 0;      foreach (@data) {	if ($double_newlines == 1) {	  $double_newlines = 2;	}	elsif ($double_newlines == 2) {	  if (/^$/) {	    $double_newlines = 1;	    next;	  }	  else {	    $double_newlines = 0;	  }	}	if ($in_data) {	  if (/^ER  - ?/) {	    $in_data = 0;	  }	  else {	    if (/^\w\w  - ?/) {	      push @lines, $_;	    }	    else {	      if (@lines) {		if ($lines[-1] =~ /^TY  - ?/) {		  $double_newlines = 1;		}		else {		  $lines[-1] .= "\n$_";		}	      }	    }	  }	}	elsif (/^TY  - ?/) {	  $in_data = 1;	  $self->has_data(1);	  push @lines, $_;	}      }    }    foreach (@lines) {      my ($key, $value) = /^(\w\w)  - (.*)$/s;      next unless defined $key && $self->can($key);      my $stored = $values{$key};      if (defined $stored) {	if (ref $stored) {	  push @{$stored}, $value;	}	else {	  $values{$key} = [$stored, $value];	}      }      else {	$values{$key} = $value;      }    }  }  foreach my $key (keys %values) {    $self->$key($values{$key});  }  return $self;}sub collect {  my ($self, @fields) = @_;  my $include = $self->inceq;  my $soft = 0;  if ($fields[0] eq 'soft') {    shift @fields;    $soft = 1;  }  if (($soft and $include >= 2) or (!$soft and $include >= 1)) {    my @results;    foreach my $field (@fields) {      my $stored = $self->$field;      next unless defined $stored;      push @results, ref $stored ? @{$stored} : $stored;    }    return wantarray ? () : undef unless @results;    return wantarray ? @results : \@results;  }  else {    foreach my $field (@fields) {      my $stored = $self->$field;      return $stored if defined $stored;    }    return wantarray ? () : undef;  }}sub ris_type         { shift->collect(qw/TY/); }sub identification   { shift->collect(qw/ID/); }sub title_primary    { shift->collect(qw/T1 TI CT BT/); }sub title_secondary  { shift->collect(qw/T2 BT/); }sub title_series     { shift->collect(qw/T3/); }sub title      	     { shift->collect(soft => qw/title_primary title_secondary title_series/); }sub author_primary   { shift->collect(qw/A1 AU/); }sub author_secondary { shift->collect(qw/A2 ED/); }sub author_series    { shift->collect(qw/A3/); }sub author           { shift->collect(soft => qw/author_primary author_secondary author_series/); }sub authors          { shift->collect(qw/author/); }sub date_primary     { shift->collect(qw/Y1 PY/); }sub date_secondary   { shift->collect(qw/Y2/); }sub date             { shift->collect(soft => qw/date_primary date_secondary/); }sub notes            { shift->collect(qw/N1 AB/); }sub abstract         { shift->collect(qw/N2/); }sub keywords         { shift->collect(qw/KW/); }sub reprint          { shift->collect(qw/RP/); }sub periodical_name  { shift->collect(qw/JF JO/); }sub periodical_abbr  { shift->collect(qw/JA J1 J2/); }sub journal          { shift->collect(soft => qw/periodical_name periodical_abbr/); }sub journal_abbr     { shift->collect(qw/periodical_abbr/); }sub volume           { shift->collect(qw/VL/); }sub issue            { shift->collect(qw/IS/); }sub starting_page    { shift->collect(qw/SP/); }sub ending_page      { shift->collect(qw/EP/); }sub page             { shift->collect(qw/starting_page/); }sub publication_city { shift->collect(qw/CP CY/); }sub publisher        { shift->collect(qw/PB/); }sub issn_or_isbn     { shift->collect(qw/SN/); }sub issn             { shift->collect(qw/issn_or_isbn/); }sub isbn             { shift->collect(qw/issn_or_isbn/); }sub address          { shift->collect(qw/AD/); }sub availablity      { shift->collect(qw/AV/); }sub misc1            { shift->collect(qw/M1/); }sub misc2            { shift->collect(qw/M2/); }sub misc3            { shift->collect(qw/M3/); }sub misc             { shift->collect(qw/misc1 misc2 misc3/); }sub user1            { shift->collect(qw/U1/); }sub user2            { shift->collect(qw/U2/); }sub user3            { shift->collect(qw/U3/); }sub user4            { shift->collect(qw/U4/); }sub user5            { shift->collect(qw/U5/); }sub user             { shift->collect(qw/user1 user2 user3 user4 user5/); }sub url              { shift->collect(qw/UR/); }sub uri              { shift->collect(qw/url/); }sub web              { shift->collect(qw/url/); }sub pdf              { shift->collect(qw/L1/); }sub full_text        { shift->collect(qw/L2/); }sub related          { shift->collect(qw/L3/); }sub image            { shift->collect(qw/L4/); }sub links            { shift->collect(qw/web pdf full_text related image/); }sub page_range {  my $self = shift;   my $starting_page = $self->collect(qw/starting_page/) or return undef;  my $ending_page   = $self->collect(qw/ending_page/)   or return $starting_page;  return $starting_page.' - '.$ending_page if $starting_page != $ending_page;  return $starting_page;}sub ris_type_description {  return $TYPES{shift->ris_type};}sub is_valid_ris_type {  return exists $TYPES{shift->ris_type};}package Bibliotech::CitationSource::NPG::RIS::Result;use base ('Bibliotech::CitationSource::NPG::RIS', 'Bibliotech::CitationSource::Result', 'Class::Accessor::Fast');use List::Util qw/first/;use List::MoreUtils qw/none/;__PACKAGE__->mk_accessors(qw/type source/);sub identifiers {  {doi => shift->doi};}sub justone {  my ($self, $field, %options) = @_;  my $super = 'SUPER::'.$field;  my $stored = $self->$super or return undef;  return undef unless defined $stored;  return $stored unless ref $stored eq 'ARRAY';  my @stored = @{$stored};  my $join = $options{join};  return join($join, @stored) if defined $join;  my @not = @{$options{not}||[]};  my $first = first { my $value = $_; none { $value eq $_ } @not } @stored;  return $first;}sub authors {  my ($self) = @_;  my $authors = $self->SUPER::authors;  my @authors = map(Bibliotech::CitationSource::NPG::Result::Author->new($_), ref $authors ? @{$authors} : $authors);  bless \@authors, 'Bibliotech::CitationSource::Result::AuthorList';}# override - from Nature the abbreviated name arrives in JOsub periodical_name  { shift->collect(qw/JF/); }sub periodical_abbr  { shift->collect(qw/JO JA J1 J2/); }sub journal {  my ($self) = @_;  return Bibliotech::CitationSource::NPG::Result::Journal->new($self->justone('journal'),							       $self->justone('journal_abbr'),							       $self->justone('issn'));}sub ris_type 	{ shift->justone('ris_type'); }sub pubmed   	{ undef; }sub doi      	{ shift->justone('misc3'); }sub asin        { undef; }sub title    	{ shift->justone('title'); }sub description { shift->justone('notes'); }sub volume   	{ shift->justone('volume'); }sub issue    	{ shift->justone('issue'); }sub page     	{ shift->page_range; }sub url {  my @url = map { split(/[ \n]+/) } shift->collect(qw/UR L3/);  return unless @url;  return $url[0] if @url == 1;  return wantarray ? @url : \@url;}sub date {  my $date = shift->justone('date');  $date =~ s|^(\d+/\d*/\d*)/.*$|$1|;  return $date;}sub last_modified_date {  shift->date(@_);}package Bibliotech::CitationSource::NPG::Result;use base 'Bibliotech::CitationSource::NPG::RIS::Result';sub type {  'NPG';}sub source {  'NPG RIS file from www.nature.com';}package Bibliotech::CitationSource::NPG::Result::Author;use base 'Class::Accessor::Fast';__PACKAGE__->mk_accessors(qw/firstname initials lastname/);sub new {  my ($class, $author) = @_;  my ($lastname, $firstname);  if ($author =~ /^(.+?),\s*(.*)$/) {    ($lastname, $firstname) = ($1, $2);  }  elsif ($author =~ /^(.*)\s+(.+)$/) {    ($firstname, $lastname) = ($1, $2);  }  else {    $lastname = $author;  }  my $initials = join(' ', map { s/^(.).*$/$1/; $_; } split(/\s+/, $firstname)) || undef;  $firstname =~ s/(\s\w\.?)+$//;  return $class->SUPER::new({firstname => $firstname, lastname => $lastname, initials => $initials});}package Bibliotech::CitationSource::NPG::Result::Journal;use base 'Class::Accessor::Fast';__PACKAGE__->mk_accessors(qw/name medline_ta issn/);sub new {  my ($class, $name, $medline_ta, $issn) = @_;  return $class->SUPER::new({name => $name, medline_ta => $medline_ta, issn => $issn});}1;__END__
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -