📄 fetch.pm
字号:
package File::Fetch;use strict;use FileHandle;use File::Copy;use File::Spec;use File::Spec::Unix;use File::Basename qw[dirname];use Cwd qw[cwd];use Carp qw[carp];use IPC::Cmd qw[can_run run];use File::Path qw[mkpath];use Params::Check qw[check];use Module::Load::Conditional qw[can_load];use Locale::Maketext::Simple Style => 'gettext';use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT $BLACKLIST $METHOD_FAIL $VERSION $METHODS $FTP_PASSIVE $TIMEOUT $DEBUG $WARN ];use constant QUOTE => do { $^O eq 'MSWin32' ? q["] : q['] }; $VERSION = '0.14';$VERSION = eval $VERSION; # avoid warnings with development releases$PREFER_BIN = 0; # XXX TODO implement$FROM_EMAIL = 'File-Fetch@example.com';$USER_AGENT = 'File::Fetch/$VERSION';$BLACKLIST = [qw|ftp|];$METHOD_FAIL = { };$FTP_PASSIVE = 1;$TIMEOUT = 0;$DEBUG = 0;$WARN = 1;### methods available to fetch the file depending on the scheme$METHODS = { http => [ qw|lwp wget curl lynx| ], ftp => [ qw|lwp netftp wget curl ncftp ftp| ], file => [ qw|lwp file| ], rsync => [ qw|rsync| ]};### silly warnings ###local $Params::Check::VERBOSE = 1;local $Params::Check::VERBOSE = 1;local $Module::Load::Conditional::VERBOSE = 0;local $Module::Load::Conditional::VERBOSE = 0;### see what OS we are on, important for file:// uris ###use constant ON_WIN => ($^O eq 'MSWin32');use constant ON_VMS => ($^O eq 'VMS'); use constant ON_UNIX => (!ON_WIN);use constant HAS_VOL => (ON_WIN);use constant HAS_SHARE => (ON_WIN);=pod=head1 NAMEFile::Fetch - A generic file fetching mechanism=head1 SYNOPSIS use File::Fetch; ### build a File::Fetch object ### my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt'); ### fetch the uri to cwd() ### my $where = $ff->fetch() or die $ff->error; ### fetch the uri to /tmp ### my $where = $ff->fetch( to => '/tmp' ); ### parsed bits from the uri ### $ff->uri; $ff->scheme; $ff->host; $ff->path; $ff->file;=head1 DESCRIPTIONFile::Fetch is a generic file fetching mechanism.It allows you to fetch any file pointed to by a C<ftp>, C<http>,C<file>, or C<rsync> uri by a number of different means.See the C<HOW IT WORKS> section further down for details.=head1 ACCESSORSA C<File::Fetch> object has the following accessors=over 4=item $ff->uriThe uri you passed to the constructor=item $ff->schemeThe scheme from the uri (like 'file', 'http', etc)=item $ff->hostThe hostname in the uri. Will be empty if host was originally 'localhost' for a 'file://' url.=item $ff->volOn operating systems with the concept of a volume the second elementof a file:// is considered to the be volume specification for the file.Thus on Win32 this routine returns the volume, on other operatingsystems this returns nothing.On Windows this value may be empty if the uri is to a network share, in which case the 'share' property will be defined. Additionally, volume specifications that use '|' as ':' will be converted on read to use ':'.On VMS, which has a volume concept, this field will be empty because VMSfile specifications are converted to absolute UNIX format and the volumeinformation is transparently included.=item $ff->shareOn systems with the concept of a network share (currently only Windows) returns the sharename from a file://// url. On other operating systems returns empty.=item $ff->pathThe path from the uri, will be at least a single '/'.=item $ff->fileThe name of the remote file. For the local file name, theresult of $ff->output_file will be used. =cut############################# Object & Accessors #############################{ ### template for new() and autogenerated accessors ### my $Tmpl = { scheme => { default => 'http' }, host => { default => 'localhost' }, path => { default => '/' }, file => { required => 1 }, uri => { required => 1 }, vol => { default => '' }, # windows for file:// uris share => { default => '' }, # windows for file:// uris _error_msg => { no_override => 1 }, _error_msg_long => { no_override => 1 }, }; for my $method ( keys %$Tmpl ) { no strict 'refs'; *$method = sub { my $self = shift; $self->{$method} = $_[0] if @_; return $self->{$method}; } } sub _create { my $class = shift; my %hash = @_; my $args = check( $Tmpl, \%hash ) or return; bless $args, $class; if( lc($args->scheme) ne 'file' and not $args->host ) { return File::Fetch->_error(loc( "Hostname required when fetching from '%1'",$args->scheme)); } for (qw[path file]) { unless( $args->$_() ) { # 5.5.x needs the () return File::Fetch->_error(loc("No '%1' specified",$_)); } } return $args; } }=item $ff->output_fileThe name of the output file. This is the same as $ff->file,but any query parameters are stripped off. For example: http://example.com/index.html?x=ywould make the output file be C<index.html> rather than C<index.html?x=y>.=back=cutsub output_file { my $self = shift; my $file = $self->file; $file =~ s/\?.*$//g; return $file;}### XXX do this or just point to URI::Escape?# =head2 $esc_uri = $ff->escaped_uri# # =cut# # ### most of this is stolen straight from URI::escape# { ### Build a char->hex map# my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;# # sub escaped_uri {# my $self = shift;# my $uri = $self->uri;# # ### Default unsafe characters. RFC 2732 ^(uric - reserved)# $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/# $escapes{$1} || $self->_fail_hi($1)/ge;# # return $uri;# }# # sub _fail_hi {# my $self = shift;# my $char = shift;# # $self->_error(loc(# "Can't escape '%1', try using the '%2' module instead", # sprintf("\\x{%04X}", ord($char)), 'URI::Escape'# )); # }# # sub output_file {# # }# # # }=head1 METHODS=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );Parses the uri and creates a corresponding File::Fetch::Item object,that is ready to be C<fetch>ed and returns it.Returns false on failure.=cutsub new { my $class = shift; my %hash = @_; my ($uri); my $tmpl = { uri => { required => 1, store => \$uri }, }; check( $tmpl, \%hash ) or return; ### parse the uri to usable parts ### my $href = __PACKAGE__->_parse_uri( $uri ) or return; ### make it into a FFI object ### my $ff = File::Fetch->_create( %$href ) or return; ### return the object ### return $ff;}### parses an uri to a hash structure:###### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )###### becomes:###### $href = {### scheme => 'ftp',### host => 'ftp.cpan.org',### path => '/pub/mirror',### file => 'index.html'### };###### In the case of file:// urls there maybe be additional fields###### For systems with volume specifications such as Win32 there will be ### a volume specifier provided in the 'vol' field.###### 'vol' => 'volumename'###### For windows file shares there may be a 'share' key specified###### 'share' => 'sharename' ###### Note that the rules of what a file:// url means vary by the operating system ### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and ### not '/foo/bar.txt'###### Similarly if the host interpreting the url is VMS then ### file:///disk$user/my/notes/note12345.txt' means ### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as### if it is unix where it means /disk$user/my/notes/note12345.txt'.### Except for some cases in the File::Spec methods, Perl on VMS will generally### handle UNIX format file specifications.###### This means it is impossible to serve certain file:// urls on certain systems.###### Thus are the problems with a protocol-less specification. :-(###sub _parse_uri { my $self = shift; my $uri = shift or return; my $href = { uri => $uri }; ### find the scheme ### $uri =~ s|^(\w+)://||; $href->{scheme} = $1; ### See rfc 1738 section 3.10 ### http://www.faqs.org/rfcs/rfc1738.html ### And wikipedia for more on windows file:// urls ### http://en.wikipedia.org/wiki/File:// if( $href->{scheme} eq 'file' ) { my @parts = split '/',$uri; ### file://hostname/... ### file://hostname/... ### normalize file://localhost with file:/// $href->{host} = $parts[0] || ''; ### index in @parts where the path components begin; my $index = 1; ### file:////hostname/sharename/blah.txt if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) { $href->{host} = $parts[2] || ''; # avoid warnings $href->{share} = $parts[3] || ''; # avoid warnings $index = 4 # index after the share ### file:///D|/blah.txt ### file:///D:/blah.txt } elsif (HAS_VOL) { ### this code comes from dmq's patch, but: ### XXX if volume is empty, wouldn't that be an error? --kane ### if so, our file://localhost test needs to be fixed as wel $href->{vol} = $parts[1] || ''; ### correct D| style colume descriptors $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN; $index = 2; # index after the volume } ### rebuild the path from the leftover parts; $href->{path} = join '/', '', splice( @parts, $index, $#parts ); } else { ### using anything but qw() in hash slices may produce warnings ### in older perls :-( @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s; } ### split the path into file + dir ### { my @parts = File::Spec::Unix->splitpath( delete $href->{path} ); $href->{path} = $parts[1]; $href->{file} = $parts[2]; } ### host will be empty if the target was 'localhost' and the ### scheme was 'file' $href->{host} = '' if ($href->{host} eq 'localhost') and ($href->{scheme} eq 'file'); return $href;}=head2 $ff->fetch( [to => /my/output/dir/] )Fetches the file you requested. By default it writes to C<cwd()>,but you can override that by specifying the C<to> argument.Returns the full path to the downloaded file on success, and falseon failure.=cut
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -