📄 url.pm
字号:
package URI::URL;$VERSION = "4.15"; # $Date: 1999/07/21 19:12:38 $sub Version { $VERSION; }require 5.004;require Exporter;@ISA = qw(Exporter);@EXPORT = qw(url);require AutoLoader;*AUTOLOAD = \&AutoLoader::AUTOLOAD;use Carp ();# Basic lexical elements, taken from RFC 1738:## safe = "$" | "-" | "_" | "." | "+"# extra = "!" | "*" | "'" | "(" | ")" | ","# national = "{" | "}" | "|" | "\" | "^" | "~" | "[" | "]" | "`"# punctuation = "<" | ">" | "#" | "%" | <"># reserved = ";" | "/" | "?" | ":" | "@" | "&" | "="# escape = "%" hex hex# unreserved = alpha | digit | safe | extra# uchar = unreserved | escape# xchar = unreserved | reserved | escape# draft-fielding-url-syntax-05.txt adds '+' to the reserved chars and# takes '~' outuse strict;use vars qw($reserved $reserved_no_slash $reserved_no_form $unsafe $Debug $Strict_URL );$reserved = ";\\/?:\\@&=+#%"; # RFC 1738 reserved pluss '#' and '%'$reserved_no_slash = ";?:\\@&=+#%"; # used when escaping path$reserved_no_form = ";\\/?:\\@#%"; # used when escaping params and query# This is the unsafe characters (excluding those reserved)$unsafe = "\x00-\x20{}|\\\\^\\[\\]`<>\"\x7F-\xFF";#$unsafe .= "~"; # according to RFC1738 but not to common practice$Debug = 0; # set to 1 to print URLs on creation$Strict_URL = 0; # see new()use overload ( '""' => 'as_string', 'fallback' => 1 );my %Implementor = (); # mapping from scheme to implementation class# Easy to use constructorsub url ($;$){ URI::URL->new(@_);}# URI::URL objects are implemented as blessed hashes:## Each of the URL components (scheme, netloc, user, password, host,# port, path, params, query, fragment) are stored under their# name. The netloc, path, params and query is stored in quoted# (escaped) form. The others is stored unquoted (unescaped).## Netloc is special since it is rendundant (same as# "user:password@host:port") and must be kept in sync with those.## The '_str' key stores a cached stringified version of the URL# (by definition in quoted form).# The '_base' key stores the optional base of a relative URL.## The '_orig_url' is used while debugging is on.## Subclasses may add their own keys but must take great care to# avoid names which might be used in later verions of this module.sub new{ my($class, $init, $base) = @_; my $self; if (ref $init) { $self = $init->clone; $self->base($base) if $base; } else { $init = "" unless defined $init; # RFC 1738 appendix suggest that we just ignore extra whitespace $init =~ s/\s+//g; # Also get rid of any <URL: > wrapper $init =~ s/^<(?:URL:)?(.*)>$/$1/; # We need a scheme to determine which class to use my($scheme) = $init =~ m/^([.+\-\w]+):/; if (!$scheme and $base){ # get scheme from base if (ref $base){ # may be object or just a string $scheme = $base->scheme; } else { $scheme = $1 if $base =~ m/^([.+\-\w]+):/; } } unless($scheme){ Carp::croak("Unable to determine scheme for '$init'") if $Strict_URL; $scheme = 'http'; } my $impclass = URI::URL::implementor($scheme); unless ($impclass) { Carp::croak("URI::URL scheme '$scheme' is not supported") if $Strict_URL; # use generic as fallback require URI::URL::_generic; URI::URL::implementor($scheme, 'URI::URL::_generic'); $impclass = 'URI::URL::_generic'; } # hand-off to scheme specific implementation sub-class $self->{'_orig_url'} = $init if $Debug; $self = $impclass->new($init, $base); } $self->print_on('STDERR') if $Debug; return $self;}sub clone{ my $self = shift; # this work as long as none of the components are references themselves bless { %$self }, ref $self;}sub implementor{ my($scheme, $impclass) = @_; unless (defined $scheme) { require URI::URL::_generic; return 'URI::URL::_generic'; } $scheme = lc($scheme); if ($impclass) { $impclass->_init_implementor($scheme); my $old = $Implementor{$scheme}; $Implementor{$scheme} = $impclass; return $old; } my $ic = $Implementor{$scheme}; return $ic if $ic; # scheme not yet known, look for internal or # preloaded (with 'use') implementation $ic = "URI::URL::$scheme"; # default location no strict 'refs'; # check we actually have one for the scheme: unless (defined @{"${ic}::ISA"}) { # Try to load it eval { require "URI/URL/$scheme.pm"; }; die $@ if $@ && $@ !~ /Can\'t locate/; $ic = '' unless defined @{"${ic}::ISA"}; } if ($ic) { $ic->_init_implementor($scheme); $Implementor{$scheme} = $ic; } $ic;}sub _init_implementor{ my($class, $scheme) = @_; # Remember that one implementor class may actually # serve to implement several URL schemes. if ($] < 5.003_17) { no strict qw(refs); # Setup overloading inheritace - experimental %{"${class}::OVERLOAD"} = %URI::URL::OVERLOAD unless defined %{"${class}::OVERLOAD"}; }}# This private method help us implement access to the elements in the# URI::URL object hash (%$self). You can set up access to an element# with a routine similar to this one:## sub component { shift->_elem('component', @_); }sub _elem{ my $self = shift; my $elem = shift; my $old = $self->{$elem}; if (@_) { $self->{$elem} = shift; $self->{'_str'} = ''; # void cached string } $old;}# Make all standard methods available to all kinds of URLs. This allow# us to call these without to much worry when URI::URL::strict(0);sub bad_method;*netloc = \&bad_method;*user = \&bad_method;*password = \&bad_method;*host = \&bad_method;*port = \&bad_method;*default_port = \&bad_method;*full_path = \&bad_method;*epath = \&bad_method;*eparams = \&bad_method;*equery = \&bad_method;*path = \&bad_method;*path_components = \&bad_method;*params = \&bad_method;*query = \&bad_method;*frag = \&bad_method;## A U T O L O A D I N G## The rest of the methods are autoloaded because they should be less# frequently used. We define stubs here so that inheritance works as# it should.sub newlocal;sub strict;sub base;sub scheme;sub crack;sub abs;sub rel;sub as_string;sub eq;sub print_on;# Don't need DESTROY but avoid trying to AUTOLOAD it.sub DESTROY { }1;__END__sub newlocal{ require URI::URL::file; my $class = shift; URI::URL::file->newlocal(@_); # pass it on the the file class}sub strict{ return $Strict_URL unless @_; my $old = $Strict_URL; $Strict_URL = $_[0]; $old;}# Access some attributes of a URL object:sub base { my $self = shift; my $base = $self->{'_base'}; if (@_) { # set my $new_base = shift; $new_base = $new_base->abs if ref($new_base); # unsure absoluteness $self->{_base} = $new_base; } return unless defined wantarray; # The base attribute supports 'lazy' conversion from URL strings # to URL objects. Strings may be stored but when a string is # fetched it will automatically be converted to a URL object. # The main benefit is to make it much cheaper to say: # new URI::URL $random_url_string, 'http:' if (defined($base) && !ref($base)) { $base = new URI::URL $base; $self->_elem('_base', $base); # set new object } $base;}sub scheme { my $self = shift; my $old = $self->{'scheme'}; if (@_) { my $new_scheme = shift; if (defined($new_scheme) && length($new_scheme)) { # reparse URL with new scheme my $str = $self->as_string; $str =~ s/^[\w+\-.]+://; my $newself = new URI::URL "$new_scheme:$str"; %$self = %$newself; bless $self, ref($newself); } else { $self->{'scheme'} = undef; } } $old;}sub crack{ # should be overridden by subclasses my $self = shift; ($self->scheme, # 0: scheme undef, # 1: user undef, # 2: passwd undef, # 3: host undef, # 4: port undef, # 5: path undef, # 6: params undef, # 7: query undef # 8: fragment )}# These are overridden by _generic (this is just a noop for those schemes that# do not wish to be a subclass of URI::URL::_generic)sub abs { shift->clone; }sub rel { shift->clone; }# This method should always be overridden in subclassessub as_string { "<URL>";}# Compare two URLs, subclasses will provide a more correct implementationsub eq { my($self, $other) = @_; $other = URI::URL->new($other, $self) unless ref $other; ref($self) eq ref($other) && $self->scheme eq $other->scheme && $self->as_string eq $other->as_string; # Case-sensitive}# This is set up as an alias for various methodssub bad_method { my $self = shift; my $scheme = $self->scheme; Carp::croak("Illegal method called for $scheme: URL") if $Strict_URL; # Carp::carp("Illegal method called for $scheme: URL") # if $^W; undef;}sub print_on{ no strict qw(refs); # because we use strings as filehandles my $self = shift; my $fh = shift || 'STDERR'; my($k, $v); print $fh "Dump of URI::URL $self...\n"; foreach $k (sort keys %$self){ $v = $self->{$k}; $v = 'UNDEF' unless defined $v; print $fh " $k\t'$v'\n"; }}1;############################################################################# D O C U M E N T A T I O N#########################################################################=head1 NAMEURI::URL - Uniform Resource Locators (absolute and relative)=head1 SYNOPSIS use URI::URL; # Constructors $url1 = new URI::URL 'http://www.perl.com/%7Euser/gisle.gif'; $url2 = new URI::URL 'gisle.gif', 'http://www.com/%7Euser'; $url3 = url 'http://www.sn.no/'; # handy constructor $url4 = $url2->abs; # get absolute url using base $url5 = $url2->abs('http:/other/path'); $url6 = newlocal URI::URL 'test'; # Stringify URL $str1 = $url->as_string; # complete escaped URL string $str2 = $url->full_path; # escaped path+params+query $str3 = "$url"; # use operator overloading # Retrieving Generic-RL components: $scheme = $url->scheme; $netloc = $url->netloc; # see user,password,host,port below $path = $url->path; $params = $url->params; $query = $url->query; $frag = $url->frag; # Accessing elements in their escaped form $path = $url->epath; $params = $url->eparams; $query = $url->equery; # Retrieving Network location (netloc) components: $user = $url->user; $password = $url->password; $host = $url->host; $port = $url->port; # returns default if not defined # Retrieve escaped path components as an array @path = $url->path_components; # HTTP query-string access methods @keywords = $url->keywords; @form = $url->query_form; # All methods above can set the field values, e.g: $url->scheme('http'); $url->host('www.w3.org'); $url->port($url->default_port); $url->base($url5); # use string or object $url->keywords(qw(dog bones)); # File methods $url = new URI::URL "file:/foo/bar"; open(F, $url->local_path) or die; # Compare URLs if ($url->eq("http://www.sn.no")) or die;=head1 DESCRIPTIONThis module implements the URI::URL class representing UniformResource Locators (URL). URLs provide a compact string representationfor resources available via the Internet. Both absolute (RFC 1738) andrelative (RFC 1808) URLs are supported.URI::URL objects are created by calling new(), which takes as argumenta string representation of the URL or an existing URL object referenceto be cloned. Specific individual elements can then be accessed viathe scheme(), user(), password(), host(), port(), path(), params(),query() and frag() methods. In addition escaped versions of the path,params and query can be accessed with the epath(), eparams() andequery() methods. Note that some URL schemes will support all thesemethods.The object constructor new() must be able to determine the scheme forthe URL. If a scheme is not specified in the URL itself, it will usethe scheme specified by the base URL. If no base URL scheme is definedthen new() will croak if URI::URL::strict(1) has been invoked,otherwise I<http> is silently assumed. Once the scheme has beendetermined new() then uses the implementor() function to determinewhich class implements that scheme. If no implementor class isdefined for the scheme then new() will croak if URI::URL::strict(1)has been invoked, otherwise the internal generic URL class is assumed.Internally defined schemes are implemented by theURI::URL::I<scheme_name> module. The URI::URL::implementor() functioncan be used to explicitly set the class used to implement a scheme ifyou want to override this.=head1 HOW AND WHEN TO ESCAPE=over 3
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -