⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 url.pm

📁 ARM上的如果你对底层感兴趣
💻 PM
📖 第 1 页 / 共 2 页
字号:
package URI::URL;

$VERSION = "4.15";   # $Date: 1998/07/17 12:27:03 $
sub Version { $VERSION; }

require 5.004;

require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(url);

require AutoLoader;
*AUTOLOAD = \&AutoLoader::AUTOLOAD;

use Carp ();

# Basic lexical elements, taken from RFC 1738:
#
#  safe         = "$" | "-" | "_" | "." | "+"
#  extra        = "!" | "*" | "'" | "(" | ")" | ","
#  national     = "{" | "}" | "|" | "\" | "^" | "~" | "[" | "]" | "`"
#  punctuation  = "<" | ">" | "#" | "%" | <">
#  reserved     = ";" | "/" | "?" | ":" | "@" | "&" | "="
#  escape       = "%" hex hex
#  unreserved   = alpha | digit | safe | extra
#  uchar        = unreserved | escape
#  xchar        = unreserved | reserved | escape

# draft-fielding-url-syntax-05.txt adds '+' to the reserved chars and
# takes '~' out

use strict;
use vars qw($reserved $reserved_no_slash $reserved_no_form $unsafe
	    $Debug $Strict_URL
	   );

$reserved          = ";\\/?:\\@&=+#%"; # RFC 1738 reserved pluss '#' and '%'
$reserved_no_slash = ";?:\\@&=+#%";    # used when escaping path
$reserved_no_form  = ";\\/?:\\@#%";    # used when escaping params and query

# This is the unsafe characters (excluding those reserved)
$unsafe   = "\x00-\x20{}|\\\\^\\[\\]`<>\"\x7F-\xFF";
#$unsafe .= "~";  # according to RFC1738 but not to common practice

$Debug         = 0;     # set to 1 to print URLs on creation
$Strict_URL    = 0;     # see new()

use overload ( '""' => 'as_string', 'fallback' => 1 );

my %Implementor = (); # mapping from scheme to implementation class


# Easy to use constructor
sub url ($;$)
{
    URI::URL->new(@_);
}


# URI::URL objects are implemented as blessed hashes:
#
# Each of the URL components (scheme, netloc, user, password, host,
# port, path, params, query, fragment) are stored under their
# name. The netloc, path, params and query is stored in quoted
# (escaped) form.  The others is stored unquoted (unescaped).
#
# Netloc is special since it is rendundant (same as
# "user:password@host:port") and must be kept in sync with those.
#
# The '_str' key stores a cached stringified version of the URL
# (by definition in quoted form).
# The '_base' key stores the optional base of a relative URL.
#
# The '_orig_url' is used while debugging is on.
#
# Subclasses may add their own keys but must take great care to
# avoid names which might be used in later verions of this module.

sub new
{
    my($class, $init, $base) = @_;

    my $self;
    if (ref $init) {
	$self = $init->clone;
	$self->base($base) if $base;
    } else {
	$init = "" unless defined $init;
	# RFC 1738 appendix suggest that we just ignore extra whitespace
	$init =~ s/\s+//g;
	# Also get rid of any <URL: > wrapper
	$init =~ s/^<(?:URL:)?(.*)>$/$1/;

	# We need a scheme to determine which class to use
	my($scheme) = $init =~ m/^([.+\-\w]+):/;
	if (!$scheme and $base){ # get scheme from base
	    if (ref $base){ # may be object or just a string
		$scheme = $base->scheme;
	    } else {
		$scheme = $1 if $base =~ m/^([.+\-\w]+):/;
	    }
	}
	unless($scheme){
	    Carp::croak("Unable to determine scheme for '$init'")
		if $Strict_URL;
	    $scheme = 'http';
	}
	my $impclass = URI::URL::implementor($scheme);
	unless ($impclass) {
	    Carp::croak("URI::URL scheme '$scheme' is not supported")
		if $Strict_URL;
	    # use generic as fallback
	    require URI::URL::_generic;
	    URI::URL::implementor($scheme, 'URI::URL::_generic');
	    $impclass = 'URI::URL::_generic';
	}

	# hand-off to scheme specific implementation sub-class
	$self->{'_orig_url'} = $init if $Debug;
	$self = $impclass->new($init, $base);
    }
    $self->print_on('STDERR') if $Debug;
    return $self;
}


sub clone
{
    my $self = shift;
    # this work as long as none of the components are references themselves
    bless { %$self }, ref $self;
}


sub implementor
{
    my($scheme, $impclass) = @_;
    unless (defined $scheme) {
	require URI::URL::_generic;
	return 'URI::URL::_generic';
    }

    $scheme = lc($scheme);
    if ($impclass) {
	$impclass->_init_implementor($scheme);
	my $old = $Implementor{$scheme};
	$Implementor{$scheme} = $impclass;
	return $old;
    }

    my $ic = $Implementor{$scheme};
    return $ic if $ic;

    # scheme not yet known, look for internal or
    # preloaded (with 'use') implementation
    $ic = "URI::URL::$scheme";  # default location
    no strict 'refs';
    # check we actually have one for the scheme:
    unless (defined @{"${ic}::ISA"}) {
	# Try to load it
	eval { require "URI/URL/$scheme.pm"; };
	die $@ if $@ && $@ !~ /Can\'t locate/;
	$ic = '' unless defined @{"${ic}::ISA"};
    }
    if ($ic) {
	$ic->_init_implementor($scheme);
	$Implementor{$scheme} = $ic;
    }
    $ic;
}


sub _init_implementor
{
    my($class, $scheme) = @_;
    # Remember that one implementor class may actually
    # serve to implement several URL schemes.

    if ($] < 5.003_17) {
	no strict qw(refs);
	# Setup overloading inheritace - experimental
	%{"${class}::OVERLOAD"} = %URI::URL::OVERLOAD
	    unless defined %{"${class}::OVERLOAD"};
    }
}


# This private method help us implement access to the elements in the
# URI::URL object hash (%$self).  You can set up access to an element
# with a routine similar to this one:
#
#  sub component { shift->_elem('component', @_); }

sub _elem
{
    my $self = shift;
    my $elem = shift;
    my $old = $self->{$elem};
    if (@_) {
	$self->{$elem} = shift;
	$self->{'_str'} = '';        # void cached string
    }
    $old;
}


# Make all standard methods available to all kinds of URLs.  This allow
# us to call these without to much worry when URI::URL::strict(0);

sub bad_method;

*netloc       = \&bad_method;
*user         = \&bad_method;
*password     = \&bad_method;
*host         = \&bad_method;
*port         = \&bad_method;
*default_port = \&bad_method;

*full_path    = \&bad_method;
*epath        = \&bad_method;
*eparams      = \&bad_method;
*equery       = \&bad_method;

*path         = \&bad_method;
*path_components = \&bad_method;
*params       = \&bad_method;
*query        = \&bad_method;
*frag         = \&bad_method;


#
# A U T O  L O A D I N G
#
# The rest of the methods are autoloaded because they should be less
# frequently used.  We define stubs here so that inheritance works as
# it should.
sub newlocal;
sub strict;
sub base;
sub scheme;
sub crack;
sub abs;
sub rel;
sub as_string;
sub eq;
sub print_on;

# Don't need DESTROY but avoid trying to AUTOLOAD it.
sub DESTROY { }

1;
__END__

sub newlocal
{
    require URI::URL::file;
    my $class = shift;
    URI::URL::file->newlocal(@_);  # pass it on the the file class
}

sub strict
{
    return $Strict_URL unless @_;
    my $old = $Strict_URL;
    $Strict_URL = $_[0];
    $old;
}

# Access some attributes of a URL object:
sub base {
    my $self = shift;
    my $base  = $self->{'_base'};

    if (@_) { # set
	my $new_base = shift;
	$new_base = $new_base->abs if ref($new_base);  # unsure absoluteness
	$self->{_base} = $new_base;
    }
    return unless defined wantarray;

    # The base attribute supports 'lazy' conversion from URL strings
    # to URL objects. Strings may be stored but when a string is
    # fetched it will automatically be converted to a URL object.
    # The main benefit is to make it much cheaper to say:
    #   new URI::URL $random_url_string, 'http:'
    if (defined($base) && !ref($base)) {
	$base = new URI::URL $base;
	$self->_elem('_base', $base); # set new object
    }
    $base;
}

sub scheme {
    my $self = shift;
    my $old = $self->{'scheme'};
    if (@_) {
	my $new_scheme = shift;
	if (defined($new_scheme) && length($new_scheme)) {
	    # reparse URL with new scheme
	    my $str = $self->as_string;
	    $str =~ s/^[\w+\-.]+://;
	    my $newself = new URI::URL "$new_scheme:$str";
	    %$self = %$newself;
	    bless $self, ref($newself);
	} else {
	    $self->{'scheme'} = undef;
	}
    }
    $old;
}

sub crack
{
    # should be overridden by subclasses
    my $self = shift;
    ($self->scheme,  # 0: scheme
     undef,          # 1: user
     undef,          # 2: passwd
     undef,          # 3: host
     undef,          # 4: port
     undef,          # 5: path
     undef,          # 6: params
     undef,          # 7: query
     undef           # 8: fragment
    )
}

# These are overridden by _generic (this is just a noop for those schemes that
# do not wish to be a subclass of URI::URL::_generic)
sub abs { shift->clone; }
sub rel { shift->clone; }

# This method should always be overridden in subclasses
sub as_string {
    "<URL>";
}

# Compare two URLs, subclasses will provide a more correct implementation
sub eq {
    my($self, $other) = @_;
    $other = URI::URL->new($other, $self) unless ref $other;
    ref($self) eq ref($other) &&
      $self->scheme eq $other->scheme &&
      $self->as_string eq $other->as_string;  # Case-sensitive
}

# This is set up as an alias for various methods
sub bad_method {
    my $self = shift;
    my $scheme = $self->scheme;
    Carp::croak("Illegal method called for $scheme: URL")
	if $Strict_URL;
    # Carp::carp("Illegal method called for $scheme: URL")
    #     if $^W;
    undef;
}

sub print_on
{
    no strict qw(refs);  # because we use strings as filehandles
    my $self = shift;
    my $fh = shift || 'STDERR';
    my($k, $v);
    print $fh "Dump of URI::URL $self...\n";
    foreach $k (sort keys %$self){
	$v = $self->{$k};
	$v = 'UNDEF' unless defined $v;
	print $fh "  $k\t'$v'\n";
    }
}

1;


#########################################################################
#### D O C U M E N T A T I O N
#########################################################################

=head1 NAME

URI::URL - Uniform Resource Locators (absolute and relative)

=head1 SYNOPSIS

 use URI::URL;

 # Constructors
 $url1 = new URI::URL 'http://www.perl.com/%7Euser/gisle.gif';
 $url2 = new URI::URL 'gisle.gif', 'http://www.com/%7Euser';
 $url3 = url 'http://www.sn.no/'; # handy constructor
 $url4 = $url2->abs;       # get absolute url using base
 $url5 = $url2->abs('http:/other/path');
 $url6 = newlocal URI::URL 'test';

 # Stringify URL
 $str1 = $url->as_string;  # complete escaped URL string
 $str2 = $url->full_path;  # escaped path+params+query
 $str3 = "$url";           # use operator overloading

 # Retrieving Generic-RL components:
 $scheme   = $url->scheme;
 $netloc   = $url->netloc; # see user,password,host,port below
 $path     = $url->path;
 $params   = $url->params;
 $query    = $url->query;
 $frag     = $url->frag;

 # Accessing elements in their escaped form
 $path     = $url->epath;
 $params   = $url->eparams;
 $query    = $url->equery;

 # Retrieving Network location (netloc) components:
 $user     = $url->user;
 $password = $url->password;
 $host     = $url->host;
 $port     = $url->port;   # returns default if not defined

 # Retrieve escaped path components as an array
 @path     = $url->path_components;

 # HTTP query-string access methods
 @keywords = $url->keywords;
 @form     = $url->query_form;

 # All methods above can set the field values, e.g:
 $url->scheme('http');
 $url->host('www.w3.org');
 $url->port($url->default_port);
 $url->base($url5);                      # use string or object
 $url->keywords(qw(dog bones));

 # File methods
 $url = new URI::URL "file:/foo/bar";
 open(F, $url->local_path) or die;

 # Compare URLs
 if ($url->eq("http://www.sn.no")) or die;

=head1 DESCRIPTION

This module implements the URI::URL class representing Uniform
Resource Locators (URL). URLs provide a compact string representation
for resources available via the Internet. Both absolute (RFC 1738) and
relative (RFC 1808) URLs are supported.

URI::URL objects are created by calling new(), which takes as argument
a string representation of the URL or an existing URL object reference
to be cloned. Specific individual elements can then be accessed via
the scheme(), user(), password(), host(), port(), path(), params(),
query() and frag() methods.  In addition escaped versions of the path,
params and query can be accessed with the epath(), eparams() and
equery() methods.  Note that some URL schemes will support all these
methods.

The object constructor new() must be able to determine the scheme for
the URL.  If a scheme is not specified in the URL itself, it will use
the scheme specified by the base URL. If no base URL scheme is defined
then new() will croak if URI::URL::strict(1) has been invoked,
otherwise I<http> is silently assumed.  Once the scheme has been
determined new() then uses the implementor() function to determine
which class implements that scheme.  If no implementor class is
defined for the scheme then new() will croak if URI::URL::strict(1)
has been invoked, otherwise the internal generic URL class is assumed.

Internally defined schemes are implemented by the
URI::URL::I<scheme_name> module.  The URI::URL::implementor() function
can be used to explicitly set the class used to implement a scheme if
you want to override this.


=head1 HOW AND WHEN TO ESCAPE


=over 3

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -