url.php

来自「PHP 知识管理系统(基于树结构的知识管理系统), 英文原版的PHP源码。」· PHP 代码 · 共 528 行 · 第 1/2 页

PHP
528
字号
<?php
/**
 *  base include file for SimpleTest
 *  @package    SimpleTest
 *  @subpackage WebTester
 *  @version    $Id: url.php 1723 2008-04-08 00:34:10Z lastcraft $
 */

/**#@+
 *  include other SimpleTest class files
 */
require_once(dirname(__FILE__) . '/encoding.php');
/**#@-*/

/**
 *    URL parser to replace parse_url() PHP function which
 *    got broken in PHP 4.3.0. Adds some browser specific
 *    functionality such as expandomatics.
 *    Guesses a bit trying to separate the host from
 *    the path and tries to keep a raw, possibly unparsable,
 *    request string as long as possible.
 *    @package SimpleTest
 *    @subpackage WebTester
 */
class SimpleUrl {
    var $_scheme;
    var $_username;
    var $_password;
    var $_host;
    var $_port;
    var $_path;
    var $_request;
    var $_fragment;
    var $_x;
    var $_y;
    var $_target;
    var $_raw = false;
    
    /**
     *    Constructor. Parses URL into sections.
     *    @param string $url        Incoming URL.
     *    @access public
     */
    function SimpleUrl($url = '') {
        list($x, $y) = $this->_chompCoordinates($url);
        $this->setCoordinates($x, $y);
        $this->_scheme = $this->_chompScheme($url);
        list($this->_username, $this->_password) = $this->_chompLogin($url);
        $this->_host = $this->_chompHost($url);
        $this->_port = false;
        if (preg_match('/(.*?):(.*)/', $this->_host, $host_parts)) {
            $this->_host = $host_parts[1];
            $this->_port = (integer)$host_parts[2];
        }
        $this->_path = $this->_chompPath($url);
        $this->_request = $this->_parseRequest($this->_chompRequest($url));
        $this->_fragment = (strncmp($url, "#", 1) == 0 ? substr($url, 1) : false);
        $this->_target = false;
    }
    
    /**
     *    Extracts the X, Y coordinate pair from an image map.
     *    @param string $url   URL so far. The coordinates will be
     *                         removed.
     *    @return array        X, Y as a pair of integers.
     *    @access private
     */
    function _chompCoordinates(&$url) {
        if (preg_match('/(.*)\?(\d+),(\d+)$/', $url, $matches)) {
            $url = $matches[1];
            return array((integer)$matches[2], (integer)$matches[3]);
        }
        return array(false, false);
    }
    
    /**
     *    Extracts the scheme part of an incoming URL.
     *    @param string $url   URL so far. The scheme will be
     *                         removed.
     *    @return string       Scheme part or false.
     *    @access private
     */
    function _chompScheme(&$url) {
        if (preg_match('/^([^\/:]*):(\/\/)(.*)/', $url, $matches)) {
            $url = $matches[2] . $matches[3];
            return $matches[1];
        }
        return false;
    }
    
    /**
     *    Extracts the username and password from the
     *    incoming URL. The // prefix will be reattached
     *    to the URL after the doublet is extracted.
     *    @param string $url    URL so far. The username and
     *                          password are removed.
     *    @return array         Two item list of username and
     *                          password. Will urldecode() them.
     *    @access private
     */
    function _chompLogin(&$url) {
        $prefix = '';
        if (preg_match('/^(\/\/)(.*)/', $url, $matches)) {
            $prefix = $matches[1];
            $url = $matches[2];
        }
        if (preg_match('/^([^\/]*)@(.*)/', $url, $matches)) {
            $url = $prefix . $matches[2];
            $parts = split(":", $matches[1]);
            return array(
                    urldecode($parts[0]),
                    isset($parts[1]) ? urldecode($parts[1]) : false);
        }
        $url = $prefix . $url;
        return array(false, false);
    }
    
    /**
     *    Extracts the host part of an incoming URL.
     *    Includes the port number part. Will extract
     *    the host if it starts with // or it has
     *    a top level domain or it has at least two
     *    dots.
     *    @param string $url    URL so far. The host will be
     *                          removed.
     *    @return string        Host part guess or false.
     *    @access private
     */
    function _chompHost(&$url) {
        if (preg_match('/^(\/\/)(.*?)(\/.*|\?.*|#.*|$)/', $url, $matches)) {
            $url = $matches[3];
            return $matches[2];
        }
        if (preg_match('/(.*?)(\.\.\/|\.\/|\/|\?|#|$)(.*)/', $url, $matches)) {
            $tlds = SimpleUrl::getAllTopLevelDomains();
            if (preg_match('/[a-z0-9\-]+\.(' . $tlds . ')/i', $matches[1])) {
                $url = $matches[2] . $matches[3];
                return $matches[1];
            } elseif (preg_match('/[a-z0-9\-]+\.[a-z0-9\-]+\.[a-z0-9\-]+/i', $matches[1])) {
                $url = $matches[2] . $matches[3];
                return $matches[1];
            }
        }
        return false;
    }
    
    /**
     *    Extracts the path information from the incoming
     *    URL. Strips this path from the URL.
     *    @param string $url     URL so far. The host will be
     *                           removed.
     *    @return string         Path part or '/'.
     *    @access private
     */
    function _chompPath(&$url) {
        if (preg_match('/(.*?)(\?|#|$)(.*)/', $url, $matches)) {
            $url = $matches[2] . $matches[3];
            return ($matches[1] ? $matches[1] : '');
        }
        return '';
    }
    
    /**
     *    Strips off the request data.
     *    @param string $url  URL so far. The request will be
     *                        removed.
     *    @return string      Raw request part.
     *    @access private
     */
    function _chompRequest(&$url) {
        if (preg_match('/\?(.*?)(#|$)(.*)/', $url, $matches)) {
            $url = $matches[2] . $matches[3];
            return $matches[1];
        }
        return '';
    }
        
    /**
     *    Breaks the request down into an object.
     *    @param string $raw           Raw request.
     *    @return SimpleFormEncoding    Parsed data.
     *    @access private
     */
    function _parseRequest($raw) {
        $this->_raw = $raw;
        $request = new SimpleGetEncoding();
        foreach (split("&", $raw) as $pair) {
            if (preg_match('/(.*?)=(.*)/', $pair, $matches)) {
                $request->add($matches[1], urldecode($matches[2]));
            } elseif ($pair) {
                $request->add($pair, '');
            }
        }
        return $request;
    }
    
    /**
     *    Accessor for protocol part.
     *    @param string $default    Value to use if not present.
     *    @return string            Scheme name, e.g "http".
     *    @access public
     */
    function getScheme($default = false) {
        return $this->_scheme ? $this->_scheme : $default;
    }
    
    /**
     *    Accessor for user name.
     *    @return string    Username preceding host.
     *    @access public
     */
    function getUsername() {
        return $this->_username;
    }
    
    /**
     *    Accessor for password.
     *    @return string    Password preceding host.
     *    @access public
     */
    function getPassword() {
        return $this->_password;
    }
    
    /**
     *    Accessor for hostname and port.
     *    @param string $default    Value to use if not present.
     *    @return string            Hostname only.
     *    @access public
     */
    function getHost($default = false) {
        return $this->_host ? $this->_host : $default;
    }
    
    /**
     *    Accessor for top level domain.
     *    @return string       Last part of host.
     *    @access public
     */
    function getTld() {
        $path_parts = pathinfo($this->getHost());
        return (isset($path_parts['extension']) ? $path_parts['extension'] : false);
    }
    
    /**
     *    Accessor for port number.
     *    @return integer    TCP/IP port number.
     *    @access public
     */
    function getPort() {
        return $this->_port;
    }        
            
    /**
     *    Accessor for path.
     *    @return string    Full path including leading slash if implied.
     *    @access public
     */
    function getPath() {
        if (! $this->_path && $this->_host) {
            return '/';
        }
        return $this->_path;
    }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?