⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 idna_convert.class.php

📁 Joomla15 - 最新开源CMS
💻 PHP
📖 第 1 页 / 共 3 页
字号:
<?php
/* ------------------------------------------------------------------------- */
/* idna_convert.class.php - Encode / Decode Internationalized Domain Names   */
/* (c) 2004-2005 phlyLabs, Berlin (http://phlylabs.de)                       */
/* All rights reserved                                                       */
/* v0.4.2                                                                    */
/* ------------------------------------------------------------------------- */

// {{{ license

/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 foldmethod=marker: */
//
// +----------------------------------------------------------------------+
// | This library is free software; you can redistribute it and/or modify |
// | it under the terms of the GNU Lesser General Public License as       |
// | published by the Free Software Foundation; either version 2.1 of the |
// | License, or (at your option) any later version.                      |
// |                                                                      |
// | This library is distributed in the hope that it will be useful, but  |
// | WITHOUT ANY WARRANTY; without even the implied warranty of           |
// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    |
// | Lesser General Public License for more details.                      |
// |                                                                      |
// | You should have received a copy of the GNU Lesser General Public     |
// | License along with this library; if not, write to the Free Software  |
// | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 |
// | USA.                                                                 |
// +----------------------------------------------------------------------+
//

// }}}

/**
 * Encode/decode Internationalized Domain Names.
 *
 * The class allows to convert internationalized domain names
 * (see RFC 3490 for details) as they can be used with various registries worldwide
 * to be translated between their original (localized) form and their encoded form
 * as it will be used in the DNS (Domain Name System).
 *
 * The class provides two public methods, encode() and decode(), which do exactly
 * what you would expect them to do. You are allowed to use complete domain names,
 * simple strings and complete email addresses as well. That means, that you might
 * use any of the following notations:
 *
 * - www.n枚rgler.com
 * - xn--nrgler-wxa
 * - xn--brse-5qa.xn--knrz-1ra.info
 *
 * Unicode input might be given as either UTF-8 string, UCS-4 string or UCS-4
 * array. Unicode output is available in the same formats.
 * You can select your preferred format via {@link set_paramter()}.
 *
 * ACE input and output is always expected to be ASCII.
 *
 * @author  Matthias Sommerfeld <mso@phlylabs.de>
 * @version 0.4.2
 *
 */

class idna_convert
{
    // {{{ npdata
    /**
     * Holds all relevant mapping tables, loaded from a seperate file on construct
     * See RFC3454 for details
     *
     * @var array
     * @access private
     */
    var $_np_ = array();
    // }}}


    // Internal settings, do not mess with them
    var $_punycode_prefix = 'xn--';
    var $_invalid_ucs =     0x80000000;
    var $_max_ucs =         0x10FFFF;
    var $_base =            36;
    var $_tmin =            1;
    var $_tmax =            26;
    var $_skew =            38;
    var $_damp =            700;
    var $_initial_bias =    72;
    var $_initial_n =       0x80;
    var $_sbase =           0xAC00;
    var $_lbase =           0x1100;
    var $_vbase =           0x1161;
    var $_tbase =           0x11a7;
    var $_lcount =          19;
    var $_vcount =          21;
    var $_tcount =          28;
    var $_ncount =          588;   // _vcount * _tcount
    var $_scount =          11172; // _lcount * _tcount * _vcount
    var $_error =           false;

    // See set_parameter() for details of how to change the following settings
    // from within your script / application
    var $_api_encoding   =  'utf8'; // Default input charset is UTF-8
    var $_allow_overlong =  false;  // Overlong UTF-8 encodings are forbidden
    var $_strict_mode    =  false;  // Behave strict or not

    // The constructor
    function idna_convert($options = false)
    {
        $this->slast = $this->_sbase + $this->_lcount * $this->_vcount * $this->_tcount;
        if (function_exists('file_get_contents')) {
            $this->_np_ = unserialize(file_get_contents(dirname(__FILE__).'/npdata.ser'));
        } else {
            $this->_np_ = unserialize(join('', file(dirname(__FILE__).'/npdata.ser')));
        }
        // If parameters are given, pass these to the respective method
        if (is_array($options)) {
            return $this->set_parameter($options);
        }
        return true;
    }

    /**
    * Sets a new option value. Available options and values:
    * [encoding - Use either UTF-8, UCS4 as array or UCS4 as string as input ('utf8' for UTF-8,
    *         'ucs4_string' and 'ucs4_array' respectively for UCS4); The output is always UTF-8]
    * [overlong - Unicode does not allow unnecessarily long encodings of chars,
    *             to allow this, set this parameter to true, else to false;
    *             default is false.]
    * [strict - true: strict mode, good for registration purposes - Causes errors
    *           on failures; false: loose mode, ideal for "wildlife" applications
    *           by silently ignoring errors and returning the original input instead
    *
    * @param    mixed     Parameter to set (string: single parameter; array of Parameter => Value pairs)
    * @param    string    Value to use (if parameter 1 is a string)
    * @return   boolean   true on success, false otherwise
    * @access   public
    */
    function set_parameter($option, $value = false)
    {
        if (!is_array($option)) {
            $option = array($option => $value);
        }
        foreach ($option as $k => $v) {
            switch ($k) {
            case 'encoding':
                switch ($v) {
                case 'utf8':
                case 'ucs4_string':
                case 'ucs4_array':
                    $this->_api_encoding = $v;
                    break;
                default:
                    $this->_error('Set Parameter: Unknown parameter '.$v.' for option '.$k);
                    return false;
                }
                break;
            case 'overlong':
                $this->_allow_overlong = ($v) ? true : false;
                break;
            case 'strict':
                $this->_strict_mode = ($v) ? true : false;
                break;
            default:
                $this->_error('Set Parameter: Unknown option '.$k);
                return false;
            }
        }
        return true;
    }

    /**
    * Decode a given ACE domain name
    * @param    string   Domain name (ACE string)
    * [@param    string   Desired output encoding, see {@link set_parameter}]
    * @return   string   Decoded Domain name (UTF-8 or UCS-4)
    * @access   public
    */
    function decode($input, $one_time_encoding = false)
    {
        // Optionally set
        if ($one_time_encoding) {
            switch ($one_time_encoding) {
            case 'utf8':
            case 'ucs4_string':
            case 'ucs4_array':
                break;
            default:
                $this->_error('Unknown encoding '.$one_time_encoding);
                return false;
            }
        }
        // Make sure to drop any newline characters around
        $input = trim($input);

        // Negotiate input and try to determine, wether it is a plain string,
        // an email address or something like a complete URL
        if (strpos($input, '@')) { // Maybe it is an email address
            // No no in strict mode
            if ($this->_strict_mode) {
                $this->_error('Only simple domain name parts can be handled in strict mode');
                return false;
            }
            list($email_pref, $input) = explode('@', $input, 2);
            $arr = explode('.', $input);
            foreach ($arr as $k => $v) {
                $conv = $this->_decode($v);
                if ($conv) $arr[$k] = $conv;
            }
            $return = $email_pref . '@' . join('.', $arr);
        } elseif (preg_match('![:\./]!', $input)) { // Or a complete domain name (with or without paths / parameters)
            // No no in strict mode
            if ($this->_strict_mode) {
                $this->_error('Only simple domain name parts can be handled in strict mode');
                return false;
            }
            $parsed = parse_url($input);
            if (isset($parsed['host'])) {
                $arr = explode('.', $parsed['host']);
                foreach ($arr as $k => $v) {
                    $conv = $this->_decode($v);
                    if ($conv) $arr[$k] = $conv;
                }
                $parsed['host'] = join('.', $arr);
                $return =
                        (empty($parsed['scheme']) ? '' : $parsed['scheme'].(strtolower($parsed['scheme']) == 'mailto' ? ':' : '://'))
                        .(empty($parsed['user']) ? '' : $parsed['user'].(empty($parsed['pass']) ? '' : ':'.$parsed['pass']).'@')
                        .$parsed['host']
                        .(empty($parsed['port']) ? '' : ':'.$parsed['port'])
                        .$parsed['path']
                        .(empty($parsed['query']) ? '' : '?'.$parsed['query'])
                        .(empty($parsed['fragment']) ? '' : '#'.$parsed['fragment']);
            } else { // parse_url seems to have failed, try without it
                $arr = explode('.', $input);
                foreach ($arr as $k => $v) {
                    $conv = $this->_decode($v);
                    if ($conv) $arr[$k] = $conv;
                }
                $return = join('.', $arr);
            }
        } else { // Otherwise we consider it being a pure domain name string
            $return = $this->_decode($input);
        }
        // The output is UTF-8 by default, other output formats need conversion here
        // If one time encoding is given, use this, else the objects property
        switch (($one_time_encoding) ? $one_time_encoding : $this->_api_encoding) {
        case 'utf8':
            return $return;
            break;
        case 'ucs4_string':
           return $this->_ucs4_to_ucs4_string($this->_utf8_to_ucs4($return));
           break;
        case 'ucs4_array':
            return $this->_utf8_to_ucs4($return);
            break;
        default:
            $this->_error('Unsupported output format');
            return false;
        }
    }

    /**
    * Encode a given UTF-8 domain name
    * @param    string   Domain name (UTF-8 or UCS-4)
    * [@param    string   Desired input encoding, see {@link set_parameter}]
    * @return   string   Encoded Domain name (ACE string)
    * @access   public
    */
    function encode($decoded, $one_time_encoding = false)
    {
        // Forcing conversion of input to UCS4 array
        // If one time encoding is given, use this, else the objects property
        switch (($one_time_encoding) ? $one_time_encoding : $this->_api_encoding) {
        case 'utf8':
            $decoded = $this->_utf8_to_ucs4($decoded);
            break;
        case 'ucs4_string':
           $decoded = $this->_ucs4_string_to_ucs4($decoded);
        case 'ucs4_array':
           break;
        default:
            // $this->_error('Unsupported input format: '.$this->_api_encoding);
            $this->_error('Unsupported input format');
            return false;
        }

        // No input, no output, what else did you expect?
        if (empty($decoded)) return '';

        // Anchors for iteration
        $last_begin = 0;
        // Output string
        $output = '';
        foreach ($decoded as $k => $v) {
            // Make sure to use just the plain dot
            switch($v) {
            case 0x3002:
            case 0xFF0E:
            case 0xFF61:
                $decoded[$k] = 0x2E;
                // It's right, no break here
                // The codepoints above have to be converted to dots anyway

            // Stumbling across an anchoring character
            case 0x2E:
            case 0x2F:
            case 0x3A:
            case 0x3F:
            case 0x40:
                // Neither email addresses nor URLs allowed in strict mode
                if ($this->_strict_mode) {
                   $this->_error('Neither email addresses nor URLs are allowed in strict mode.');
                   return false;
                } else {
                    // Skip first char
                    if ($k) {
                        $encoded = '';
                        $encoded = $this->_encode(array_slice($decoded, $last_begin, (($k)-$last_begin)));
                        if ($encoded) {
                            $output .= $encoded;
                        } else {
                            $output .= $this->_ucs4_to_utf8(array_slice($decoded, $last_begin, (($k)-$last_begin)));
                        }
                        $output .= chr($decoded[$k]);
                    }
                    $last_begin = $k + 1;
                }
            }
        }
        // Catch the rest of the string
        if ($last_begin) {
            $inp_len = sizeof($decoded);
            $encoded = '';
            $encoded = $this->_encode(array_slice($decoded, $last_begin, (($inp_len)-$last_begin)));
            if ($encoded) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -