📄 idna_convert.class.php
字号:
<?php
/* ------------------------------------------------------------------------- */
/* idna_convert.class.php - Encode / Decode Internationalized Domain Names */
/* (c) 2004-2005 phlyLabs, Berlin (http://phlylabs.de) */
/* All rights reserved */
/* v0.4.2 */
/* ------------------------------------------------------------------------- */
// {{{ license
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 foldmethod=marker: */
//
// +----------------------------------------------------------------------+
// | This library is free software; you can redistribute it and/or modify |
// | it under the terms of the GNU Lesser General Public License as |
// | published by the Free Software Foundation; either version 2.1 of the |
// | License, or (at your option) any later version. |
// | |
// | This library is distributed in the hope that it will be useful, but |
// | WITHOUT ANY WARRANTY; without even the implied warranty of |
// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
// | Lesser General Public License for more details. |
// | |
// | You should have received a copy of the GNU Lesser General Public |
// | License along with this library; if not, write to the Free Software |
// | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 |
// | USA. |
// +----------------------------------------------------------------------+
//
// }}}
/**
* Encode/decode Internationalized Domain Names.
*
* The class allows to convert internationalized domain names
* (see RFC 3490 for details) as they can be used with various registries worldwide
* to be translated between their original (localized) form and their encoded form
* as it will be used in the DNS (Domain Name System).
*
* The class provides two public methods, encode() and decode(), which do exactly
* what you would expect them to do. You are allowed to use complete domain names,
* simple strings and complete email addresses as well. That means, that you might
* use any of the following notations:
*
* - www.n枚rgler.com
* - xn--nrgler-wxa
* - xn--brse-5qa.xn--knrz-1ra.info
*
* Unicode input might be given as either UTF-8 string, UCS-4 string or UCS-4
* array. Unicode output is available in the same formats.
* You can select your preferred format via {@link set_paramter()}.
*
* ACE input and output is always expected to be ASCII.
*
* @author Matthias Sommerfeld <mso@phlylabs.de>
* @version 0.4.2
*
*/
class idna_convert
{
// {{{ npdata
/**
* Holds all relevant mapping tables, loaded from a seperate file on construct
* See RFC3454 for details
*
* @var array
* @access private
*/
var $_np_ = array();
// }}}
// Internal settings, do not mess with them
var $_punycode_prefix = 'xn--';
var $_invalid_ucs = 0x80000000;
var $_max_ucs = 0x10FFFF;
var $_base = 36;
var $_tmin = 1;
var $_tmax = 26;
var $_skew = 38;
var $_damp = 700;
var $_initial_bias = 72;
var $_initial_n = 0x80;
var $_sbase = 0xAC00;
var $_lbase = 0x1100;
var $_vbase = 0x1161;
var $_tbase = 0x11a7;
var $_lcount = 19;
var $_vcount = 21;
var $_tcount = 28;
var $_ncount = 588; // _vcount * _tcount
var $_scount = 11172; // _lcount * _tcount * _vcount
var $_error = false;
// See set_parameter() for details of how to change the following settings
// from within your script / application
var $_api_encoding = 'utf8'; // Default input charset is UTF-8
var $_allow_overlong = false; // Overlong UTF-8 encodings are forbidden
var $_strict_mode = false; // Behave strict or not
// The constructor
function idna_convert($options = false)
{
$this->slast = $this->_sbase + $this->_lcount * $this->_vcount * $this->_tcount;
if (function_exists('file_get_contents')) {
$this->_np_ = unserialize(file_get_contents(dirname(__FILE__).'/npdata.ser'));
} else {
$this->_np_ = unserialize(join('', file(dirname(__FILE__).'/npdata.ser')));
}
// If parameters are given, pass these to the respective method
if (is_array($options)) {
return $this->set_parameter($options);
}
return true;
}
/**
* Sets a new option value. Available options and values:
* [encoding - Use either UTF-8, UCS4 as array or UCS4 as string as input ('utf8' for UTF-8,
* 'ucs4_string' and 'ucs4_array' respectively for UCS4); The output is always UTF-8]
* [overlong - Unicode does not allow unnecessarily long encodings of chars,
* to allow this, set this parameter to true, else to false;
* default is false.]
* [strict - true: strict mode, good for registration purposes - Causes errors
* on failures; false: loose mode, ideal for "wildlife" applications
* by silently ignoring errors and returning the original input instead
*
* @param mixed Parameter to set (string: single parameter; array of Parameter => Value pairs)
* @param string Value to use (if parameter 1 is a string)
* @return boolean true on success, false otherwise
* @access public
*/
function set_parameter($option, $value = false)
{
if (!is_array($option)) {
$option = array($option => $value);
}
foreach ($option as $k => $v) {
switch ($k) {
case 'encoding':
switch ($v) {
case 'utf8':
case 'ucs4_string':
case 'ucs4_array':
$this->_api_encoding = $v;
break;
default:
$this->_error('Set Parameter: Unknown parameter '.$v.' for option '.$k);
return false;
}
break;
case 'overlong':
$this->_allow_overlong = ($v) ? true : false;
break;
case 'strict':
$this->_strict_mode = ($v) ? true : false;
break;
default:
$this->_error('Set Parameter: Unknown option '.$k);
return false;
}
}
return true;
}
/**
* Decode a given ACE domain name
* @param string Domain name (ACE string)
* [@param string Desired output encoding, see {@link set_parameter}]
* @return string Decoded Domain name (UTF-8 or UCS-4)
* @access public
*/
function decode($input, $one_time_encoding = false)
{
// Optionally set
if ($one_time_encoding) {
switch ($one_time_encoding) {
case 'utf8':
case 'ucs4_string':
case 'ucs4_array':
break;
default:
$this->_error('Unknown encoding '.$one_time_encoding);
return false;
}
}
// Make sure to drop any newline characters around
$input = trim($input);
// Negotiate input and try to determine, wether it is a plain string,
// an email address or something like a complete URL
if (strpos($input, '@')) { // Maybe it is an email address
// No no in strict mode
if ($this->_strict_mode) {
$this->_error('Only simple domain name parts can be handled in strict mode');
return false;
}
list($email_pref, $input) = explode('@', $input, 2);
$arr = explode('.', $input);
foreach ($arr as $k => $v) {
$conv = $this->_decode($v);
if ($conv) $arr[$k] = $conv;
}
$return = $email_pref . '@' . join('.', $arr);
} elseif (preg_match('![:\./]!', $input)) { // Or a complete domain name (with or without paths / parameters)
// No no in strict mode
if ($this->_strict_mode) {
$this->_error('Only simple domain name parts can be handled in strict mode');
return false;
}
$parsed = parse_url($input);
if (isset($parsed['host'])) {
$arr = explode('.', $parsed['host']);
foreach ($arr as $k => $v) {
$conv = $this->_decode($v);
if ($conv) $arr[$k] = $conv;
}
$parsed['host'] = join('.', $arr);
$return =
(empty($parsed['scheme']) ? '' : $parsed['scheme'].(strtolower($parsed['scheme']) == 'mailto' ? ':' : '://'))
.(empty($parsed['user']) ? '' : $parsed['user'].(empty($parsed['pass']) ? '' : ':'.$parsed['pass']).'@')
.$parsed['host']
.(empty($parsed['port']) ? '' : ':'.$parsed['port'])
.$parsed['path']
.(empty($parsed['query']) ? '' : '?'.$parsed['query'])
.(empty($parsed['fragment']) ? '' : '#'.$parsed['fragment']);
} else { // parse_url seems to have failed, try without it
$arr = explode('.', $input);
foreach ($arr as $k => $v) {
$conv = $this->_decode($v);
if ($conv) $arr[$k] = $conv;
}
$return = join('.', $arr);
}
} else { // Otherwise we consider it being a pure domain name string
$return = $this->_decode($input);
}
// The output is UTF-8 by default, other output formats need conversion here
// If one time encoding is given, use this, else the objects property
switch (($one_time_encoding) ? $one_time_encoding : $this->_api_encoding) {
case 'utf8':
return $return;
break;
case 'ucs4_string':
return $this->_ucs4_to_ucs4_string($this->_utf8_to_ucs4($return));
break;
case 'ucs4_array':
return $this->_utf8_to_ucs4($return);
break;
default:
$this->_error('Unsupported output format');
return false;
}
}
/**
* Encode a given UTF-8 domain name
* @param string Domain name (UTF-8 or UCS-4)
* [@param string Desired input encoding, see {@link set_parameter}]
* @return string Encoded Domain name (ACE string)
* @access public
*/
function encode($decoded, $one_time_encoding = false)
{
// Forcing conversion of input to UCS4 array
// If one time encoding is given, use this, else the objects property
switch (($one_time_encoding) ? $one_time_encoding : $this->_api_encoding) {
case 'utf8':
$decoded = $this->_utf8_to_ucs4($decoded);
break;
case 'ucs4_string':
$decoded = $this->_ucs4_string_to_ucs4($decoded);
case 'ucs4_array':
break;
default:
// $this->_error('Unsupported input format: '.$this->_api_encoding);
$this->_error('Unsupported input format');
return false;
}
// No input, no output, what else did you expect?
if (empty($decoded)) return '';
// Anchors for iteration
$last_begin = 0;
// Output string
$output = '';
foreach ($decoded as $k => $v) {
// Make sure to use just the plain dot
switch($v) {
case 0x3002:
case 0xFF0E:
case 0xFF61:
$decoded[$k] = 0x2E;
// It's right, no break here
// The codepoints above have to be converted to dots anyway
// Stumbling across an anchoring character
case 0x2E:
case 0x2F:
case 0x3A:
case 0x3F:
case 0x40:
// Neither email addresses nor URLs allowed in strict mode
if ($this->_strict_mode) {
$this->_error('Neither email addresses nor URLs are allowed in strict mode.');
return false;
} else {
// Skip first char
if ($k) {
$encoded = '';
$encoded = $this->_encode(array_slice($decoded, $last_begin, (($k)-$last_begin)));
if ($encoded) {
$output .= $encoded;
} else {
$output .= $this->_ucs4_to_utf8(array_slice($decoded, $last_begin, (($k)-$last_begin)));
}
$output .= chr($decoded[$k]);
}
$last_begin = $k + 1;
}
}
}
// Catch the rest of the string
if ($last_begin) {
$inp_len = sizeof($decoded);
$encoded = '';
$encoded = $this->_encode(array_slice($decoded, $last_begin, (($inp_len)-$last_begin)));
if ($encoded) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -