📄 idna_convert.class.php
字号:
<?php// {{{ license/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 foldmethod=marker: *///// +----------------------------------------------------------------------+// | This library is free software; you can redistribute it and/or modify |// | it under the terms of the GNU Lesser General Public License as |// | published by the Free Software Foundation; either version 2.1 of the |// | License, or (at your option) any later version. |// | |// | This library is distributed in the hope that it will be useful, but |// | WITHOUT ANY WARRANTY; without even the implied warranty of |// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |// | Lesser General Public License for more details. |// | |// | You should have received a copy of the GNU Lesser General Public |// | License along with this library; if not, write to the Free Software |// | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 |// | USA. |// +----------------------------------------------------------------------+//// }}}/** * Encode/decode Internationalized Domain Names. * * The class allows to convert internationalized domain names * (see RFC 3490 for details) as they can be used with various registries worldwide * to be translated between their original (localized) form and their encoded form * as it will be used in the DNS (Domain Name System). * * The class provides two public methods, encode() and decode(), which do exactly * what you would expect them to do. You are allowed to use complete domain names, * simple strings and complete email addresses as well. That means, that you might * use any of the following notations: * * - www.n枚rgler.com * - xn--nrgler-wxa * - xn--brse-5qa.xn--knrz-1ra.info * * Unicode input might be given as either UTF-8 string, UCS-4 string or UCS-4 * array. Unicode output is available in the same formats. * You can select your preferred format via {@link set_paramter()}. * * ACE input and output is always expected to be ASCII. * * @author Matthias Sommerfeld <mso@phlylabs.de> * @copyright 2004-2007 phlyLabs Berlin, http://phlylabs.de * @version 0.5.1 * */class idna_convert{ /** * Holds all relevant mapping tables, loaded from a seperate file on construct * See RFC3454 for details * * @var array * @access private */ var $NP = array(); // Internal settings, do not mess with them var $_punycode_prefix = 'xn--'; var $_invalid_ucs = 0x80000000; var $_max_ucs = 0x10FFFF; var $_base = 36; var $_tmin = 1; var $_tmax = 26; var $_skew = 38; var $_damp = 700; var $_initial_bias = 72; var $_initial_n = 0x80; var $_sbase = 0xAC00; var $_lbase = 0x1100; var $_vbase = 0x1161; var $_tbase = 0x11A7; var $_lcount = 19; var $_vcount = 21; var $_tcount = 28; var $_ncount = 588; // _vcount * _tcount var $_scount = 11172; // _lcount * _tcount * _vcount var $_error = false; // See {@link set_paramter()} for details of how to change the following // settings from within your script / application var $_api_encoding = 'utf8'; // Default input charset is UTF-8 var $_allow_overlong = false; // Overlong UTF-8 encodings are forbidden var $_strict_mode = false; // Behave strict or not // The constructor function idna_convert($options = false) { $this->slast = $this->_sbase + $this->_lcount * $this->_vcount * $this->_tcount; if (function_exists('file_get_contents')) { $this->NP = unserialize(file_get_contents(dirname(__FILE__).'/npdata.ser')); } else { $this->NP = unserialize(join('', file(dirname(__FILE__).'/npdata.ser'))); } // If parameters are given, pass these to the respective method if (is_array($options)) { return $this->set_parameter($options); } return true; } /** * Sets a new option value. Available options and values: * [encoding - Use either UTF-8, UCS4 as array or UCS4 as string as input ('utf8' for UTF-8, * 'ucs4_string' and 'ucs4_array' respectively for UCS4); The output is always UTF-8] * [overlong - Unicode does not allow unnecessarily long encodings of chars, * to allow this, set this parameter to true, else to false; * default is false.] * [strict - true: strict mode, good for registration purposes - Causes errors * on failures; false: loose mode, ideal for "wildlife" applications * by silently ignoring errors and returning the original input instead * * @param mixed Parameter to set (string: single parameter; array of Parameter => Value pairs) * @param string Value to use (if parameter 1 is a string) * @return boolean true on success, false otherwise * @access public */ function set_parameter($option, $value = false) { if (!is_array($option)) { $option = array($option => $value); } foreach ($option as $k => $v) { switch ($k) { case 'encoding': switch ($v) { case 'utf8': case 'ucs4_string': case 'ucs4_array': $this->_api_encoding = $v; break; default: $this->_error('Set Parameter: Unknown parameter '.$v.' for option '.$k); return false; } break; case 'overlong': $this->_allow_overlong = ($v) ? true : false; break; case 'strict': $this->_strict_mode = ($v) ? true : false; break; default: $this->_error('Set Parameter: Unknown option '.$k); return false; } } return true; } /** * Decode a given ACE domain name * @param string Domain name (ACE string) * [@param string Desired output encoding, see {@link set_parameter}] * @return string Decoded Domain name (UTF-8 or UCS-4) * @access public */ function decode($input, $one_time_encoding = false) { // Optionally set if ($one_time_encoding) { switch ($one_time_encoding) { case 'utf8': case 'ucs4_string': case 'ucs4_array': break; default: $this->_error('Unknown encoding '.$one_time_encoding); return false; } } // Make sure to drop any newline characters around $input = trim($input); // Negotiate input and try to determine, whether it is a plain string, // an email address or something like a complete URL if (strpos($input, '@')) { // Maybe it is an email address // No no in strict mode if ($this->_strict_mode) { $this->_error('Only simple domain name parts can be handled in strict mode'); return false; } list ($email_pref, $input) = explode('@', $input, 2); $arr = explode('.', $input); foreach ($arr as $k => $v) { if (preg_match('!^'.preg_quote($this->_punycode_prefix, '!').'!', $v)) { $conv = $this->_decode($v); if ($conv) $arr[$k] = $conv; } } $input = join('.', $arr); $arr = explode('.', $email_pref); foreach ($arr as $k => $v) { if (preg_match('!^'.preg_quote($this->_punycode_prefix, '!').'!', $v)) { $conv = $this->_decode($v); if ($conv) $arr[$k] = $conv; } } $email_pref = join('.', $arr); $return = $email_pref . '@' . $input; } elseif (preg_match('![:\./]!', $input)) { // Or a complete domain name (with or without paths / parameters) // No no in strict mode if ($this->_strict_mode) { $this->_error('Only simple domain name parts can be handled in strict mode'); return false; } $parsed = parse_url($input); if (isset($parsed['host'])) { $arr = explode('.', $parsed['host']); foreach ($arr as $k => $v) { $conv = $this->_decode($v); if ($conv) $arr[$k] = $conv; } $parsed['host'] = join('.', $arr); $return = (empty($parsed['scheme']) ? '' : $parsed['scheme'].(strtolower($parsed['scheme']) == 'mailto' ? ':' : '://')) .(empty($parsed['user']) ? '' : $parsed['user'].(empty($parsed['pass']) ? '' : ':'.$parsed['pass']).'@') .$parsed['host'] .(empty($parsed['port']) ? '' : ':'.$parsed['port']) .(empty($parsed['path']) ? '' : $parsed['path']) .(empty($parsed['query']) ? '' : '?'.$parsed['query']) .(empty($parsed['fragment']) ? '' : '#'.$parsed['fragment']); } else { // parse_url seems to have failed, try without it $arr = explode('.', $input); foreach ($arr as $k => $v) { $conv = $this->_decode($v); $arr[$k] = ($conv) ? $conv : $v; } $return = join('.', $arr); } } else { // Otherwise we consider it being a pure domain name string $return = $this->_decode($input); if (!$return) $return = $input; } // The output is UTF-8 by default, other output formats need conversion here // If one time encoding is given, use this, else the objects property switch (($one_time_encoding) ? $one_time_encoding : $this->_api_encoding) { case 'utf8': return $return; break; case 'ucs4_string': return $this->_ucs4_to_ucs4_string($this->_utf8_to_ucs4($return)); break; case 'ucs4_array': return $this->_utf8_to_ucs4($return); break; default: $this->_error('Unsupported output format'); return false; } } /** * Encode a given UTF-8 domain name * @param string Domain name (UTF-8 or UCS-4) * [@param string Desired input encoding, see {@link set_parameter}] * @return string Encoded Domain name (ACE string) * @access public */ function encode($decoded, $one_time_encoding = false) { // Forcing conversion of input to UCS4 array // If one time encoding is given, use this, else the objects property switch ($one_time_encoding ? $one_time_encoding : $this->_api_encoding) { case 'utf8': $decoded = $this->_utf8_to_ucs4($decoded); break; case 'ucs4_string': $decoded = $this->_ucs4_string_to_ucs4($decoded); case 'ucs4_array': break; default: $this->_error('Unsupported input format: '.($one_time_encoding ? $one_time_encoding : $this->_api_encoding)); return false; } // No input, no output, what else did you expect? if (empty($decoded)) return ''; // Anchors for iteration $last_begin = 0; // Output string $output = ''; foreach ($decoded as $k => $v) { // Make sure to use just the plain dot switch($v) { case 0x3002: case 0xFF0E: case 0xFF61: $decoded[$k] = 0x2E; // Right, no break here, the above are converted to dots anyway // Stumbling across an anchoring character case 0x2E: case 0x2F: case 0x3A: case 0x3F: case 0x40: // Neither email addresses nor URLs allowed in strict mode if ($this->_strict_mode) { $this->_error('Neither email addresses nor URLs are allowed in strict mode.'); return false; } else { // Skip first char if ($k) { $encoded = ''; $encoded = $this->_encode(array_slice($decoded, $last_begin, (($k)-$last_begin))); if ($encoded) { $output .= $encoded; } else { $output .= $this->_ucs4_to_utf8(array_slice($decoded, $last_begin, (($k)-$last_begin))); } $output .= chr($decoded[$k]); } $last_begin = $k + 1; } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -