📄 idna_convert.class.php
字号:
$output .= $encoded;
} else {
$output .= $this->_ucs4_to_utf8(array_slice($decoded, $last_begin, (($inp_len)-$last_begin)));
}
return $output;
} else {
if ($output = $this->_encode($decoded)) {
return $output;
} else {
return $this->_ucs4_to_utf8($decoded);
}
}
}
/**
* Use this method to get the last error ocurred
* @param void
* @return string The last error, that occured
* @access public
*/
function get_last_error()
{
return $this->_error;
}
/**
* The actual decoding algorithm
* @access private
*/
function _decode($encoded)
{
// We do need to find the Punycode prefix
if (!preg_match('!^'.preg_quote($this->_punycode_prefix, '!').'!', $encoded)) {
$this->_error('This is not a punycode string');
return false;
}
$encode_test = preg_replace('!^'.preg_quote($this->_punycode_prefix, '!').'!', '', $encoded);
// If nothing left after removing the prefix, it is hopeless
if (!$encode_test) {
$this->_error('The given encoded string was empty');
return false;
}
// Find last occurence of the delimiter
$delim_pos = strrpos($encoded, '-');
if ($delim_pos > strlen($this->_punycode_prefix)) {
for ($k = strlen($this->_punycode_prefix); $k < $delim_pos; ++$k) {
$decoded[] = ord($encoded{$k});
}
} else {
$decoded = array();
}
$deco_len = count($decoded);
$enco_len = strlen($encoded);
// Wandering through the strings; init
$is_first = true;
$bias = $this->_initial_bias;
$idx = 0;
$char = $this->_initial_n;
for ($enco_idx = ($delim_pos) ? ($delim_pos + 1) : 0; $enco_idx < $enco_len; ++$deco_len) {
for ($old_idx = $idx, $w = 1, $k = $this->_base; 1 ; $k += $this->_base) {
$digit = $this->_decode_digit($encoded{$enco_idx++});
$idx += $digit * $w;
$t = ($k <= $bias) ? $this->_tmin :
(($k >= $bias + $this->_tmax) ? $this->_tmax : ($k - $bias));
if ($digit < $t) break;
$w = (int) ($w * ($this->_base - $t));
}
$bias = $this->_adapt($idx - $old_idx, $deco_len + 1, $is_first);
$is_first = false;
$char += (int) ($idx / ($deco_len + 1));
$idx %= ($deco_len + 1);
if ($deco_len > 0) {
// Make room for the decoded char
for ($i = $deco_len; $i > $idx; $i--) {
$decoded[$i] = $decoded[($i - 1)];
}
}
$decoded[$idx++] = $char;
}
return $this->_ucs4_to_utf8($decoded);
}
/**
* The actual encoding algorithm
* @access private
*/
function _encode($decoded)
{
// We cannot encode a domain name containing the Punycode prefix
$extract = strlen($this->_punycode_prefix);
$check_pref = $this->_utf8_to_ucs4($this->_punycode_prefix);
$check_deco = array_slice($decoded, 0, $extract);
if ($check_pref == $check_deco) {
$this->_error('This is already a punycode string');
return false;
}
// We will not try to encode strings consisting of basic code points only
$encodable = false;
foreach ($decoded as $k => $v) {
if ($v > 0x7a) {
$encodable = true;
break;
}
}
if (!$encodable) {
$this->_error('The given string does not contain encodable chars');
return false;
}
// Do NAMEPREP
$decoded = $this->_nameprep($decoded);
if (!$decoded || !is_array($decoded)) return false; // NAMEPREP failed
$deco_len = count($decoded);
if (!$deco_len) return false; // Empty array
$codecount = 0; // How many chars have been consumed
$encoded = '';
// Copy all basic code points to output
for ($i = 0; $i < $deco_len; ++$i) {
$test = $decoded[$i];
// Will match [0-9a-zA-Z-]
if ((0x2F < $test && $test < 0x40)
|| (0x40 < $test && $test < 0x5B)
|| (0x60 < $test && $test <= 0x7B)
|| (0x2D == $test)) {
$encoded .= chr($decoded[$i]);
$codecount++;
}
}
if ($codecount == $deco_len) return $encoded; // All codepoints were basic ones
// Start with the prefix; copy it to output
$encoded = $this->_punycode_prefix.$encoded;
// If we have basic code points in output, add an hyphen to the end
if ($codecount) $encoded .= '-';
// Now find and encode all non-basic code points
$is_first = true;
$cur_code = $this->_initial_n;
$bias = $this->_initial_bias;
$delta = 0;
while ($codecount < $deco_len) {
// Find the smallest code point >= the current code point and
// remember the last ouccrence of it in the input
for ($i = 0, $next_code = $this->_max_ucs; $i < $deco_len; $i++) {
if ($decoded[$i] >= $cur_code && $decoded[$i] <= $next_code) {
$next_code = $decoded[$i];
}
}
$delta += ($next_code - $cur_code) * ($codecount + 1);
$cur_code = $next_code;
// Scan input again and encode all characters whose code point is $cur_code
for ($i = 0; $i < $deco_len; $i++) {
if ($decoded[$i] < $cur_code) {
$delta++;
} elseif ($decoded[$i] == $cur_code) {
for ($q = $delta, $k = $this->_base; 1; $k += $this->_base) {
$t = ($k <= $bias) ? $this->_tmin :
(($k >= $bias + $this->_tmax) ? $this->_tmax : $k - $bias);
if ($q < $t) break;
$encoded .= $this->_encode_digit(ceil($t + (($q - $t) % ($this->_base - $t))));
$q = ($q - $t) / ($this->_base - $t);
}
$encoded .= $this->_encode_digit($q);
$bias = $this->_adapt($delta, $codecount+1, $is_first);
$codecount++;
$delta = 0;
$is_first = false;
}
}
$delta++;
$cur_code++;
}
return $encoded;
}
/**
* Adapt the bias according to the current code point and position
* @access private
*/
function _adapt($delta, $npoints, $is_first)
{
$delta = (int) ($is_first ? ($delta / $this->_damp) : ($delta / 2));
$delta += (int) ($delta / $npoints);
for ($k = 0; $delta > (($this->_base - $this->_tmin) * $this->_tmax) / 2; $k += $this->_base) {
$delta = (int) ($delta / ($this->_base - $this->_tmin));
}
return (int) ($k + ($this->_base - $this->_tmin + 1) * $delta / ($delta + $this->_skew));
}
/**
* Encoding a certain digit
* @access private
*/
function _encode_digit($d)
{
return chr($d + 22 + 75 * ($d < 26));
}
/**
* Decode a certain digit
* @access private
*/
function _decode_digit($cp)
{
$cp = ord($cp);
return ($cp - 48 < 10) ? $cp - 22 : (($cp - 65 < 26) ? $cp - 65 : (($cp - 97 < 26) ? $cp - 97 : $this->_base));
}
/**
* Internal error handling method
* @access private
*/
function _error($error = '')
{
$this->_error = $error;
}
/**
* Do Nameprep according to RFC3491 and RFC3454
* @param array Unicode Characters
* @return string Unicode Characters, Nameprep'd
* @access private
*/
function _nameprep($input)
{
$output = array();
$error = false;
//
// Mapping
// Walking through the input array, performing the required steps on each of
// the input chars and putting the result into the output array
// While mapping required chars we apply the cannonical ordering
// $this->_show_hex($input);
foreach ($input as $v) {
// Map to nothing == skip that code point
if (in_array($v, $this->_np_['map_nothing'])) continue;
// Try to find prohibited input
if (in_array($v, $this->_np_['prohibit']) || in_array($v, $this->_np_['general_prohibited'])) {
$this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v));
return false;
}
foreach ($this->_np_['prohibit_ranges'] as $range) {
if ($range[0] <= $v && $v <= $range[1]) {
$this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v));
return false;
}
}
//
// Hangul syllable decomposition
if (0xAC00 <= $v && $v <= 0xD7AF) {
foreach ($this->_hangul_decompose($v) as $out) {
$output[] = $out;
}
// There's a decomposition mapping for that code point
} elseif (isset($this->_np_['replacemaps'][$v])) {
foreach ($this->_apply_cannonical_ordering($this->_np_['replacemaps'][$v]) as $out) {
$output[] = $out;
}
} else {
$output[] = $v;
}
}
//
// Combine code points
//
$last_class = 0;
$last_starter = 0;
$out_len = count($output);
for ($i = 0; $i < $out_len; ++$i) {
$class = $this->_get_combining_class($output[$i]);
if ((!$last_class || $last_class != $class) && $class) {
// Try to match
$seq_len = $i - $last_starter;
$out = $this->_combine(array_slice($output, $last_starter, $seq_len));
// On match: Replace the last starter with the composed character and remove
// the now redundant non-starter(s)
if ($out) {
$output[$last_starter] = $out;
if (count($out) != $seq_len) {
for ($j = $i+1; $j < $out_len; ++$j) {
$output[$j-1] = $output[$j];
}
unset($output[$out_len]);
}
// Rewind the for loop by one, since there can be more possible compositions
$i--;
$out_len--;
$last_class = ($i == $last_starter) ? 0 : $this->_get_combining_class($output[$i-1]);
continue;
}
}
if (!$class) { // The current class is 0
$last_starter = $i;
}
$last_class = $class;
}
return $output;
}
/**
* Decomposes a Hangul syllable
* (see http://www.unicode.org/unicode/reports/tr15/#Hangul
* @param integer 32bit UCS4 code point
* @return array Either Hangul Syllable decomposed or original 32bit value as one value array
* @access private
*/
function _hangul_decompose($char)
{
$sindex = $char - $this->_sbase;
if ($sindex < 0 || $sindex >= $this->_scount) {
return array($char);
}
$result = array();
$T = $this->_tbase + $sindex % $this->_tcount;
$result[] = (int) ($this->_lbase + $sindex / $this->_ncount);
$result[] = (int) ($this->_vbase + ($sindex % $this->_ncount) / $this->_tcount);
if ($T != $this->_tbase) $result[] = $T;
return $result;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -