📄 idna_convert.class.php
字号:
}
        // Catch the rest of the string
        if ($last_begin) {
            $inp_len = sizeof($decoded);
            $encoded = '';
            $encoded = $this->_encode(array_slice($decoded, $last_begin, (($inp_len)-$last_begin)));
            if ($encoded) {
                $output .= $encoded;
            } else {
                $output .= $this->_ucs4_to_utf8(array_slice($decoded, $last_begin, (($inp_len)-$last_begin)));
            }
            return $output;
        } else {
            if ($output = $this->_encode($decoded)) {
                return $output;
            } else {
                return $this->_ucs4_to_utf8($decoded);
            }
        }
    }

    /**
     * Use this method to get the last error that occurred
     *
     * @param    void
     * @return   string   The message most recently stored through _error()
     * @access   public
     */
    function get_last_error()
    {
        return $this->_error;
    }

    /**
     * The actual decoding algorithm
     *
     * Checks for the configured Punycode prefix, copies the code points that
     * precede the last delimiter ('-') verbatim and then decodes the
     * variable-length integers that follow, inserting every resulting code
     * point at the position the integer designates.
     *
     * @param    string   Punycode string, including the prefix
     * @return   mixed    Result of _ucs4_to_utf8() for the decoded code points;
     *                    false on failure (message available via get_last_error())
     * @access   private
     */
    function _decode($encoded)
    {
        $prefix     = $this->_punycode_prefix;
        $prefix_len = strlen($prefix);
        $enco_len   = strlen($encoded);

        // We do need to find the Punycode prefix
        if (strncmp($encoded, $prefix, $prefix_len) !== 0) {
            $this->_error('This is not a punycode string');
            return false;
        }
        // If nothing is left after removing the prefix, it is hopeless.
        // Compare lengths instead of testing truthiness, so that a remainder
        // of "0" is not mistaken for an empty string.
        if ($enco_len == $prefix_len) {
            $this->_error('The given encoded string was empty');
            return false;
        }

        // Find the last occurrence of the delimiter; everything between the
        // prefix and that delimiter is a literal (basic) code point
        $delim_pos = strrpos($encoded, '-');
        $decoded   = array();
        if ($delim_pos !== false && $delim_pos > $prefix_len) {
            for ($k = $prefix_len; $k < $delim_pos; ++$k) {
                $decoded[] = ord($encoded[$k]);
            }
        }
        $deco_len = count($decoded);

        // The encoded deltas start right behind the last delimiter, but never
        // inside the prefix itself
        $enco_idx = ($delim_pos !== false && $delim_pos >= $prefix_len)
                ? $delim_pos + 1
                : $prefix_len;

        // Wandering through the strings; init
        $is_first = true;
        $bias     = $this->_initial_bias;
        $idx      = 0;
        $char     = $this->_initial_n;

        while ($enco_idx < $enco_len) {
            $old_idx = $idx;
            $w       = 1;
            // Decode one variable-length integer into $idx
            for ($k = $this->_base; true; $k += $this->_base) {
                // The input ended in the middle of an integer
                if ($enco_idx >= $enco_len) {
                    $this->_error('The given encoded string is malformed');
                    return false;
                }
                $digit = $this->_decode_digit($encoded[$enco_idx++]);
                // _decode_digit() reports characters that are no digits as _base
                if ($digit >= $this->_base) {
                    $this->_error('The given encoded string is malformed');
                    return false;
                }
                $idx += $digit * $w;
                $t = ($k <= $bias)
                        ? $this->_tmin
                        : (($k >= $bias + $this->_tmax) ? $this->_tmax : ($k - $bias));
                if ($digit < $t) break;
                $w = (int) ($w * ($this->_base - $t));
            }
            $bias     = $this->_adapt($idx - $old_idx, $deco_len + 1, $is_first);
            $is_first = false;
            // $idx encodes both the code point increment and the insert position
            $char += (int) ($idx / ($deco_len + 1));
            $idx  %= ($deco_len + 1);
            // Make room for the decoded char and put it in place
            array_splice($decoded, $idx, 0, array($char));
            ++$idx;
            ++$deco_len;
        }
        return $this->_ucs4_to_utf8($decoded);
    }

    /**
     * The actual encoding algorithm
     *
     * Runs the input through _nameprep(), copies the code points accepted as
     * "basic" to the output and appends the remaining code points as
     * variable-length integers behind the prefix.
     *
     * @param    array    Code points (integers) of the string to encode
     * @return   mixed    Encoded string; the plain string when every code point
     *                    was copied as basic after NAMEPREP; false on failure
     *                    (message available via get_last_error())
     * @access   private
     */
    function _encode($decoded)
    {
        // We cannot encode a domain name containing the Punycode prefix
        $extract    = strlen($this->_punycode_prefix);
        $check_pref = $this->_utf8_to_ucs4($this->_punycode_prefix);
        $check_deco = array_slice($decoded, 0, $extract);

        if ($check_pref == $check_deco) {
            $this->_error('This is already a punycode string');
            return false;
        }

        // We will not try to encode strings consisting of basic code points only
        $encodable = false;
        foreach ($decoded as $v) {
            if ($v > 0x7a) {
                $encodable = true;
                break;
            }
        }
        if (!$encodable) {
            $this->_error('The given string does not contain encodable chars');
            return false;
        }

        // Do NAMEPREP
        $decoded = $this->_nameprep($decoded);
        if (!$decoded || !is_array($decoded)) return false; // NAMEPREP failed or left nothing

        $deco_len  = count($decoded);
        $codecount = 0; // How many chars have been consumed
        $encoded   = '';

        // Copy all basic code points to output
        for ($i = 0; $i < $deco_len; ++$i) {
            $test = $decoded[$i];
            // Matches '-', 0x30-0x3F (digits and : ; < = > ?), A-Z and 0x61-0x7B (a-z and '{')
            // NOTE(review): wider than [-0-9a-zA-Z]; kept as is to preserve existing output
            if ((0x2F < $test && $test < 0x40) || (0x40 < $test && $test < 0x5B)
                    || (0x60 < $test && $test <= 0x7B) || (0x2D == $test)) {
                $encoded .= chr($test);
                $codecount++;
            }
        }
        if ($codecount == $deco_len) return $encoded; // All code points were basic ones

        // Start with the prefix; copy it to output
        $encoded = $this->_punycode_prefix.$encoded;

        // If we have basic code points in output, add a hyphen to the end
        if ($codecount) $encoded .= '-';

        // Now find and encode all non-basic code points
        $is_first = true;
        $cur_code = $this->_initial_n;
        $bias     = $this->_initial_bias;
        $delta    = 0;
        while ($codecount < $deco_len) {
            // Find the smallest code point >= the current code point
            $next_code = $this->_max_ucs;
            $found     = false;
            for ($i = 0; $i < $deco_len; $i++) {
                if ($decoded[$i] >= $cur_code && $decoded[$i] <= $next_code) {
                    $next_code = $decoded[$i];
                    $found     = true;
                }
            }
            // Code points are still pending, but none of them can be reached any
            // more (neither copied as basic nor >= $cur_code). Without this
            // guard the loop would never terminate.
            if (!$found) {
                $this->_error('The given string contains code points that cannot be encoded');
                return false;
            }

            $delta   += ($next_code - $cur_code) * ($codecount + 1);
            $cur_code = $next_code;

            // Scan input again and encode all characters whose code point is $cur_code
            for ($i = 0; $i < $deco_len; $i++) {
                if ($decoded[$i] < $cur_code) {
                    $delta++;
                } elseif ($decoded[$i] == $cur_code) {
                    // Emit $delta as a variable-length integer
                    for ($q = $delta, $k = $this->_base; true; $k += $this->_base) {
                        $t = ($k <= $bias)
                                ? $this->_tmin
                                : (($k >= $bias + $this->_tmax) ? $this->_tmax : $k - $bias);
                        if ($q < $t) break;
                        // v0.4.5 Changed from ceil() to intval()
                        $encoded .= $this->_encode_digit(intval($t + (($q - $t) % ($this->_base - $t))));
                        $q = (int) (($q - $t) / ($this->_base - $t));
                    }
                    $encoded .= $this->_encode_digit($q);
                    $bias = $this->_adapt($delta, $codecount + 1, $is_first);
                    $codecount++;
                    $delta    = 0;
                    $is_first = false;
                }
            }
            $delta++;
            $cur_code++;
        }
        return $encoded;
    }

    /**
     * Adapt the bias according to the current code point and position
     *
     * @param    integer  Delta that has just been encoded or decoded
     * @param    integer  Number of code points handled so far, including the current one
     * @param    boolean  Whether this is the first adaption for the string
     *                    (the delta is then divided by _damp instead of 2)
     * @return   integer  The new bias
     * @access   private
     */
    function _adapt($delta, $npoints, $is_first)
    {
        $span      = $this->_base - $this->_tmin;
        $threshold = ($span * $this->_tmax) / 2;

        $delta  = intval($is_first ? ($delta / $this->_damp) : ($delta / 2));
        $delta += intval($delta / $npoints);
        for ($k = 0; $delta > $threshold; $k += $this->_base) {
            $delta = intval($delta / $span);
        }
        return intval($k + ($span + 1) * $delta / ($delta + $this->_skew));
    }

    /**
     * Encoding a certain digit
     *
     * @param    integer  Digit value; 0..25 map to 'a'..'z', 26..35 map to '0'..'9'
     * @return   string   The single character representing the digit
     * @access   private
     */
    function _encode_digit($d)
    {
        return chr(($d < 26) ? $d + 97 : $d + 22);
    }

    /**
     * Decode a certain digit
     *
     * Uses explicit range checks: with signed integers a test such as
     * ($cp - 48 < 10) is also true for every byte below '0', which would turn
     * characters like '-' or ':' into bogus digit values.
     *
     * @param    string   A single character
     * @return   integer  26..35 for '0'..'9', 0..25 for 'A'..'Z' and 'a'..'z';
     *                    _base for any other character (i.e. not a digit)
     * @access   private
     */
    function _decode_digit($cp)
    {
        $cp = ord($cp);
        if ($cp >= 48 && $cp <= 57) return $cp - 22;   // '0'..'9'
        if ($cp >= 65 && $cp <= 90) return $cp - 65;   // 'A'..'Z'
        if ($cp >= 97 && $cp <= 122) return $cp - 97;  // 'a'..'z'
        return $this->_base;
    }

    /**
     * Internal error handling method
     *
     * @param    string   Message to remember; returned by get_last_error()
     * @return   void
     * @access   private
     */
    function _error($error = '')
    {
        $this->_error = $error;
    }

    /**
     * Do Nameprep according to RFC3491 and RFC3454
     *
     * @param    array    Unicode code points
     * @return   mixed    Array of Nameprep'd code points; false when a prohibited
     *                    code point is found (message available via get_last_error())
     * @access   private
     */
    function _nameprep($input)
    {
        $output = array();
        //
        // Mapping
        // Walking through the input array, performing the required steps on each of
        // the input chars and putting the result into the output array
        // While mapping required chars we apply the canonical ordering
        foreach ($input as $v) {
            // Map to nothing == skip that code point
            if (in_array($v, $this->NP['map_nothing'])) continue;

            // Try to find prohibited input
            if (in_array($v, $this->NP['prohibit']) || in_array($v, $this->NP['general_prohibited'])) {
                $this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v));
                return false;
            }
            foreach ($this->NP['prohibit_ranges'] as $range) {
                if ($range[0] <= $v && $v <= $range[1]) {
                    $this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v));
                    return false;
                }
            }
            //
            // Hangul syllable decomposition
            if (0xAC00 <= $v && $v <= 0xD7AF) {
                foreach ($this->_hangul_decompose($v) as $out) {
                    $output[] = (int) $out;
                }
            // There's a decomposition mapping for that code point
            } elseif (isset($this->NP['replacemaps'][$v])) {
                foreach ($this->_apply_cannonical_ordering($this->NP['replacemaps'][$v]) as $out) {
                    $output[] = (int) $out;
                }
            } else {
                $output[] = (int) $v;
            }
        }
        // Before applying any Combining, try to rearrange any Hangul syllables
        $output = $this->_hangul_compose($output);
        //
        // Combine code points
        //
        $last_class   = 0;
        $last_starter = 0;
        $out_len      = count($output);
        for ($i = 0; $i < $out_len; ++$i) {
            $class = $this->_get_combining_class($output[$i]);
            if ((!$last_class || $last_class > $class) && $class) {
                // Try to match
                $seq_len = $i - $last_starter;
                $out = $this->_combine(array_slice($output, $last_starter, $seq_len));
                // On match: Replace the last starter with the composed character and remove
                // the now redundant non-starter(s)
                if ($out) {
                    $output[$last_starter] = $out;
                    // count() on a non-array throws a TypeError on PHP 8; older
                    // versions returned 1 for a scalar, which is kept here
                    $out_count = is_array($out) ? count($out) : 1;
                    if ($out_count != $seq_len) {
                        for ($j = $i + 1; $j < $out_len; ++$j) {
                            $output[$j - 1] = $output[$j];
                        }
                        // NOTE(review): after the shift the stale copy sits at
                        // $out_len - 1, not $out_len - confirm the intended index
                        unset($output[$out_len]);
                    }
                    // Rewind the for loop by one, since there can be more possible compositions
                    $i--;
                    $out_len--;
                    $last_class = ($i == $last_starter) ? 0 : $this->_get_combining_class($output[$i - 1]);
                    continue;
                }
            }
            // The current class is 0
            if (!$class) $last_starter = $i;
            $last_class = $class;
        }
        return $output;
    }

    /**
     * Decomposes a Hangul syllable
     * (see http://www.unicode.org/unicode/reports/tr15/#Hangul)
     * @param    integer  32bit UCS4 code point
     * @return   array    Either Hangul Syllable decomposed or original 32bit value as one value array
     * @access   private
     */
    function _hangul_decompose($char)
    {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -