📄 idna_convert.class.php
字号:
$sindex = (int) $char - $this->_sbase; if ($sindex < 0 || $sindex >= $this->_scount) { return array($char); } $result = array(); $result[] = (int) $this->_lbase + $sindex / $this->_ncount; $result[] = (int) $this->_vbase + ($sindex % $this->_ncount) / $this->_tcount; $T = intval($this->_tbase + $sindex % $this->_tcount); if ($T != $this->_tbase) $result[] = $T; return $result; } /** * Ccomposes a Hangul syllable * (see http://www.unicode.org/unicode/reports/tr15/#Hangul * @param array Decomposed UCS4 sequence * @return array UCS4 sequence with syllables composed * @access private */ function _hangul_compose($input) { $inp_len = count($input); if (!$inp_len) return array(); $result = array(); $last = (int) $input[0]; $result[] = $last; // copy first char from input to output for ($i = 1; $i < $inp_len; ++$i) { $char = (int) $input[$i]; $sindex = $last - $this->_sbase; $lindex = $last - $this->_lbase; $vindex = $char - $this->_vbase; $tindex = $char - $this->_tbase; // Find out, whether two current characters are LV and T if (0 <= $sindex && $sindex < $this->_scount && ($sindex % $this->_tcount == 0) && 0 <= $tindex && $tindex <= $this->_tcount) { // create syllable of form LVT $last += $tindex; $result[(count($result) - 1)] = $last; // reset last continue; // discard char } // Find out, whether two current characters form L and V if (0 <= $lindex && $lindex < $this->_lcount && 0 <= $vindex && $vindex < $this->_vcount) { // create syllable of form LV $last = (int) $this->_sbase + ($lindex * $this->_vcount + $vindex) * $this->_tcount; $result[(count($result) - 1)] = $last; // reset last continue; // discard char } // if neither case was true, just add the character $last = $char; $result[] = $char; } return $result; } /** * Returns the combining class of a certain wide char * @param integer Wide char to check (32bit integer) * @return integer Combining class if found, else 0 * @access private */ function _get_combining_class($char) { return isset($this->NP['norm_combcls'][$char]) ? $this->NP['norm_combcls'][$char] : 0; } /** * Apllies the cannonical ordering of a decomposed UCS4 sequence * @param array Decomposed UCS4 sequence * @return array Ordered USC4 sequence * @access private */ function _apply_cannonical_ordering($input) { $swap = true; $size = count($input); while ($swap) { $swap = false; $last = $this->_get_combining_class(intval($input[0])); for ($i = 0; $i < $size-1; ++$i) { $next = $this->_get_combining_class(intval($input[$i+1])); if ($next != 0 && $last > $next) { // Move item leftward until it fits for ($j = $i + 1; $j > 0; --$j) { if ($this->_get_combining_class(intval($input[$j-1])) <= $next) break; $t = intval($input[$j]); $input[$j] = intval($input[$j-1]); $input[$j-1] = $t; $swap = true; } // Reentering the loop looking at the old character again $next = $last; } $last = $next; } } return $input; } /** * Do composition of a sequence of starter and non-starter * @param array UCS4 Decomposed sequence * @return array Ordered USC4 sequence * @access private */ function _combine($input) { $inp_len = count($input); foreach ($this->NP['replacemaps'] as $np_src => $np_target) { if ($np_target[0] != $input[0]) continue; if (count($np_target) != $inp_len) continue; $hit = false; foreach ($input as $k2 => $v2) { if ($v2 == $np_target[$k2]) { $hit = true; } else { $hit = false; break; } } if ($hit) return $np_src; } return false; } /** * This converts an UTF-8 encoded string to its UCS-4 representation * By talking about UCS-4 "strings" we mean arrays of 32bit integers representing * each of the "chars". This is due to PHP not being able to handle strings with * bit depth different from 8. This apllies to the reverse method _ucs4_to_utf8(), too. * The following UTF-8 encodings are supported: * bytes bits representation * 1 7 0xxxxxxx * 2 11 110xxxxx 10xxxxxx * 3 16 1110xxxx 10xxxxxx 10xxxxxx * 4 21 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx * 5 26 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx * 6 31 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx * Each x represents a bit that can be used to store character data. * The five and six byte sequences are part of Annex D of ISO/IEC 10646-1:2000 * @access private */ function _utf8_to_ucs4($input) { $output = array(); $out_len = 0; $inp_len = strlen($input); $mode = 'next'; $test = 'none'; for ($k = 0; $k < $inp_len; ++$k) { $v = ord($input{$k}); // Extract byte from input string if ($v < 128) { // We found an ASCII char - put into stirng as is $output[$out_len] = $v; ++$out_len; if ('add' == $mode) { $this->_error('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k); return false; } continue; } if ('next' == $mode) { // Try to find the next start byte; determine the width of the Unicode char $start_byte = $v; $mode = 'add'; $test = 'range'; if ($v >> 5 == 6) { // &110xxxxx 10xxxxx $next_byte = 0; // Tells, how many times subsequent bitmasks must rotate 6bits to the left $v = ($v - 192) << 6; } elseif ($v >> 4 == 14) { // &1110xxxx 10xxxxxx 10xxxxxx $next_byte = 1; $v = ($v - 224) << 12; } elseif ($v >> 3 == 30) { // &11110xxx 10xxxxxx 10xxxxxx 10xxxxxx $next_byte = 2; $v = ($v - 240) << 18; } elseif ($v >> 2 == 62) { // &111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx $next_byte = 3; $v = ($v - 248) << 24; } elseif ($v >> 1 == 126) { // &1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx $next_byte = 4; $v = ($v - 252) << 30; } else { $this->_error('This might be UTF-8, but I don\'t understand it at byte '.$k); return false; } if ('add' == $mode) { $output[$out_len] = (int) $v; ++$out_len; continue; } } if ('add' == $mode) { if (!$this->_allow_overlong && $test == 'range') { $test = 'none'; if (($v < 0xA0 && $start_byte == 0xE0) || ($v < 0x90 && $start_byte == 0xF0) || ($v > 0x8F && $start_byte == 0xF4)) { $this->_error('Bogus UTF-8 character detected (out of legal range) at byte '.$k); return false; } } if ($v >> 6 == 2) { // Bit mask must be 10xxxxxx $v = ($v - 128) << ($next_byte * 6); $output[($out_len - 1)] += $v; --$next_byte; } else { $this->_error('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k); return false; } if ($next_byte < 0) { $mode = 'next'; } } } // for return $output; } /** * Convert UCS-4 string into UTF-8 string * See _utf8_to_ucs4() for details * @access private */ function _ucs4_to_utf8($input) { $output = ''; $k = 0; foreach ($input as $v) { ++$k; // $v = ord($v); if ($v < 128) { // 7bit are transferred literally $output .= chr($v); } elseif ($v < (1 << 11)) { // 2 bytes $output .= chr(192 + ($v >> 6)) . chr(128 + ($v & 63)); } elseif ($v < (1 << 16)) { // 3 bytes $output .= chr(224 + ($v >> 12)) . chr(128 + (($v >> 6) & 63)) . chr(128 + ($v & 63)); } elseif ($v < (1 << 21)) { // 4 bytes $output .= chr(240 + ($v >> 18)) . chr(128 + (($v >> 12) & 63)) . chr(128 + (($v >> 6) & 63)) . chr(128 + ($v & 63)); } elseif ($v < (1 << 26)) { // 5 bytes $output .= chr(248 + ($v >> 24)) . chr(128 + (($v >> 18) & 63)) . chr(128 + (($v >> 12) & 63)) . chr(128 + (($v >> 6) & 63)) . chr(128 + ($v & 63)); } elseif ($v < (1 << 31)) { // 6 bytes $output .= chr(252 + ($v >> 30)) . chr(128 + (($v >> 24) & 63)) . chr(128 + (($v >> 18) & 63)) . chr(128 + (($v >> 12) & 63)) . chr(128 + (($v >> 6) & 63)) . chr(128 + ($v & 63)); } else { $this->_error('Conversion from UCS-4 to UTF-8 failed: malformed input at byte '.$k); return false; } } return $output; } /** * Convert UCS-4 array into UCS-4 string * * @access private */ function _ucs4_to_ucs4_string($input) { $output = ''; // Take array values and split output to 4 bytes per value // The bit mask is 255, which reads &11111111 foreach ($input as $v) { $output .= chr(($v >> 24) & 255).chr(($v >> 16) & 255).chr(($v >> 8) & 255).chr($v & 255); } return $output; } /** * Convert UCS-4 strin into UCS-4 garray * * @access private */ function _ucs4_string_to_ucs4($input) { $output = array(); $inp_len = strlen($input); // Input length must be dividable by 4 if ($inp_len % 4) { $this->_error('Input UCS4 string is broken'); return false; } // Empty input - return empty output if (!$inp_len) return $output; for ($i = 0, $out_len = -1; $i < $inp_len; ++$i) { // Increment output position every 4 input bytes if (!($i % 4)) { $out_len++; $output[$out_len] = 0; } $output[$out_len] += ord($input{$i}) << (8 * (3 - ($i % 4) ) ); } return $output; }}/*** Adapter class for aligning the API of idna_convert with that of Net_IDNA* @author Matthias Sommerfeld <mso@phlylabs.de>*/class Net_IDNA_php4 extends idna_convert{ /** * Sets a new option value. Available options and values: * [encoding - Use either UTF-8, UCS4 as array or UCS4 as string as input ('utf8' for UTF-8, * 'ucs4_string' and 'ucs4_array' respectively for UCS4); The output is always UTF-8] * [overlong - Unicode does not allow unnecessarily long encodings of chars, * to allow this, set this parameter to true, else to false; * default is false.] * [strict - true: strict mode, good for registration purposes - Causes errors * on failures; false: loose mode, ideal for "wildlife" applications * by silently ignoring errors and returning the original input instead * * @param mixed Parameter to set (string: single parameter; array of Parameter => Value pairs) * @param string Value to use (if parameter 1 is a string) * @return boolean true on success, false otherwise * @access public */ function setParams($option, $param = false) { return $this->IC->set_parameters($option, $param); }}?>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -