idna_convert.class.php

来自「Joomla!是一套获得过多个奖项的内容管理系统(Content Management System, CMS)…」· PHP 代码 · 共 969 行 · 第 1/3 页
PHP
969 行
        $sindex = (int) $char - $this->_sbase;
        if ($sindex < 0 || $sindex >= $this->_scount) {
            return array($char);
        }
        $result = array();
        $result[] = (int) $this->_lbase + $sindex / $this->_ncount;
        $result[] = (int) $this->_vbase + ($sindex % $this->_ncount) / $this->_tcount;
        $T = intval($this->_tbase + $sindex % $this->_tcount);
        if ($T != $this->_tbase) $result[] = $T;
        return $result;
    }

    /**
     * Composes Hangul syllables in a decomposed UCS4 sequence
     * (see http://www.unicode.org/unicode/reports/tr15/#Hangul)
     *
     * Walks the input once and merges each character into the previously
     * emitted one whenever the pair forms an L+V or an LV+T combination.
     *
     * @param    array    Decomposed UCS4 sequence
     * @return   array    UCS4 sequence with syllables composed
     * @access   private
     */
    function _hangul_compose($input)
    {
        $inp_len = count($input);
        if (!$inp_len) {
            return array();
        }

        $last = (int) $input[0];
        $result = array($last); // the first char is always copied to the output

        for ($i = 1; $i < $inp_len; ++$i) {
            $char = (int) $input[$i];
            $top = count($result) - 1; // output slot currently holding $last

            // Case 1: $last is an LV syllable and $char is a trailing consonant (T).
            // Offset 0 is the _tbase placeholder that stands for "no trailing
            // consonant" and offsets >= _tcount lie outside the T range, so both
            // ends of the comparison have to be exclusive.
            $sindex = $last - $this->_sbase;
            $tindex = $char - $this->_tbase;
            $last_is_lv = 0 <= $sindex && $sindex < $this->_scount
                    && ($sindex % $this->_tcount == 0);
            if ($last_is_lv && 0 < $tindex && $tindex < $this->_tcount) {
                // create syllable of form LVT
                $last += $tindex;
                $result[$top] = $last; // overwrite the LV syllable
                continue;              // $char has been consumed
            }

            // Case 2: $last is a leading consonant (L) and $char is a vowel (V)
            $lindex = $last - $this->_lbase;
            $vindex = $char - $this->_vbase;
            if (0 <= $lindex && $lindex < $this->_lcount && 0 <= $vindex && $vindex < $this->_vcount) {
                // create syllable of form LV
                $last = (int) $this->_sbase + ($lindex * $this->_vcount + $vindex) * $this->_tcount;
                $result[$top] = $last; // overwrite the L jamo
                continue;              // $char has been consumed
            }

            // Neither case applied: pass the character through unchanged
            $last = $char;
            $result[] = $char;
        }
        return $result;
    }

    /**
     * Returns the combining class of a certain wide char
     * @param    integer    Wide char to check (32bit integer)
     * @return   integer    Combining class if listed in NP['norm_combcls'], else 0
     * @access   private
     */
    function _get_combining_class($char)
    {
        if (isset($this->NP['norm_combcls'][$char])) {
            return $this->NP['norm_combcls'][$char];
        }
        return 0;
    }

    /**
     * Applies the canonical ordering to a decomposed UCS4 sequence
     *
     * Repeats passes over the sequence until a pass performs no swap. Within a
     * pass, a character with a non-zero combining class that is lower than the
     * class of its predecessor is moved leftward until it fits.
     *
     * @param    array      Decomposed UCS4 sequence
     * @return   array      Ordered UCS4 sequence
     * @access   private
     */
    function _apply_cannonical_ordering($input)
    {
        $size = count($input);
        // Nothing can be reordered; also avoids reading $input[0] of an empty array
        if ($size < 2) {
            return $input;
        }

        $swap = true;
        while ($swap) {
            $swap = false;
            $last = $this->_get_combining_class(intval($input[0]));
            for ($i = 0; $i < $size - 1; ++$i) {
                $next = $this->_get_combining_class(intval($input[$i + 1]));
                if ($next != 0 && $last > $next) {
                    // Move item leftward until it fits
                    for ($j = $i + 1; $j > 0; --$j) {
                        if ($this->_get_combining_class(intval($input[$j - 1])) <= $next) {
                            break;
                        }
                        $t = intval($input[$j]);
                        $input[$j] = intval($input[$j - 1]);
                        $input[$j - 1] = $t;
                        $swap = true;
                    }
                    // Reentering the loop looking at the old character again
                    $next = $last;
                }
                $last = $next;
            }
        }
        return $input;
    }

    /**
     * Looks up a sequence of starter and non-starters in NP['replacemaps']
     * @param    array      UCS4 Decomposed sequence
     * @return   mixed      Key of the replacemaps entry whose value equals the
     *                      input sequence (presumably the composed char), or
     *                      false if no entry matches
     * @access   private
     */
function _combine($input)
    {
        $inp_len = count($input);
        // An empty sequence cannot match anything; returning here also avoids
        // reading the undefined $input[0] in the loop below
        if (!$inp_len) {
            return false;
        }

        foreach ($this->NP['replacemaps'] as $np_src => $np_target) {
            // Cheap rejections first: different length or different first element
            if (count($np_target) != $inp_len) continue;
            if ($np_target[0] != $input[0]) continue;

            // Full element-wise comparison, stopping at the first difference
            $hit = true;
            foreach ($input as $k2 => $v2) {
                if ($v2 != $np_target[$k2]) {
                    $hit = false;
                    break;
                }
            }
            if ($hit) return $np_src;
        }
        return false;
    }

    /**
     * This converts an UTF-8 encoded string to its UCS-4 representation
     * By talking about UCS-4 "strings" we mean arrays of 32bit integers representing
     * each of the "chars". This is due to PHP not being able to handle strings with
     * bit depth different from 8. This applies to the reverse method _ucs4_to_utf8(), too.
     * The following UTF-8 encodings are supported:
     * bytes bits  representation
     * 1        7  0xxxxxxx
     * 2       11  110xxxxx 10xxxxxx
     * 3       16  1110xxxx 10xxxxxx 10xxxxxx
     * 4       21  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
     * 5       26  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     * 6       31  1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     * Each x represents a bit that can be used to store character data.
* The five and six byte sequences are part of Annex D of ISO/IEC 10646-1:2000
     * @param    string     UTF-8 encoded input
     * @return   mixed      Array of integers (one per char); false on malformed
     *                      input, after the problem was passed to _error()
     * @access   private
     */
    function _utf8_to_ucs4($input)
    {
        $output = array();
        $out_len = 0;
        $inp_len = strlen($input);
        $mode = 'next'; // 'next': a start byte is expected; 'add': continuation bytes are expected
        $test = 'none'; // 'range': the next continuation byte still has to pass the range check

        for ($k = 0; $k < $inp_len; ++$k) {
            $v = ord($input[$k]); // Extract byte from input string

            if ($v < 128) { // ASCII char
                // An ASCII byte must not interrupt an unfinished multi byte sequence
                if ('add' == $mode) {
                    $this->_error('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
                    return false;
                }
                $output[$out_len] = $v; // put into the output as is
                ++$out_len;
                continue;
            }

            if ('next' == $mode) { // Start byte: determine the width of the Unicode char
                $start_byte = $v;
                $mode = 'add';
                $test = 'range';
                // $next_byte tells how many times the bits of the following
                // continuation byte must be shifted left by 6; it is decremented
                // per continuation byte and the sequence is complete below zero
                if ($v >> 5 == 6) { // &110xxxxx 10xxxxxx
                    $next_byte = 0;
                    $v = ($v - 192) << 6;
                } elseif ($v >> 4 == 14) { // &1110xxxx 10xxxxxx 10xxxxxx
                    $next_byte = 1;
                    $v = ($v - 224) << 12;
                } elseif ($v >> 3 == 30) { // &11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                    $next_byte = 2;
                    $v = ($v - 240) << 18;
                } elseif ($v >> 2 == 62) { // &111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
                    $next_byte = 3;
                    $v = ($v - 248) << 24;
                } elseif ($v >> 1 == 126) { // &1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
                    $next_byte = 4;
                    $v = ($v - 252) << 30;
                } else {
                    $this->_error('This might be UTF-8, but I don\'t understand it at byte '.$k);
                    return false;
                }
                // Open a new output slot holding the payload bits of the start byte
                $output[$out_len] = (int) $v;
                ++$out_len;
                continue;
            }

            // From here on $mode is 'add': $v has to be a continuation byte
            if (!$this->_allow_overlong && $test == 'range') {
                // Only the first continuation byte is range checked
                $test = 'none';
                if (($v < 0xA0 && $start_byte == 0xE0) || ($v < 0x90 && $start_byte == 0xF0) || ($v > 0x8F && $start_byte == 0xF4)) {
                    $this->_error('Bogus UTF-8 character detected (out of legal range) at byte '.$k);
                    return false;
                }
            }
            if ($v >> 6 != 2) { // Bit mask must be 10xxxxxx
                $this->_error('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
                return false;
            }
            $output[($out_len - 1)] += ($v - 128) << ($next_byte * 6);
            --$next_byte;
            if ($next_byte < 0) {
                $mode = 'next';
            }
        } // for

        // The input ended in the middle of a multi byte sequence
        if ('add' == $mode) {
            $this->_error('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$inp_len);
            return false;
        }
        return $output;
    }

    /**
     * Convert UCS-4 array into UTF-8 string
     * See _utf8_to_ucs4() for details
     * @param    array      UCS-4 sequence (array of integers)
     * @return   mixed      UTF-8 encoded string; false if a value does not fit
     *                      into 31 bits, after the problem was passed to _error()
     * @access   private
     */
    function _ucs4_to_utf8($input)
    {
        $output = '';
        $k = 0; // 1-based position of the current value, used in the error message
        foreach ($input as $v) {
            ++$k;
            if ($v < 128) { // 7bit are transferred literally
                $output .= chr($v);
            } elseif ($v < (1 << 11)) { // 2 bytes
                $output .= chr(192 + ($v >> 6))
                         . chr(128 + ($v & 63));
            } elseif ($v < (1 << 16)) { // 3 bytes
                $output .= chr(224 + ($v >> 12))
                         . chr(128 + (($v >> 6) & 63))
                         . chr(128 + ($v & 63));
            } elseif ($v < (1 << 21)) { // 4 bytes
                $output .= chr(240 + ($v >> 18))
                         . chr(128 + (($v >> 12) & 63))
                         . chr(128 + (($v >> 6) & 63))
                         . chr(128 + ($v & 63));
            } elseif ($v < (1 << 26)) { // 5 bytes
                $output .= chr(248 + ($v >> 24))
                         . chr(128 + (($v >> 18) & 63))
                         . chr(128 + (($v >> 12) & 63))
                         . chr(128 + (($v >> 6) & 63))
                         . chr(128 + ($v & 63));
            } elseif ($v < (1 << 31)) { // 6 bytes
                $output .= chr(252 + ($v >> 30))
                         . chr(128 + (($v >> 24) & 63))
                         . chr(128 + (($v >> 18) & 63))
                         . chr(128 + (($v >> 12) & 63))
                         . chr(128 + (($v >> 6) & 63))
                         . chr(128 + ($v & 63));
            } else {
                $this->_error('Conversion from UCS-4 to UTF-8 failed: malformed input at byte '.$k);
                return false;
            }
        }
        return $output;
    }

    /**
     * Convert UCS-4 array into UCS-4 string
     *
     * @param    array      UCS-4 sequence (array of integers)
     * @return   string     4 bytes per value, most significant byte first
     * @access   private
     */
    function _ucs4_to_ucs4_string($input)
    {
        $output = '';
        // Split every value into 4 bytes; the bit mask 255 reads &11111111
        foreach ($input as $v) {
            $output .= chr(($v >> 24) & 255)
                     . chr(($v >> 16) & 255)
                     . chr(($v >> 8) & 255)
                     . chr($v & 255);
        }
        return $output;
    }

    /**
     * Convert UCS-4 string into UCS-4 array
     *
     * @param    string     UCS-4 string, 4 bytes per char, most significant byte first
     * @return   mixed      Array of integers; false if the input length is not a
     *                      multiple of 4, after the problem was passed to _error()
     * @access   private
     */
    function _ucs4_string_to_ucs4($input)
    {
        $output = array();
        $inp_len = strlen($input);
        // Input length must be divisible by 4
        if ($inp_len % 4) {
            $this->_error('Input UCS4 string is broken');
            return false;
        }
        // Assemble one integer from each group of 4 bytes; an empty input
        // simply yields an empty array
        for ($i = 0; $i < $inp_len; $i += 4) {
            $output[] = (ord($input[$i]) << 24)
                      + (ord($input[$i + 1]) << 16)
                      + (ord($input[$i + 2]) << 8)
                      + ord($input[$i + 3]);
        }
        return $output;
    }
}

/**
* Adapter class for aligning the API of idna_convert with that of Net_IDNA
* @author  Matthias Sommerfeld <mso@phlylabs.de>
*/
class Net_IDNA_php4 extends idna_convert
{
    /**
     * Sets a new option value. Available options and values:
     * [encoding - Use either UTF-8, UCS4 as array or UCS4 as string as input ('utf8' for UTF-8,
     *         'ucs4_string' and 'ucs4_array' respectively for UCS4); The output is always UTF-8]
     * [overlong - Unicode does not allow unnecessarily long encodings of chars,
     *             to allow this, set this parameter to true, else to false;
     *             default is false.]
     * [strict - true: strict mode, good for registration purposes - Causes errors
     *           on failures; false: loose mode, ideal for "wildlife" applications
     *           by silently ignoring errors and returning the original input instead]
     *
     * @param    mixed     Parameter to set (string: single parameter; array of Parameter => Value pairs)
     * @param    string    Value to use (if parameter 1 is a string)
     * @return   boolean   true on success, false otherwise
     * @access   public
     */
    function setParams($option, $param = false)
    {
        // NOTE(review): neither an $IC property nor a set_parameters() method is
        // defined in the part of the file visible here. As this class extends
        // idna_convert, confirm whether the inherited setter should be called
        // on $this directly instead.
        return $this->IC->set_parameters($option, $param);
    }
}
?>
idna_convert.class.php - 源码说明

本页面展示了「Joomla!是一套获得过多个奖项的内容管理系统(Content Management System, CMS)。Joomla!采用PHP+MySQL数据库开发」中的 idna_convert.class.php 源码文件，采用 PHP 编程语言编写，共 969 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Joomla相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?