idna_convert.class.php

来自「Joomla15 - 最新开源CMS」· PHP 代码 · 共 991 行 · 第 1/3 页
PHP
991 行
                $output .= $encoded;
            } else {
                $output .= $this->_ucs4_to_utf8(array_slice($decoded, $last_begin, (($inp_len)-$last_begin)));
            }
            return $output;
        } else {
            if ($output = $this->_encode($decoded)) {
                return $output;
            } else {
                return $this->_ucs4_to_utf8($decoded);
            }
        }
    }

    /**
    * Use this method to get the last error that occurred
    * @param    void
    * @return   string   The last error that occurred
    * @access   public
    */
    function get_last_error()
    {
        // Hand back whatever message the most recent _error() call stored.
        $last_error = $this->_error;
        return $last_error;
    }

    /**
    * The actual decoding algorithm
    * @access   private
    */
    function _decode($encoded)
    {
        // Decodes one Punycode string (including its prefix) and returns the
        // result of _ucs4_to_utf8() on the decoded code points. On malformed
        // input an error message is stored via _error() and false is returned.

        // We do need to find the Punycode prefix
        if (!preg_match('!^'.preg_quote($this->_punycode_prefix, '!').'!', $encoded)) {
            $this->_error('This is not a punycode string');
            return false;
        }
        $encode_test = preg_replace('!^'.preg_quote($this->_punycode_prefix, '!').'!', '', $encoded);
        // If nothing left after removing the prefix, it is hopeless
        if (!$encode_test) {
            $this->_error('The given encoded string was empty');
            return false;
        }
        // Find last occurrence of the delimiter
        $delim_pos = strrpos($encoded, '-');
        // Everything between the prefix and the last delimiter is copied verbatim
        $decoded = array();
        if ($delim_pos > strlen($this->_punycode_prefix)) {
            for ($k = strlen($this->_punycode_prefix); $k < $delim_pos; ++$k) {
                // Square brackets: the curly brace offset syntax no longer parses on PHP 8
                $decoded[] = ord($encoded[$k]);
            }
        }
        $deco_len = count($decoded);
        $enco_len = strlen($encoded);

        // Wandering through the strings; init
        $is_first = true;
        $bias     = $this->_initial_bias;
        $idx      = 0;
        $char     = $this->_initial_n;

        // Each pass of the outer loop decodes one variable-length integer and
        // inserts one code point, hence ++$deco_len as the loop increment
        for ($enco_idx = ($delim_pos) ? ($delim_pos + 1) : 0; $enco_idx < $enco_len; ++$deco_len) {
            for ($old_idx = $idx, $w = 1, $k = $this->_base; 1 ; $k += $this->_base) {
                // A variable-length integer must not run past the end of the input
                if ($enco_idx >= $enco_len) {
                    $this->_error('The given encoded string is truncated');
                    return false;
                }
                $digit = $this->_decode_digit($encoded[$enco_idx++]);
                // Reject anything outside the digit range 0 .. base-1
                if ($digit < 0 || $digit >= $this->_base) {
                    $this->_error('The given encoded string contains an invalid character');
                    return false;
                }
                $idx += $digit * $w;
                $t = ($k <= $bias) ? $this->_tmin :
                        (($k >= $bias + $this->_tmax) ? $this->_tmax : ($k - $bias));
                if ($digit < $t) break;
                $w = (int) ($w * ($this->_base - $t));
            }
            $bias = $this->_adapt($idx - $old_idx, $deco_len + 1, $is_first);
            $is_first = false;
            // $idx encodes both the code point increment and the insert position
            $char += (int) ($idx / ($deco_len + 1));
            $idx %= ($deco_len + 1);
            if ($deco_len > 0) {
                // Make room for the decoded char
                for ($i = $deco_len; $i > $idx; $i--) {
                    $decoded[$i] = $decoded[($i - 1)];
                }
            }
            $decoded[$idx++] = $char;
        }
        return $this->_ucs4_to_utf8($decoded);
    }

    /**
    * The actual encoding algorithm
    * @access   private
    */
    function _encode($decoded)
    {
        // Encodes an array of code points into a Punycode string carrying the
        // configured prefix. Returns false (with a message stored via _error()
        // where applicable) when the input cannot or need not be encoded.

        // We cannot encode a domain name containing the Punycode prefix
        $extract = strlen($this->_punycode_prefix);
        $check_pref = $this->_utf8_to_ucs4($this->_punycode_prefix);
        $check_deco = array_slice($decoded, 0, $extract);

        if ($check_pref == $check_deco) {
            $this->_error('This is already a punycode string');
            return false;
        }
        // We will not try to encode strings consisting of basic code points only
        $encodable = false;
        foreach ($decoded as $k => $v) {
            if ($v > 0x7a) {
                $encodable = true;
                break;
            }
        }
        if (!$encodable) {
            $this->_error('The given string does not contain encodable chars');
            return false;
        }

        // Do NAMEPREP
        $decoded = $this->_nameprep($decoded);
        if (!$decoded || !is_array($decoded)) return false; // NAMEPREP failed

        $deco_len  = count($decoded);
        if (!$deco_len) return false; // Empty array

        $codecount = 0; // How many chars have been consumed

        $encoded = '';
        // Copy all basic code points to output
        for ($i = 0; $i < $deco_len; ++$i) {
            $test = $decoded[$i];
            // Matches 0x30-0x3F, 0x41-0x5A, 0x61-0x7B and 0x2D, i.e. [0-9a-zA-Z-]
            // plus ":;<=>?" and "{" (kept as is to preserve existing output)
            if ((0x2F < $test && $test < 0x40)
                    || (0x40 < $test && $test < 0x5B)
                    || (0x60 < $test && $test <= 0x7B)
                    || (0x2D == $test)) {
                $encoded .= chr($decoded[$i]);
                $codecount++;
            }
        }
        if ($codecount == $deco_len) return $encoded; // All codepoints were basic ones

        // Start with the prefix; copy it to output
        $encoded = $this->_punycode_prefix.$encoded;

        // If we have basic code points in output, add an hyphen to the end
        if ($codecount) $encoded .= '-';

        // Now find and encode all non-basic code points
        $is_first  = true;
        $cur_code  = $this->_initial_n;
        $bias      = $this->_initial_bias;
        $delta     = 0;
        while ($codecount < $deco_len) {
            // Find the smallest code point >= the current code point and
            // remember the last occurrence of it in the input
            $found = false;
            for ($i = 0, $next_code = $this->_max_ucs; $i < $deco_len; $i++) {
                if ($decoded[$i] >= $cur_code && $decoded[$i] <= $next_code) {
                    $next_code = $decoded[$i];
                    $found = true;
                }
            }
            // Nothing left at or above $cur_code although input is still
            // unconsumed: the remainder was neither copied above nor can it be
            // reached here, so the loop would never terminate. Fail instead.
            if (!$found) {
                $this->_error('The given string contains characters that cannot be encoded');
                return false;
            }

            $delta += ($next_code - $cur_code) * ($codecount + 1);
            $cur_code = $next_code;

            // Scan input again and encode all characters whose code point is $cur_code
            for ($i = 0; $i < $deco_len; $i++) {
                if ($decoded[$i] < $cur_code) {
                    $delta++;
                } elseif ($decoded[$i] == $cur_code) {
                    // Emit $delta as a variable-length integer
                    for ($q = $delta, $k = $this->_base; 1; $k += $this->_base) {
                        $t = ($k <= $bias) ? $this->_tmin :
                                (($k >= $bias + $this->_tmax) ? $this->_tmax : $k - $bias);
                        if ($q < $t) break;
                        $encoded .= $this->_encode_digit($t + (($q - $t) % ($this->_base - $t)));
                        // Integer division; a float quotient would only work via
                        // implicit truncation in later steps
                        $q = (int) (($q - $t) / ($this->_base - $t));
                    }
                    $encoded .= $this->_encode_digit($q);
                    $bias = $this->_adapt($delta, $codecount+1, $is_first);
                    $codecount++;
                    $delta = 0;
                    $is_first = false;
                }
            }
            $delta++;
            $cur_code++;
        }
        return $encoded;
    }

    /**
    * Adapt the bias according to the current code point and position
    * @access   private
    */
    function _adapt($delta, $npoints, $is_first)
    {
        // Computes the next bias from the delta just processed, the number of
        // code points handled so far and whether this is the first adaptation.
        $span = $this->_base - $this->_tmin;

        // The very first delta is scaled by the damping factor, later ones are halved
        if ($is_first) {
            $delta = (int) ($delta / $this->_damp);
        } else {
            $delta = (int) ($delta / 2);
        }
        $delta += (int) ($delta / $npoints);

        // Shrink delta until it is within the threshold, counting one base per step
        $threshold = ($span * $this->_tmax) / 2;
        $k = 0;
        while ($delta > $threshold) {
            $delta = (int) ($delta / $span);
            $k += $this->_base;
        }

        return (int) ($k + ($span + 1) * $delta / ($delta + $this->_skew));
    }

    /**
    * Encoding a certain digit
    * @access   private
    */
    function _encode_digit($d)
    {
        // Values below 26 land on 'a'..'z' (97 + $d); values from 26 upwards
        // land on '0'..'9' (22 + $d).
        $shift = ($d < 26) ? 75 : 0;
        $code  = $d + 22 + $shift;
        return chr($code);
    }

    /**
    * Decode a certain digit
    * @access   private
    */
    function _decode_digit($cp)
    {
        // Maps a single character to its digit value:
        //   'a'..'z' and 'A'..'Z' => 0..25, '0'..'9' => 26..35.
        // Any other character yields $this->_base, i.e. an out-of-range value.
        //
        // Explicit range checks are required: comparisons such as
        // ($cp - 48 < 10) are also true for every byte below '0' when the
        // arithmetic is signed, which would treat e.g. '-' as a digit.
        $cp = ord($cp);
        if ($cp >= 48 && $cp <= 57) {
            return $cp - 22;
        }
        if ($cp >= 65 && $cp <= 90) {
            return $cp - 65;
        }
        if ($cp >= 97 && $cp <= 122) {
            return $cp - 97;
        }
        return $this->_base;
    }

    /**
    * Internal error handling method
    * @access   private
    */
    function _error($error = '')
    {
        // Remember the message so get_last_error() can report it later;
        // calling without an argument stores the empty string.
        $message = $error;
        $this->_error = $message;
    }

    /**
    * Do Nameprep according to RFC3491 and RFC3454
    * @param    array    Unicode Characters
    * @return   array|false   Unicode characters, Nameprep'd; false on prohibited input
    * @access   private
    */
    function _nameprep($input)
    {
        // Takes an array of code points and returns the mapped and composed
        // array. When a prohibited code point is met, an error message is
        // stored via _error() and false is returned.
        $output = array();
        //
        // Mapping
        // Walking through the input array, performing the required steps on each of
        // the input chars and putting the result into the output array
        // While mapping required chars we apply the canonical ordering

        foreach ($input as $v) {
            // Map to nothing == skip that code point
            if (in_array($v, $this->_np_['map_nothing'])) continue;

            // Try to find prohibited input
            if (in_array($v, $this->_np_['prohibit']) || in_array($v, $this->_np_['general_prohibited'])) {
                $this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v));
                return false;
            }
            foreach ($this->_np_['prohibit_ranges'] as $range) {
                if ($range[0] <= $v && $v <= $range[1]) {
                    $this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v));
                    return false;
                }
            }
            //
            // Hangul syllable decomposition
            if (0xAC00 <= $v && $v <= 0xD7AF) {
                foreach ($this->_hangul_decompose($v) as $out) {
                    $output[] = $out;
                }
            // There's a decomposition mapping for that code point
            } elseif (isset($this->_np_['replacemaps'][$v])) {
                foreach ($this->_apply_cannonical_ordering($this->_np_['replacemaps'][$v]) as $out) {
                    $output[] = $out;
                }
            } else {
                $output[] = $v;
            }
        }
        //
        // Combine code points
        //
        $last_class   = 0;
        $last_starter = 0;
        $out_len      = count($output);
        for ($i = 0; $i < $out_len; ++$i) {
            $class = $this->_get_combining_class($output[$i]);
            if ((!$last_class || $last_class != $class) && $class) {
                // Try to match
                $seq_len = $i - $last_starter;
                $out = $this->_combine(array_slice($output, $last_starter, $seq_len));
                // On match: Replace the last starter with the composed character and remove
                // the now redundant non-starter(s)
                if ($out) {
                    $output[$last_starter] = $out;
                    // $out is stored as one element above, so it is presumably a
                    // scalar code point; count() on a scalar throws a TypeError
                    // on PHP 8, hence a scalar is counted as length 1 explicitly
                    $composed_len = is_array($out) ? count($out) : 1;
                    if ($composed_len != $seq_len) {
                        for ($j = $i+1; $j < $out_len; ++$j) {
                            $output[$j-1] = $output[$j];
                        }
                        // NOTE(review): after the shift the stale duplicate sits at
                        // index $out_len - 1, not $out_len - confirm this is intended
                        unset($output[$out_len]);
                    }
                    // Rewind the for loop by one, since there can be more possible compositions
                    $i--;
                    $out_len--;
                    $last_class = ($i == $last_starter) ? 0 : $this->_get_combining_class($output[$i-1]);
                    continue;
                }
            }
            if (!$class) { // The current class is 0
                $last_starter = $i;
            }
            $last_class = $class;
        }
        return $output;
    }

    /**
    * Decomposes a Hangul syllable
    * (see http://www.unicode.org/unicode/reports/tr15/#Hangul)
    * @param    integer  32bit UCS4 code point
    * @return   array    Either Hangul Syllable decomposed or original 32bit value as one value array
    * @access   private
    */
    function _hangul_decompose($char)
    {
        // Offset of the code point relative to the configured syllable base
        $offset = $char - $this->_sbase;

        // Outside the syllable block: hand the code point back untouched
        if (!($offset >= 0 && $offset < $this->_scount)) {
            return array($char);
        }

        // The first two parts are always emitted; the third only when it
        // differs from its base value
        $first  = (int) ($this->_lbase + $offset / $this->_ncount);
        $second = (int) ($this->_vbase + ($offset % $this->_ncount) / $this->_tcount);
        $third  = $this->_tbase + $offset % $this->_tcount;

        $parts = array($first, $second);
        if ($third != $this->_tbase) {
            $parts[] = $third;
        }
        return $parts;
    }
idna_convert.class.php - 源码说明

本页面展示了「Joomla15 - 最新开源CMS」中的 idna_convert.class.php 源码文件，采用 PHP 编程语言编写，共 991 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Joomla相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?