📄 utf_tools.php
字号:
"\xC5\xB7" => "\xC5\xB6", "\xC5\xBA" => "\xC5\xB9", "\xC5\xBC" => "\xC5\xBB", "\xC5\xBE" => "\xC5\xBD", "\xC6\xA1" => "\xC6\xA0", "\xC6\xB0" => "\xC6\xAF", "\xC8\x99" => "\xC8\x98", "\xC8\x9B" => "\xC8\x9A", "\xCE\xAC" => "\xCE\x86", "\xCE\xAD" => "\xCE\x88", "\xCE\xAE" => "\xCE\x89", "\xCE\xAF" => "\xCE\x8A", "\xCE\xB1" => "\xCE\x91", "\xCE\xB2" => "\xCE\x92", "\xCE\xB3" => "\xCE\x93", "\xCE\xB4" => "\xCE\x94", "\xCE\xB5" => "\xCE\x95", "\xCE\xB6" => "\xCE\x96", "\xCE\xB7" => "\xCE\x97", "\xCE\xB8" => "\xCE\x98", "\xCE\xB9" => "\xCE\x99", "\xCE\xBA" => "\xCE\x9A", "\xCE\xBB" => "\xCE\x9B", "\xCE\xBC" => "\xCE\x9C", "\xCE\xBD" => "\xCE\x9D", "\xCE\xBE" => "\xCE\x9E", "\xCE\xBF" => "\xCE\x9F", "\xCF\x80" => "\xCE\xA0", "\xCF\x81" => "\xCE\xA1", "\xCF\x83" => "\xCE\xA3", "\xCF\x84" => "\xCE\xA4", "\xCF\x85" => "\xCE\xA5", "\xCF\x86" => "\xCE\xA6", "\xCF\x87" => "\xCE\xA7", "\xCF\x88" => "\xCE\xA8", "\xCF\x89" => "\xCE\xA9", "\xCF\x8A" => "\xCE\xAA", "\xCF\x8B" => "\xCE\xAB", "\xCF\x8C" => "\xCE\x8C", "\xCF\x8D" => "\xCE\x8E", "\xCF\x8E" => "\xCE\x8F", "\xD0\xB0" => "\xD0\x90", "\xD0\xB1" => "\xD0\x91", "\xD0\xB2" => "\xD0\x92", "\xD0\xB3" => "\xD0\x93", "\xD0\xB4" => "\xD0\x94", "\xD0\xB5" => "\xD0\x95", "\xD0\xB6" => "\xD0\x96", "\xD0\xB7" => "\xD0\x97", "\xD0\xB8" => "\xD0\x98", "\xD0\xB9" => "\xD0\x99", "\xD0\xBA" => "\xD0\x9A", "\xD0\xBB" => "\xD0\x9B", "\xD0\xBC" => "\xD0\x9C", "\xD0\xBD" => "\xD0\x9D", "\xD0\xBE" => "\xD0\x9E", "\xD0\xBF" => "\xD0\x9F", "\xD1\x80" => "\xD0\xA0", "\xD1\x81" => "\xD0\xA1", "\xD1\x82" => "\xD0\xA2", "\xD1\x83" => "\xD0\xA3", "\xD1\x84" => "\xD0\xA4", "\xD1\x85" => "\xD0\xA5", "\xD1\x86" => "\xD0\xA6", "\xD1\x87" => "\xD0\xA7", "\xD1\x88" => "\xD0\xA8", "\xD1\x89" => "\xD0\xA9", "\xD1\x8A" => "\xD0\xAA", "\xD1\x8B" => "\xD0\xAB", "\xD1\x8C" => "\xD0\xAC", "\xD1\x8D" => "\xD0\xAD", "\xD1\x8E" => "\xD0\xAE", "\xD1\x8F" => "\xD0\xAF", "\xD1\x91" => "\xD0\x81", "\xD1\x92" => "\xD0\x82", "\xD1\x93" => "\xD0\x83", "\xD1\x94" => 
"\xD0\x84", "\xD1\x95" => "\xD0\x85", "\xD1\x96" => "\xD0\x86", "\xD1\x97" => "\xD0\x87", "\xD1\x98" => "\xD0\x88", "\xD1\x99" => "\xD0\x89", "\xD1\x9A" => "\xD0\x8A", "\xD1\x9B" => "\xD0\x8B", "\xD1\x9C" => "\xD0\x8C", "\xD1\x9E" => "\xD0\x8E", "\xD1\x9F" => "\xD0\x8F", "\xD2\x91" => "\xD2\x90", "\xE1\xB8\x83" => "\xE1\xB8\x82", "\xE1\xB8\x8B" => "\xE1\xB8\x8A", "\xE1\xB8\x9F" => "\xE1\xB8\x9E", "\xE1\xB9\x81" => "\xE1\xB9\x80", "\xE1\xB9\x97" => "\xE1\xB9\x96", "\xE1\xB9\xA1" => "\xE1\xB9\xA0", "\xE1\xB9\xAB" => "\xE1\xB9\xAA", "\xE1\xBA\x81" => "\xE1\xBA\x80", "\xE1\xBA\x83" => "\xE1\xBA\x82", "\xE1\xBA\x85" => "\xE1\xBA\x84", "\xE1\xBB\xB3" => "\xE1\xBB\xB2" ); return strtr(strtoupper($string), $UTF8_LOWER_TO_UPPER); } /** * UTF-8 aware alternative to substr * Return part of a string given character offset (and optionally length) * * Note arguments: comparied to substr - if offset or length are * not integers, this version will not complain but rather massages them * into an integer. * * Note on returned values: substr documentation states false can be * returned in some cases (e.g. offset > string length) * mb_substr never returns false, it will return an empty string instead. * This adopts the mb_substr approach * * Note on implementation: PCRE only supports repetitions of less than * 65536, in order to accept up to MAXINT values for offset and length, * we'll repeat a group of 65535 characters when needed. * * Note on implementation: calculating the number of characters in the * string is a relatively expensive operation, so we only carry it out when * necessary. 
It isn't necessary for +ve offsets and no specified length
	*
	* @author Chris Smith<chris@jalakai.co.uk>
	* @param string $str
	* @param integer $offset number of UTF-8 characters offset (from left)
	* @param integer $length (optional) length in UTF-8 characters from offset
	* @return string the selected part of $str; an empty string when the
	*	requested range is empty or the pattern does not match (never FALSE)
	*/
	function utf8_substr($str, $offset, $length = NULL)
	{
		// generates E_NOTICE
		// for PHP4 objects, but not PHP5 objects
		$str = (string) $str;
		$offset = (int) $offset;
		if (!is_null($length))
		{
			$length = (int) $length;
		}

		// handle trivial cases
		if ($length === 0 || ($offset < 0 && $length < 0 && $length < $offset))
		{
			return '';
		}

		// normalise negative offsets (we could use a tail
		// anchored pattern, but they are horribly slow!)
		if ($offset < 0)
		{
			// see notes
			$strlen = utf8_strlen($str);
			$offset = $strlen + $offset;
			if ($offset < 0)
			{
				$offset = 0;
			}
		}

		$op = '';
		$lp = '';

		// establish a pattern for offset, a
		// non-captured group equal in length to offset
		if ($offset > 0)
		{
			// split the count into whole groups of 65535 plus a remainder,
			// because a single PCRE repetition must stay below 65536
			$ox = (int) ($offset / 65535);
			$oy = $offset % 65535;

			if ($ox)
			{
				$op = '(?:.{65535}){' . $ox . '}';
			}

			$op = '^(?:' . $op . '.{' . $oy . '})';
		}
		else
		{
			// offset == 0; just anchor the pattern
			$op = '^';
		}

		// establish a pattern for length
		if (is_null($length))
		{
			// the rest of the string
			$lp = '(.*)$';
		}
		else
		{
			if (!isset($strlen))
			{
				// see notes
				$strlen = utf8_strlen($str);
			}

			// another trivial case
			if ($offset > $strlen)
			{
				return '';
			}

			if ($length > 0)
			{
				// reduce any length that would
				// go past the end of the string
				$length = min($strlen - $offset, $length);

				$lx = (int) ($length / 65535);
				$ly = $length % 65535;

				// positive length requires a captured group
				// of length characters
				if ($lx)
				{
					$lp = '(?:.{65535}){' . $lx . '}';
				}

				$lp = '(' . $lp . '.{'. $ly . '})';
			}
			else if ($length < 0)
			{
				// more characters to drop from the tail than remain after offset
				if ($length < ($offset - $strlen))
				{
					return '';
				}

				$lx = (int)((-$length) / 65535);
				$ly = (-$length) % 65535;

				// negative length requires ... capture everything
				// except a group of -length characters
				// anchored at the tail-end of the string
				if ($lx)
				{
					$lp = '(?:.{65535}){' . $lx . '}';
				}

				$lp = '(.*)(?:' . $lp . '.{' . $ly . '})$';
			}
		}

		// "u" makes "." consume whole UTF-8 characters, "s" lets it match newlines
		if (!preg_match('#' . $op . $lp . '#us', $str, $match))
		{
			return '';
		}

		return $match[1];
	}

	/**
	* Return the length (in characters) of a UTF-8 string
	*
	* @param string $text UTF-8 string
	* @return integer Length (in chars) of given string
	*/
	function utf8_strlen($text)
	{
		// utf8_decode() turns every multibyte character into a single byte
		// ('?' when it has no single-byte equivalent), so strlen() of the
		// result equals the number of characters
		return strlen(utf8_decode($text));
	}
}
// NOTE(review): the brace above closes a block that was opened before this excerpt

/**
* UTF-8 aware alternative to str_split
* Convert a string to an array
*
* @author Harry Fuecks
* @param string $str UTF-8 encoded
* @param int $split_len number of characters per chunk
* @return array|bool the chunks of $str in their original order, or false
*	when $split_len is not an integer >= 1
*/
function utf8_str_split($str, $split_len = 1)
{
	if (!is_int($split_len) || $split_len < 1)
	{
		return false;
	}

	// a string that fits into one chunk is returned as the only element
	$len = utf8_strlen($str);
	if ($len <= $split_len)
	{
		return array($str);
	}

	// first alternative: full chunks of $split_len characters;
	// second alternative: the shorter remainder at the end of the string
	preg_match_all('/.{' . $split_len . '}|[^\x00]{1,' . $split_len . '}$/us', $str, $ar);
	return $ar[0];
}

/**
* UTF-8 aware alternative to strspn
* Find length of initial segment matching mask
*
* Every character of $mask is taken literally, as strspn() does; characters
* that are special inside a regular expression character class are escaped.
*
* @author Harry Fuecks
* @param string $str UTF-8 encoded string to examine
* @param string $mask UTF-8 encoded list of allowed characters
* @param int $start (optional) character offset, handed to utf8_substr
* @param int $length (optional) length in characters, handed to utf8_substr
* @return int number of leading characters of the examined part that occur in $mask
*/
function utf8_strspn($str, $mask, $start = null, $length = null)
{
	$mask = (string) $mask;

	// nothing can match an empty mask, and an empty character class
	// would be an invalid pattern
	if ($mask === '')
	{
		return 0;
	}

	if ($start !== null || $length !== null)
	{
		$str = utf8_substr($str, $start, $length);
	}

	// escape the mask so that ], \, ^, - and the delimiter cannot
	// terminate or alter the character class
	if (preg_match('/^[' . preg_quote($mask, '/') . ']+/u', $str, $matches))
	{
		return utf8_strlen($matches[0]);
	}

	return 0;
}

/**
* UTF-8 aware alternative to ucfirst
* Make a string's first character uppercase
*
* @author Harry Fuecks
* @param string $str UTF-8 encoded
* @return string with first character as upper case (if applicable)
*/
function utf8_ucfirst($str)
{
	switch (utf8_strlen($str))
	{
		case 0:
			return '';

		case 1:
			return utf8_strtoupper($str);

		default:
			// split off the first character, uppercase it and re-attach the rest
			preg_match('/^(.{1})(.*)$/us', $str, $matches);
			return utf8_strtoupper($matches[1]) . $matches[2];
	}
}
/*** Recode a string to UTF-8** If the encoding is not supported, the string is returned as-is** @param string $string Original string* @param string $encoding Original encoding (lowered)* @return string The string, encoded in UTF-8*/function utf8_recode($string, $encoding){ $encoding = strtolower($encoding); if ($encoding == 'utf-8' || !is_string($string) || !isset($string[0])) { return $string; } // start with something simple if ($encoding == 'iso-8859-1') { return utf8_encode($string); } // First, try iconv() if (function_exists('iconv')) { $ret = @iconv($encoding, 'utf-8', $string); if (isset($ret[0])) { return $ret; } } // Try the mb_string extension if (function_exists('mb_convert_encoding')) { $ret = @mb_convert_encoding($string, 'utf-8', $encoding); if (isset($ret[0])) { return $ret; } } // Try the recode extension if (function_exists('recode_string')) { $ret = @recode_string($encoding . '..utf-8', $string); if (isset($ret[0])) { return $ret; } } // If nothing works, check if we have a custom transcoder available if (!preg_match('#^[a-z0-9\\-]+$#', $encoding)) { // Make sure the encoding name is alphanumeric, we don't want it to be abused into loading arbitrary files trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR); } global $phpbb_root_path, $phpEx; // iso-8859-* character encoding if (preg_match('/iso[_ -]?8859[_ -]?(\\d+)/', $encoding, $array)) { switch ($array[1]) { case '1': case '2': case '4': case '7': case '9': case '15': if (!function_exists('iso_8859_' . $array[1])) { if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx)) { trigger_error('Basic reencoder file is missing', E_USER_ERROR); } include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx); } return call_user_func('iso_8859_' . $array[1], $string); break; default: trigger_error('Unknown encoding: ' . 
$encoding, E_USER_ERROR); break; } } // CP/WIN character encoding if (preg_match('/(?:cp|windows)[_\- ]?(\\d+)/', $encoding, $array)) { switch ($array[1]) { case '932': break; case '1250': case '1251': case '1254': case '1255': case '1256': case '1257': case '874': if (!function_exists('cp' . $array[1])) { if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx)) { trigger_error('Basic reencoder file is missing', E_USER_ERROR); } include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx); } return call_user_func('cp' . $array[1], $string); break; default: trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR); break; } } // TIS-620 if (preg_match('/tis[_ -]?620/', $encoding)) { if (!function_exists('tis_620')) { if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx)) { trigger_error('Basic reencoder file is missing', E_USER_ERROR); } include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx); } return tis_620($string); } // SJIS if (preg_match('/sjis(?:[_ -]?win)?|(?:cp|ibm)[_ -]?932|shift[_ -]?jis/', $encoding)) { if (!function_exists('sjis')) { if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx)) { trigger_error('CJK reencoder file is missing', E_USER_ERROR); } include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx); } return sjis($string); } // EUC_KR if (preg_match('/euc[_ -]?kr/', $encoding)) { if (!function_exists('euc_kr')) { if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx))
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -