📄 utf_tools.php

📁 通过基于Windows的图形化界面
💻 PHP
📖 第 1 页 / 共 3 页
字号:
			"\xC5\xB7" => "\xC5\xB6", "\xC5\xBA" => "\xC5\xB9", "\xC5\xBC" => "\xC5\xBB", "\xC5\xBE" => "\xC5\xBD",			"\xC6\xA1" => "\xC6\xA0", "\xC6\xB0" => "\xC6\xAF", "\xC8\x99" => "\xC8\x98", "\xC8\x9B" => "\xC8\x9A",			"\xCE\xAC" => "\xCE\x86", "\xCE\xAD" => "\xCE\x88", "\xCE\xAE" => "\xCE\x89", "\xCE\xAF" => "\xCE\x8A",			"\xCE\xB1" => "\xCE\x91", "\xCE\xB2" => "\xCE\x92", "\xCE\xB3" => "\xCE\x93", "\xCE\xB4" => "\xCE\x94",			"\xCE\xB5" => "\xCE\x95", "\xCE\xB6" => "\xCE\x96", "\xCE\xB7" => "\xCE\x97", "\xCE\xB8" => "\xCE\x98",			"\xCE\xB9" => "\xCE\x99", "\xCE\xBA" => "\xCE\x9A", "\xCE\xBB" => "\xCE\x9B", "\xCE\xBC" => "\xCE\x9C",			"\xCE\xBD" => "\xCE\x9D", "\xCE\xBE" => "\xCE\x9E", "\xCE\xBF" => "\xCE\x9F", "\xCF\x80" => "\xCE\xA0",			"\xCF\x81" => "\xCE\xA1", "\xCF\x83" => "\xCE\xA3", "\xCF\x84" => "\xCE\xA4", "\xCF\x85" => "\xCE\xA5",			"\xCF\x86" => "\xCE\xA6", "\xCF\x87" => "\xCE\xA7", "\xCF\x88" => "\xCE\xA8", "\xCF\x89" => "\xCE\xA9",			"\xCF\x8A" => "\xCE\xAA", "\xCF\x8B" => "\xCE\xAB", "\xCF\x8C" => "\xCE\x8C", "\xCF\x8D" => "\xCE\x8E",			"\xCF\x8E" => "\xCE\x8F", "\xD0\xB0" => "\xD0\x90", "\xD0\xB1" => "\xD0\x91", "\xD0\xB2" => "\xD0\x92",			"\xD0\xB3" => "\xD0\x93", "\xD0\xB4" => "\xD0\x94", "\xD0\xB5" => "\xD0\x95", "\xD0\xB6" => "\xD0\x96",			"\xD0\xB7" => "\xD0\x97", "\xD0\xB8" => "\xD0\x98", "\xD0\xB9" => "\xD0\x99", "\xD0\xBA" => "\xD0\x9A",			"\xD0\xBB" => "\xD0\x9B", "\xD0\xBC" => "\xD0\x9C", "\xD0\xBD" => "\xD0\x9D", "\xD0\xBE" => "\xD0\x9E",			"\xD0\xBF" => "\xD0\x9F", "\xD1\x80" => "\xD0\xA0", "\xD1\x81" => "\xD0\xA1", "\xD1\x82" => "\xD0\xA2",			"\xD1\x83" => "\xD0\xA3", "\xD1\x84" => "\xD0\xA4", "\xD1\x85" => "\xD0\xA5", "\xD1\x86" => "\xD0\xA6",			"\xD1\x87" => "\xD0\xA7", "\xD1\x88" => "\xD0\xA8", "\xD1\x89" => "\xD0\xA9", "\xD1\x8A" => "\xD0\xAA",			"\xD1\x8B" => "\xD0\xAB", "\xD1\x8C" => "\xD0\xAC", "\xD1\x8D" => "\xD0\xAD", "\xD1\x8E" => "\xD0\xAE",			"\xD1\x8F" => "\xD0\xAF", "\xD1\x91" => "\xD0\x81", "\xD1\x92" => "\xD0\x82", "\xD1\x93" 
=> "\xD0\x83",			"\xD1\x94" => "\xD0\x84", "\xD1\x95" => "\xD0\x85", "\xD1\x96" => "\xD0\x86", "\xD1\x97" => "\xD0\x87",			"\xD1\x98" => "\xD0\x88", "\xD1\x99" => "\xD0\x89", "\xD1\x9A" => "\xD0\x8A", "\xD1\x9B" => "\xD0\x8B",			"\xD1\x9C" => "\xD0\x8C", "\xD1\x9E" => "\xD0\x8E", "\xD1\x9F" => "\xD0\x8F", "\xD2\x91" => "\xD2\x90",			"\xE1\xB8\x83" => "\xE1\xB8\x82", "\xE1\xB8\x8B" => "\xE1\xB8\x8A", "\xE1\xB8\x9F" => "\xE1\xB8\x9E", "\xE1\xB9\x81" => "\xE1\xB9\x80",			"\xE1\xB9\x97" => "\xE1\xB9\x96", "\xE1\xB9\xA1" => "\xE1\xB9\xA0", "\xE1\xB9\xAB" => "\xE1\xB9\xAA", "\xE1\xBA\x81" => "\xE1\xBA\x80",			"\xE1\xBA\x83" => "\xE1\xBA\x82", "\xE1\xBA\x85" => "\xE1\xBA\x84", "\xE1\xBB\xB3" => "\xE1\xBB\xB2"		);		return strtr(strtoupper($string), $UTF8_LOWER_TO_UPPER);	}	/**	* UTF-8 aware alternative to substr	* Return part of a string given character offset (and optionally length)	*	* Note arguments: comparied to substr - if offset or length are	* not integers, this version will not complain but rather massages them	* into an integer.	*	* Note on returned values: substr documentation states false can be	* returned in some cases (e.g. offset > string length)	* mb_substr never returns false, it will return an empty string instead.	* This adopts the mb_substr approach	*	* Note on implementation: PCRE only supports repetitions of less than	* 65536, in order to accept up to MAXINT values for offset and length,	* we'll repeat a group of 65535 characters when needed.	*	* Note on implementation: calculating the number of characters in the	* string is a relatively expensive operation, so we only carry it out when	* necessary. 
It isn't necessary for +ve offsets and no specified length
	*
	* @author Chris Smith<chris@jalakai.co.uk>
	* @param string $str
	* @param integer $offset number of UTF-8 characters offset (from left)
	* @param integer $length (optional) length in UTF-8 characters from offset
	* @return string the requested part of $str; an empty string (never FALSE)
	*                when the range is empty, out of bounds or the pattern does not match
	*/
	function utf8_substr($str, $offset, $length = NULL)
	{
		// generates E_NOTICE
		// for PHP4 objects, but not PHP5 objects
		$str = (string) $str;
		$offset = (int) $offset;
		if (!is_null($length))
		{
			$length = (int) $length;
		}

		// handle trivial cases
		if ($length === 0 || ($offset < 0 && $length < 0 && $length < $offset))
		{
			return '';
		}

		// normalise negative offsets (we could use a tail
		// anchored pattern, but they are horribly slow!)
		if ($offset < 0)
		{
			// see notes
			$strlen = utf8_strlen($str);
			$offset = $strlen + $offset;
			if ($offset < 0)
			{
				$offset = 0;
			}
		}

		$op = '';
		$lp = '';

		// establish a pattern for offset, a
		// non-captured group equal in length to offset
		if ($offset > 0)
		{
			// PCRE quantifiers are capped at 65535, so the offset is expressed
			// as $ox repetitions of 65535 characters plus a remainder of $oy
			$ox = (int) ($offset / 65535);
			$oy = $offset % 65535;

			if ($ox)
			{
				$op = '(?:.{65535}){' . $ox . '}';
			}

			$op = '^(?:' . $op . '.{' . $oy . '})';
		}
		else
		{
			// offset == 0; just anchor the pattern
			$op = '^';
		}

		// establish a pattern for length
		if (is_null($length))
		{
			// the rest of the string
			$lp = '(.*)$';
		}
		else
		{
			if (!isset($strlen))
			{
				// see notes
				$strlen = utf8_strlen($str);
			}

			// another trivial case
			if ($offset > $strlen)
			{
				return '';
			}

			if ($length > 0)
			{
				// reduce any length that would
				// go past the end of the string
				$length = min($strlen - $offset, $length);

				$lx = (int) ($length / 65535);
				$ly = $length % 65535;

				// positive length requires a captured group
				// of length characters
				if ($lx)
				{
					$lp = '(?:.{65535}){' . $lx . '}';
				}
				$lp = '(' . $lp . '.{'. $ly . '})';
			}
			else if ($length < 0)
			{
				if ($length < ($offset - $strlen))
				{
					return '';
				}

				$lx = (int)((-$length) / 65535);
				$ly = (-$length) % 65535;

				// negative length requires ... capture everything
				// except a group of -length characters
				// anchored at the tail-end of the string
				if ($lx)
				{
					$lp = '(?:.{65535}){' . $lx . '}';
				}
				$lp = '(.*)(?:' . $lp . '.{' . $ly . '})$';
			}
		}

		if (!preg_match('#' . $op . $lp . '#us', $str, $match))
		{
			return '';
		}

		return $match[1];
	}

	/**
	* Return the length (in characters) of a UTF-8 string
	*
	* @param	string	$text		UTF-8 string
	* @return	integer				Length (in chars) of given string
	*/
	function utf8_strlen($text)
	{
		// Since utf8_decode is replacing multibyte characters to ? strlen works fine
		// NOTE(review): utf8_decode() is deprecated as of PHP 8.2; revisit before moving to newer PHP versions
		return strlen(utf8_decode($text));
	}
}

/**
* UTF-8 aware alternative to str_split
* Convert a string to an array of chunks, each $split_len characters long
* (the last chunk may be shorter)
*
* @author Harry Fuecks
* @param string $str UTF-8 encoded
* @param int $split_len number of characters per chunk (must be an integer >= 1)
* @return mixed array of chunks, or FALSE if $split_len is not a positive integer
*/
function utf8_str_split($str, $split_len = 1)
{
	if (!is_int($split_len) || $split_len < 1)
	{
		return false;
	}

	// A string no longer than one chunk is returned whole (this includes the empty string)
	if (utf8_strlen($str) <= $split_len)
	{
		return array($str);
	}

	// Greedy {1,n} takes full chunks while they are available and the shorter
	// remainder at the end. With the "s" modifier the dot matches every
	// character, so a trailing chunk containing a NUL byte is no longer dropped.
	preg_match_all('/.{1,' . $split_len . '}/us', $str, $chunks);

	return $chunks[0];
}

/**
* UTF-8 aware alternative to strspn
* Find length (in characters) of the initial segment consisting entirely of
* characters contained in the mask
*
* The mask is a literal list of characters, as with strspn(); characters that
* are special inside a regular expression character class are escaped.
*
* @author Harry Fuecks
* @param string $str UTF-8 encoded string to examine
* @param string $mask list of allowed characters
* @param int $start (optional) character offset passed on to utf8_substr
* @param int $length (optional) character length passed on to utf8_substr
* @return int length of the matching initial segment, 0 if there is none
*/
function utf8_strspn($str, $mask, $start = null, $length = null)
{
	if ($start !== null || $length !== null)
	{
		$str = utf8_substr($str, $start, $length);
	}

	$mask = (string) $mask;

	// An empty character class is not a valid pattern, and nothing can match an empty mask anyway
	if ($mask === '')
	{
		return 0;
	}

	// Escape the mask (including the delimiter) so characters such as / ] ^ - \
	// neither break the pattern nor change its meaning
	$pattern = '/^[' . preg_quote($mask, '/') . ']+/u';

	if (preg_match($pattern, $str, $matches) && isset($matches[0]))
	{
		return utf8_strlen($matches[0]);
	}

	return 0;
}

/**
* UTF-8 aware alternative to ucfirst
* Make a string's first character uppercase
*
* @author Harry Fuecks
* @param string $str UTF-8 encoded string
* @return string with first character as upper case (if applicable); the
*                input is returned unchanged if it cannot be split (e.g. malformed UTF-8)
*/
function utf8_ucfirst($str)
{
	$char_count = utf8_strlen($str);

	if ($char_count === 0)
	{
		return '';
	}

	if ($char_count === 1)
	{
		return utf8_strtoupper($str);
	}

	// Separate the first character from the remainder; the match fails on
	// malformed UTF-8, in which case there is nothing sensible to uppercase
	if (!preg_match('/^(.{1})(.*)$/us', $str, $matches))
	{
		return $str;
	}

	return utf8_strtoupper($matches[1]) . $matches[2];
}

/**
* Recode a string to UTF-8
*
* If the encoding is not supported, the string is returned as-is
*
* @param	string	$string		Original string
* @param	string	$encoding	Original encoding (lowered)
* @return	string				The string, encoded in UTF-8
*/
function utf8_recode($string, $encoding)
{
	$encoding = strtolower($encoding);

	if ($encoding == 'utf-8' || !is_string($string) || !isset($string[0]))
	{
		return $string;
	}

	// start with something simple
	if ($encoding == 'iso-8859-1')
	{
		return utf8_encode($string);
	}

	// First, try iconv()
	if (function_exists('iconv'))
	{
		$ret = @iconv($encoding, 'utf-8', $string);

		if (isset($ret[0]))
		{
			return $ret;
		}
	}

	// Try the mb_string extension
	if (function_exists('mb_convert_encoding'))
	{
		$ret = @mb_convert_encoding($string, 'utf-8', $encoding);

		if (isset($ret[0]))
		{
			return $ret;
		}
	}

	// Try the recode extension
	if (function_exists('recode_string'))
	{
		$ret = @recode_string($encoding . '..utf-8', $string);

		if (isset($ret[0]))
		{
			return $ret;
		}
	}

	// If nothing works, check if we have a custom transcoder available
	if (!preg_match('#^[a-z0-9\\-]+$#', $encoding))
	{
		// Make sure the encoding name is alphanumeric, we don't want it to be abused into loading arbitrary files
		trigger_error('Unknown encoding: ' . 
$encoding, E_USER_ERROR);	}	global $phpbb_root_path, $phpEx;	// iso-8859-* character encoding	if (preg_match('/iso[_ -]?8859[_ -]?(\\d+)/', $encoding, $array))	{		switch ($array[1])		{			case '1':			case '2':			case '4':			case '7':			case '9':			case '15':				if (!function_exists('iso_8859_' . $array[1]))				{					if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx))					{						trigger_error('Basic reencoder file is missing', E_USER_ERROR);					}					include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx);				}				return call_user_func('iso_8859_' . $array[1], $string);			break;			default:				trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);			break;		}	}	// CP/WIN character encoding	if (preg_match('/(?:cp|windows)[_\- ]?(\\d+)/', $encoding, $array))	{		switch ($array[1])		{			case '932':			break;			case '1250':			case '1251':			case '1254':			case '1255':			case '1256':			case '1257':			case '874':				if (!function_exists('cp' . $array[1]))				{					if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx))					{						trigger_error('Basic reencoder file is missing', E_USER_ERROR);					}					include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx);				}				return call_user_func('cp' . $array[1], $string);			break;			default:				trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);			break;		}	}	// TIS-620	if (preg_match('/tis[_ -]?620/', $encoding))	{		if (!function_exists('tis_620'))		{			if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx))			{				trigger_error('Basic reencoder file is missing', E_USER_ERROR);			}			include($phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx);		}		return tis_620($string);	}	// SJIS	if (preg_match('/sjis(?:[_ -]?win)?|(?:cp|ibm)[_ -]?932|shift[_ -]?jis/', $encoding))	{		if (!function_exists('sjis'))		{			if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . 
$phpEx))			{				trigger_error('CJK reencoder file is missing', E_USER_ERROR);			}			include($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx);		}		return sjis($string);	}	// EUC_KR	if (preg_match('/euc[_ -]?kr/', $encoding))	{		if (!function_exists('euc_kr'))		{			if (!file_exists($phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx))
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -