📄 unicode.php
字号:
}

    // Convert the array of valid unicode character numbers back to UTF-16 encoded text
    $UTF16_text = unicode_array_to_UTF16( $new_unicode_array, $MSB_first );

    // Escape any special HTML characters present
    $UTF16_text = htmlspecialchars ( $UTF16_text, ENT_QUOTES );

    // Escape CR, LF and TAB characters, so that they are kept and not treated as expendable white space.
    // Each control character must map to its numeric character reference; mapping a
    // character to itself would turn the strtr call below into a no-op.
    $trans = array( "\x09" => "&#x09;", "\x0A" => "&#x0A;", "\x0D" => "&#x0D;" );
    $UTF16_text = strtr( $UTF16_text, $trans );

    // Return the resulting XML valid string
    return $UTF16_text;
}

/******************************************************************************
* End of Function:     xml_UTF16_clean
******************************************************************************/


/******************************************************************************
*
* Function:     HTML_UTF8_Escape
*
* Description:  A HTML page can display UTF-8 data properly if it has a
*               META http-equiv="Content-Type" tag with the content attribute
*               including the value: "charset=utf-8".
*               Otherwise the ISO-8859-1 character set is usually assumed, and
*               Unicode values above 0x7F must be escaped.
*               This function takes a UTF-8 encoded string and escapes the
*               characters above 0x7F as well as reserved HTML characters such
*               as Quotes, Greater than, Less than and Ampersand.
*               The result therefore contains only characters in the range
*               0x00 - 0x7F.
*
* Parameters:   UTF8_text - a string containing the UTF-8 data
*
* Returns:      htmloutput - a string containing the HTML equivalent
*
******************************************************************************/

function HTML_UTF8_Escape( $UTF8_text )
{
    // Ensure that the Unicode UTF8 encoding is valid.
    $UTF8_text = UTF8_fix( $UTF8_text );

    // Escape any special HTML characters present.
    // smart_htmlspecialchars is used (as of revision 1.10) so that
    // characters which were already escaped remain intact.
    $UTF8_text = smart_htmlspecialchars( $UTF8_text, ENT_QUOTES );

    // Walk the text one unicode character number at a time
    $htmloutput = "";
    foreach ( UTF8_to_unicode_array( $UTF8_text ) as $unichar )
    {
        if ( ( $unichar >= 0x00 ) && ( $unichar <= 0x7F ) )
        {
            // 0x00 - 0x7F: add the character to the html as is
            $htmloutput .= chr( $unichar );
        }
        else
        {
            // Above 0x7F: add it as a hexadecimal character reference
            $htmloutput .= "&#x" . dechex( $unichar ) . ";";
        }
    }

    // Return the resulting escaped HTML
    return $htmloutput;
}

/******************************************************************************
* End of Function:     HTML_UTF8_Escape
******************************************************************************/


/******************************************************************************
*
* Function:     HTML_UTF8_UnEscape
*
* Description:  Converts HTML which contains escaped decimal or hex characters
*               (e.g. "&#233;" or "&#xE9;") into UTF-8 text.
*               The text is processed in a single left-to-right pass, so text
*               produced by one replacement is never unescaped a second time
*               (e.g. "&#38;#65;" becomes the literal text "&#65;", not "A").
*
* Parameters:   HTML_text - a string containing the HTML text to convert
*
* Returns:      utfoutput - a string containing the UTF-8 equivalent
*
******************************************************************************/

function HTML_UTF8_UnEscape( $HTML_text )
{
    // One capture group holds either "x"/"X" followed by hex digits, or decimal digits.
    $pattern = '/&#([xX][0-9A-Fa-f]+|[0-9]+);/';

    // Collect every reference together with its byte offset in the text.
    // A result of 0 (no references) or false (regex failure) leaves the text unchanged.
    if ( ! preg_match_all( $pattern, $HTML_text, $references, PREG_SET_ORDER | PREG_OFFSET_CAPTURE ) )
    {
        return $HTML_text;
    }

    $utfoutput = "";
    $position = 0;      // byte offset of the first character not yet copied to the output

    foreach ( $references as $reference )
    {
        $entity = $reference[0][0];     // the complete reference, e.g. "&#xE9;"
        $offset = $reference[0][1];     // where the reference starts in $HTML_text
        $number = $reference[1][0];     // the part between "&#" and ";"

        if ( ( $number[0] === 'x' ) || ( $number[0] === 'X' ) )
        {
            $unichar = hexdec( substr( $number, 1 ) );
        }
        else
        {
            $unichar = (int) $number;
        }

        // Copy the untouched text before the reference, then the decoded character
        $utfoutput .= substr( $HTML_text, $position, $offset - $position );
        $utfoutput .= unicode_array_to_UTF8( array( $unichar ) );

        $position = $offset + strlen( $entity );
    }

    // Copy whatever follows the last reference
    return $utfoutput . substr( $HTML_text, $position );
}

/******************************************************************************
* End of Function:     HTML_UTF8_UnEscape
******************************************************************************/


/******************************************************************************
*
* Function:     HTML_UTF16_Escape
*
* Description:  A HTML page can display UTF-16 data properly if it has a
*               META http-equiv="Content-Type" tag with the content attribute
*               including the value: "charset=utf-16".
*               Otherwise the ISO-8859-1 character set is usually assumed, and
*               Unicode values above 0x7F must be escaped.
*               This function takes a UTF-16 encoded string and escapes the
*               characters above 0x7F as well as reserved HTML characters such
*               as Quotes, Greater than, Less than and Ampersand.
*               The result contains only characters in the range 0x00 - 0x7F
*               (one byte per character); it is NOT UTF-16 encoded.
*
* Parameters:   UTF16_text - a string containing the UTF-16 data
*               MSB_first - True will cause processing as Big Endian UTF-16 (Motorola, MSB first)
*                           False will cause processing as Little Endian UTF-16 (Intel, LSB first)
*
* Returns:      htmloutput - a string containing the HTML equivalent
*
******************************************************************************/

function HTML_UTF16_Escape( $UTF16_text, $MSB_first )
{
    // Ensure that the Unicode UTF16 encoding is valid.
    $UTF16_text = UTF16_fix( $UTF16_text, $MSB_first );

    // Convert the UTF-16 string to an array of unicode character numbers
    $unicode_array = UTF16_to_unicode_array( $UTF16_text, $MSB_first );

    // smart_htmlspecialchars works on single bytes. Applied to raw UTF-16 it would
    // insert one-byte entity text between two-byte code units and would also alter
    // any code unit that merely contains the byte value of '&', '<', '>' or '"'.
    // The text is therefore transcoded to UTF-8 (where those characters are always
    // single bytes) before the special HTML characters are escaped.
    // smart_htmlspecialchars is used (as of revision 1.10) so that
    // characters which were already escaped remain intact.
    // NOTE(review): the default quote style is kept here, whereas HTML_UTF8_Escape
    // passes ENT_QUOTES - confirm whether the difference is intentional.
    $UTF8_text = smart_htmlspecialchars( unicode_array_to_UTF8( $unicode_array ) );

    // Walk the escaped text one unicode character number at a time
    $htmloutput = "";
    foreach ( UTF8_to_unicode_array( $UTF8_text ) as $unichar )
    {
        if ( ( $unichar >= 0x00 ) && ( $unichar <= 0x7F ) )
        {
            // 0x00 - 0x7F: add the character to the html as is
            $htmloutput .= chr( $unichar );
        }
        else
        {
            // Above 0x7F: add it as a hexadecimal character reference
            $htmloutput .= "&#x" . dechex( $unichar ) . ";";
        }
    }

    // Return the resulting escaped HTML
    return $htmloutput;
}

/******************************************************************************
* End of Function:     HTML_UTF16_Escape
******************************************************************************/


/******************************************************************************
*
* Function:     HTML_UTF16_UnEscape
*
* Description:  Converts HTML which contains escaped decimal or hex characters
*               into UTF-16 text. The references are first decoded to UTF-8 by
*               HTML_UTF8_UnEscape, then the result is re-encoded as UTF-16.
*
* Parameters:   HTML_text - a string containing the HTML text to be converted
*               MSB_first - True will cause processing as Big Endian UTF-16 (Motorola, MSB first)
*                           False will cause processing as Little Endian UTF-16 (Intel, LSB first)
*
* Returns:      utfoutput - a string containing the UTF-16 equivalent
*
******************************************************************************/

function HTML_UTF16_UnEscape( $HTML_text, $MSB_first )
{
    // Decode the character references into UTF-8 text
    $utf8_text = HTML_UTF8_UnEscape( $HTML_text );

    // Re-encode the unicode character numbers as UTF-16 with the requested byte order
    $unicode_array = UTF8_to_unicode_array( $utf8_text );

    return unicode_array_to_UTF16( $unicode_array, $MSB_first );
}

/******************************************************************************
*
* End of Function:     HTML_UTF16_UnEscape
******************************************************************************/


/******************************************************************************
*
* Function:     smart_HTML_Entities
*
* Description:  Performs the same function as HTML_Entities, but leaves entities
*               that are already escaped intact.
*               Every character that has an entry in PHP's HTML_ENTITIES
*               translation table is replaced by its entity, except the
*               ampersand: an ampersand is only replaced by "&amp;" when it
*               does not already look like the start of an entity or of a
*               numeric character reference.
*
* Parameters:   HTML_text - a string containing the HTML text to be escaped
*
* Returns:      HTML_text_out - a string containing the escaped HTML text
*
******************************************************************************/

function smart_HTML_Entities( $HTML_text )
{
    // Get a table containing the HTML entities translations
    $translation_table = get_html_translation_table( HTML_ENTITIES );

    // Change the ampersand to translate to itself, so that strtr does not
    // blindly turn every ampersand into "&amp;"
    $translation_table[ chr(38) ] = '&';

    // Translate everything except ampersands
    $translated = strtr( $HTML_text, $translation_table );

    // Regular expression says: find an ampersand, check the text after it,
    // if the text after it is not one of the following, then replace the
    // ampersand with "&amp;"
    //   a) up to 4 letters (upper or lower case) followed by 2 or 3 word
    //      characters, then a semicolon
    //   b) a hash symbol, then between 2 and 7 decimal digits
    //   c) a hash symbol, an 'x' or 'X' character, then between 2 and 7
    //      hexadecimal digits (the letters A-F must be accepted here, otherwise
    //      references such as "&#xe9;" would be escaped a second time)
    $unescaped_ampersand = '/&(?![A-Za-z]{0,4}\w{2,3};|#[0-9]{2,7}|#[xX][0-9A-Fa-f]{2,7})/';

    return preg_replace( $unescaped_ampersand, '&amp;', $translated );
}

/******************************************************************************
* End of Function:     smart_HTML_Entities
******************************************************************************/


/******************************************************************************
*
* Function:     smart_htmlspecialchars
*
* Description:  Performs the same function as htmlspecialchars, but leaves characters
*               that are already escaped intact.
*
* Parameters:   HTML_text - a string containing the HTML text to be escaped
*
* Returns:      HTML_text_out
*                 - a string containing the escaped HTML text
*
******************************************************************************/

/**
 * Escapes the special HTML characters of a string like htmlspecialchars does,
 * but leaves ampersands that already start an entity or a numeric character
 * reference untouched, so text that is already escaped is not escaped twice.
 *
 * @param string $HTML_text the HTML text to be escaped
 * @param int    $quotes    quote handling, using the htmlspecialchars flags:
 *                          ENT_NOQUOTES leaves both quote characters alone,
 *                          ENT_COMPAT (default) escapes double quotes only,
 *                          ENT_QUOTES escapes double and single quotes
 *
 * @return string the escaped HTML text
 */
function smart_htmlspecialchars( $HTML_text , $quotes=ENT_COMPAT)
{
    // Get a table containing the HTML special characters translations.
    // The quote style is passed on explicitly: without it the table is built
    // with PHP's default flags and the quote characters would be translated
    // (or not) regardless of what the caller asked for.
    $translation_table = get_html_translation_table( HTML_SPECIALCHARS, $quotes );

    // Change the ampersand to translate to itself, so that strtr does not
    // blindly turn every ampersand into "&amp;"
    $translation_table[ chr(38) ] = '&';

    // Translate '<', '>' and the requested quote characters
    $translated = strtr( $HTML_text, $translation_table );

    // Regular expression says: find an ampersand, check the text after it,
    // if the text after it is not one of the following, then replace the
    // ampersand with "&amp;"
    //   a) up to 4 letters (upper or lower case) followed by 2 or 3 word
    //      characters, then a semicolon
    //   b) a hash symbol, then between 2 and 7 decimal digits
    //   c) a hash symbol, an 'x' or 'X' character, then between 2 and 7
    //      hexadecimal digits (the letters A-F must be accepted here, otherwise
    //      references such as "&#xe9;" would be escaped a second time)
    $unescaped_ampersand = '/&(?![A-Za-z]{0,4}\w{2,3};|#[0-9]{2,7}|#[xX][0-9A-Fa-f]{2,7})/';

    return preg_replace( $unescaped_ampersand, '&amp;', $translated );
}

/******************************************************************************
* End of Function:     smart_htmlspecialchars
******************************************************************************/

?>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -