📄 convertcharset.class.php

📁 sabreipb 2.1.6 utf-8中文版本！
💻 PHP
📖 第 1 页 / 共 2 页
字号:
上一页 12
	 * 0x01	0x0001	#	START OF HEADING
	 * # Oh, one more thing: you can put comment rows like this one between the data rows if you like.
	 * 0x02	0x0002	#	START OF TEXT
	 * 0x03	0x0003	#	END OF TEXT
	 * next line, and so on...
	 * </code>
	 * 
	 * You can get full tables with encodings from http://www.unicode.org
	 * 
	 * @param string $FirstEncoding Name of the first encoding and of its table file (they have to be the same)
	 * @param string $SecondEncoding Name of the second encoding and of its table file (they have to be the same). Optional; pass it to build a joined table.
	 * @return array Table necessary to change one encoding to another.
	 **/
	function MakeConvertTable ($FirstEncoding, $SecondEncoding = "")
	{
		/**
		 * Reads one mapping file per passed argument from CONVERT_TABLES_DIR and
		 * returns array(encodingName => array(UNICODE_HEX => CHAR_HEX)), where both
		 * hex strings are upper-cased and have their "0x" prefix removed.
		 *
		 * A missing file prints error 0/0 and terminates the script.
		 **/
		$ConvertTable = array();

		/**
		 * Every argument that was actually passed is a table file name, so one
		 * argument loads one table and two arguments load both.
		 **/
		$EncodingNames = func_get_args();

		foreach ($EncodingNames as $FileName)
		{
			$TablePath = CONVERT_TABLES_DIR . $FileName;
			if (!is_file($TablePath))
			{
				print $this->DebugOutput(0, 0, $TablePath); //Print an error message
				exit;
			}
			$TableFile = fopen($TablePath, "r") or die(); //This die(); is just to make sure...

			/**
			 * Start from an empty table so a file without any usable row still
			 * yields an array for this encoding (count() and lookups stay valid).
			 **/
			$ConvertTable[$FileName] = array();

			/**
			 * Read whole lines (no length limit), so an over-long comment line
			 * is never cut in the middle and its tail parsed as a data row.
			 **/
			while (($RawLine = fgets($TableFile)) !== false)
			{
				$OneLine = trim($RawLine);

				/**
				 * Skip blank lines and comment lines. Only "#" is checked because
				 * that is how unicode.org marks comments in its encoding files.
				 **/
				if ($OneLine == "" || substr($OneLine, 0, 1) == "#")
				{
					continue;
				}

				/**
				 * Columns may be separated by spaces, tabs or commas.
				 * Only the first two columns are needed; the rest is a comment.
				 **/
				$HexValue = preg_split ("/[\s,]+/", $OneLine, 3);

				/**
				 * A row without a second column, or whose second column is already
				 * the comment (char UNDEFINED or missing), can't be used for conversion.
				 **/
				if (count($HexValue) < 2 || $HexValue[1] == "" || substr($HexValue[1], 0, 1) == "#")
				{
					continue;
				}

				$ArrayKey = strtoupper(str_replace("0x", "", $HexValue[1]));
				$ArrayValue = strtoupper(str_replace("0x", "", $HexValue[0]));
				$ConvertTable[$FileName][$ArrayKey] = $ArrayValue;
			}

			// The handle is no longer needed once the file has been parsed.
			fclose($TableFile);
		}

		/**
		 * The last thing is to check if by any reason both encoding tables are the same.
		 * For example, it will happen when you save the encoding table file with a wrong
		 * name - of another charset.
		 **/
		if ((count($EncodingNames) > 1)
			&& (count($ConvertTable[$FirstEncoding]) == count($ConvertTable[$SecondEncoding]))
			&& (count(array_diff_assoc($ConvertTable[$FirstEncoding], $ConvertTable[$SecondEncoding])) == 0))
		{
			print $this->DebugOutput(1, 1, "$FirstEncoding, $SecondEncoding");
		}

		return $ConvertTable;
	}
	
	
	
	/**
	 * ConvertCharset::Convert()
	 * 
	 * This is a basic function you are using. I hope that you can figure out this function syntax :-)
	 * 
	 * @param string $StringToChange The string you want to change :)
	 * @param string $FromCharset Name of $StringToChange encoding, you have to know it.
	 * @param string $ToCharset Name of a charset you want to get for $StringToChange.
	 * @param boolean $TurnOnEntities Set to true or 1 if you want to use numeric entities instead of regular chars.
	 * @return string Converted string in brand new encoding :)
	 * @version 1.0 2004-07-27 01:09
	 **/
	function Convert ($StringToChange, $FromCharset, $ToCharset, $TurnOnEntities = false)
	{
		/**
		 * Converts $StringToChange from $FromCharset to $ToCharset using the
		 * tables returned by MakeConvertTable(). Charset names are matched
		 * case-insensitively. With $TurnOnEntities the output characters are
		 * passed through UnicodeEntity() instead of being written as raw bytes.
		 *
		 * Problems are reported by printing DebugOutput() messages; the
		 * utf-8 to utf-8 request additionally terminates the script.
		 **/

		/**
		 * Check are there all variables (only reported, the conversion still goes on)
		 **/
		if ($StringToChange == "")
		{
			print $this->DebugOutput(0, 3, "\$StringToChange");
		}
		else if ($FromCharset == "")
		{
			print $this->DebugOutput(0, 3, "\$FromCharset");
		}
		else if ($ToCharset == "")
		{
			print $this->DebugOutput(0, 3, "\$ToCharset");
		}

		$NewString = "";
		$this->Entities = $TurnOnEntities;

		/**
		 * For all people who like to use uppercase for charset encoding names :)
		 **/
		$FromCharset = strtolower($FromCharset);
		$ToCharset   = strtolower($ToCharset);

		/**
		 * Converting a charset to itself is allowed (with a notice),
		 * except for utf-8 to utf-8 which is rejected.
		 **/
		if ($FromCharset == $ToCharset)
		{
			print $this->DebugOutput(1, 0, $FromCharset);
			if ($FromCharset == "utf-8")
			{
				print $this->DebugOutput(0, 4, $FromCharset);
				exit;
			}
		}

		/**
		 * First case: the source is a byte-oriented charset.
		 **/
		if ($FromCharset != "utf-8")
		{
			$ToUtf = ($ToCharset == "utf-8");
			if ($ToUtf)
			{
				$CharsetTable = $this->MakeConvertTable ($FromCharset);
			}
			else
			{
				$CharsetTable = $this->MakeConvertTable ($FromCharset, $ToCharset);
			}
			$SourceTable = isset($CharsetTable[$FromCharset]) ? $CharsetTable[$FromCharset] : array();
			$TargetTable = (!$ToUtf && isset($CharsetTable[$ToCharset])) ? $CharsetTable[$ToCharset] : array();

			/**
			 * Build the reverse index (source char hex => unicode hex) once,
			 * instead of scanning the whole table for every input byte.
			 * Array keys are matched exactly, so numeric-looking hex strings
			 * can no longer be confused by loose comparison. The first table
			 * row wins, which is what array_search() returned before.
			 **/
			$SourceToUnicode = array();
			foreach ($SourceTable as $UnicodeHex => $SourceHex)
			{
				if (!isset($SourceToUnicode[$SourceHex]))
				{
					$SourceToUnicode[$SourceHex] = $UnicodeHex;
				}
			}

			$Length = strlen($StringToChange);
			for ($i = 0; $i < $Length; $i++)
			{
				// Two upper-case hex digits per byte (leading zero kept, fix from Mario Klingemann).
				$HexChar = sprintf("%02X", ord($StringToChange[$i]));

				/**
				 * gsm0338 extension chars are two bytes long: ESC (1B) plus one more byte
				 * (workaround pointed out by Andrea Carpani). The second byte is
				 * zero-padded too, so 1B 0A becomes "1B0A" as written in the table,
				 * and a trailing lone ESC is not read past the end of the string.
				 **/
				if (($FromCharset == "gsm0338") && ($HexChar === "1B") && ($i + 1 < $Length))
				{
					$i++;
					$HexChar .= sprintf("%02X", ord($StringToChange[$i]));
				}

				if (!isset($SourceToUnicode[$HexChar]))
				{
					print $this->DebugOutput(0, 2, $StringToChange[$i]);
					continue;
				}

				/**
				 * Sometimes one source char stands for two or more unicode chars, joined
				 * with a "plus" sign in the table (e.g. 007A+0142 in Mazovia).
				 * Each of them is converted separately.
				 **/
				$UnicodeHexChars = explode("+", (string) $SourceToUnicode[$HexChar]);
				foreach ($UnicodeHexChars as $UnicodeHexChar)
				{
					if (!$ToUtf && !array_key_exists($UnicodeHexChar, $TargetTable))
					{
						print $this->DebugOutput(0, 1, $StringToChange[$i]);
						continue;
					}

					if ($this->Entities == true)
					{
						$NewString .= $this->UnicodeEntity($this->HexToUtf($UnicodeHexChar));
					}
					else if ($ToUtf)
					{
						$NewString .= $this->HexToUtf($UnicodeHexChar);
					}
					else
					{
						// NOTE(review): chr() keeps only the low byte, so a target table value
						// wider than one byte would be truncated here - confirm whether
						// multi-byte targets are expected.
						$NewString .= chr(hexdec($TargetTable[$UnicodeHexChar]));
					}
				}
			}
		}
		/**
		 * Second case: the source is a utf-8 (multibyte) string.
		 **/
		else
		{
			$CharsetTable = $this->MakeConvertTable ($ToCharset);
			$TargetTable = isset($CharsetTable[$ToCharset]) ? $CharsetTable[$ToCharset] : array();

			/**
			 * Collect all "utf-8 sequence => replacement" pairs first and apply them in
			 * one pass. Replacing row by row would let bytes produced by an earlier
			 * replacement be matched again by a later row and corrupt the result.
			 **/
			$Replacements = array();
			foreach ($TargetTable as $UnicodeHexChar => $HexChar)
			{
				$UtfChar = (string) $this->HexToUtf($UnicodeHexChar);
				if ($UtfChar === "" || isset($Replacements[$UtfChar]))
				{
					continue; // nothing to search for, or an earlier row already covers it
				}

				if ($this->Entities == true)
				{
					$Replacements[$UtfChar] = $this->UnicodeEntity($UtfChar);
				}
				else
				{
					$Replacements[$UtfChar] = chr(hexdec($HexChar));
				}
			}

			$NewString = empty($Replacements) ? $StringToChange : strtr($StringToChange, $Replacements);
		}

		return $NewString;
	}
	
	/**
	 * ConvertCharset::DebugOutput()
	 * 
	 * This function is not really necessary, the debug output could stay inside of
	 * source code but like this, it's easier to manage and translate.
	 * Besides, I couldn't find a good comment/debug class :-) Maybe I'll write one someday...
	 * 
	 * All messages depend on the DEBUG_MODE level; as written before, you can set this value to:
	 * - -1 - No errors or notices are shown
	 * - 0  - Only error messages are shown, no notices
	 * - 1  - Error messages and notices are shown
	 * 
	 * @param int $Group Message group: error - 0, notice - 1
	 * @param int $Number Number of the message within its group
	 * @param mixed $Value This value is whatever you want, usually it's some parameter value, for better message understanding.
	 * @return string String with a proper message.
	 **/
	function DebugOutput ($Group, $Number, $Value = false)
	{
		/**
		 * Message catalogue indexed as [group][number]:
		 * group 0 holds the errors, group 1 holds the notices.
		 * $Value is spliced into the text of the messages that mention it.
		 **/
		$Messages = array(
			0 => array(
				0 => "Error, can NOT read file: " . $Value . "<br>",
				1 => "Error, can't find maching char \"". $Value ."\" in destination encoding table!" . "<br>",
				2 => "Error, can't find maching char \"". $Value ."\" in source encoding table!" . "<br>",
				3 => "Error, you did NOT set variable " . $Value . " in Convert() function." . "<br>",
				4 => "You can NOT convert string from " . $Value . " to " . $Value . "!" .  "<BR>"
			),
			1 => array(
				0 => "Notice, you are trying to convert string from ". $Value ." to ". $Value .", don't you feel it's strange? ;-)" . "<br>",
				1 => "Notice, both charsets " . $Value . " are identical! Check encoding tables files." . "<br>",
				2 => "Notice, there is no unicode char in the string you are trying to convert." . "<br>"
			)
		);

		// The message is handed back only when DEBUG_MODE is at least the
		// message group; otherwise nothing (null) is returned.
		return (DEBUG_MODE >= $Group) ? $Messages[$Group][$Number] : null;
	} // function DebugOutput

} //class ends here
?>
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -