📄 convertcharset.class.php
字号:
* 0x01 0x0001 # START OF HEADING
* # Oh, one more thing, you can make comments inside of a rows if you like.
* 0x02 0x0002 # START OF TEXT
* 0x03 0x0003 # END OF TEXT
* next line, and so on...
* </code>
*
* You can get full tables with encodings from http://www.unicode.org
*
* @param string $FirstEncoding Name of the first encoding, which is also the filename of its table (they have to be the same)
* @param string $SecondEncoding Name of the second encoding, which is also the filename of its table (they have to be the same). Optional; pass it to build a joined table.
* @return array Table necessary to change one encoding to another.
**/
function MakeConvertTable ($FirstEncoding, $SecondEncoding = "")
{
    /**
     * Loads one mapping file per argument from CONVERT_TABLES_DIR and returns
     * array(encodingName => array(secondColumnHex => firstColumnHex)).
     * Both hex values are stored upper-cased and without the "0x" prefix.
     *
     * Every argument actually passed is treated as an encoding/file name, so
     * calling with one argument loads a single table. A missing or unreadable
     * file prints the "can NOT read file" message and terminates the script.
     * When two tables are loaded and turn out identical, a notice is printed.
     **/
    $ConvertTable = array();
    $EncodingNames = func_get_args();
    foreach ($EncodingNames as $FileName)
    {
        $FilePath = CONVERT_TABLES_DIR . $FileName;
        if (!is_file($FilePath))
        {
            print $this->DebugOutput(0, 0, $FilePath); //Print an error message
            exit;
        }
        $FileWithEncTable = fopen($FilePath, "r");
        if ($FileWithEncTable === false)
        {
            // is_file() passed but the file could not be opened (e.g. permissions).
            print $this->DebugOutput(0, 0, $FilePath);
            exit;
        }
        // Always create the entry, so a file without usable rows still yields
        // an (empty) array instead of an undefined index further down.
        $ConvertTable[$FileName] = array();
        /**
         * Loop on the fgets() result rather than on feof(): it stops on a read
         * error as well as on end of file, and without a length limit a long
         * line is never split into a bogus extra row.
         **/
        while (($RawLine = fgets($FileWithEncTable)) !== false)
        {
            $OneLine = trim($RawLine);
            /**
             * Skip blank lines and comment lines. Only "#" is checked because
             * that is how the mapping files mark comments.
             **/
            if ($OneLine === "" || substr($OneLine, 0, 1) == "#")
            {
                continue;
            }
            /**
             * Columns may be separated by spaces, tabs or commas. Only the
             * first two columns are needed; the rest of the line stays in
             * the third element.
             **/
            $HexValue = preg_split("/[\s,]+/", $OneLine, 3);
            /**
             * A row with a single column, or whose second column is already
             * the trailing comment, has no usable mapping (undefined char).
             **/
            if (count($HexValue) < 2 || substr($HexValue[1], 0, 1) == "#")
            {
                continue;
            }
            // Upper-case first so that both "0x" and "0X" prefixes are removed.
            $ArrayKey = str_replace("0X", "", strtoupper($HexValue[1]));
            $ArrayValue = str_replace("0X", "", strtoupper($HexValue[0]));
            $ConvertTable[$FileName][$ArrayKey] = $ArrayValue;
        }
        fclose($FileWithEncTable);
    }
    /**
     * The last thing is to check whether both encoding tables are the same.
     * That happens, for example, when a table file was saved under the name
     * of another charset.
     **/
    if ((func_num_args() > 1)
        && (count($ConvertTable[$FirstEncoding]) == count($ConvertTable[$SecondEncoding]))
        && (count(array_diff_assoc($ConvertTable[$FirstEncoding], $ConvertTable[$SecondEncoding])) == 0))
    {
        print $this->DebugOutput(1, 1, "$FirstEncoding, $SecondEncoding");
    }
    return $ConvertTable;
}
/**
* ConvertCharset::Convert()
*
* This is a basic function you are using. I hope that you can figure out this function syntax :-)
*
* @param string $StringToChange The string you want to change :)
* @param string $FromCharset Name of $StringToChange encoding, you have to know it.
* @param string $ToCharset Name of a charset you want to get for $StringToChange.
* @param boolean $TurnOnEntities Set to true or 1 if you want to use numeric entities instead of regular chars.
* @return string Converted string in brand new encoding :)
* @version 1.0 2004-07-27 01:09
**/
function Convert ($StringToChange, $FromCharset, $ToCharset, $TurnOnEntities = false)
{
    /**
     * Converts $StringToChange from $FromCharset to $ToCharset using the
     * tables built by MakeConvertTable(). Charset names are matched
     * case-insensitively. With $TurnOnEntities set, characters are emitted
     * through UnicodeEntity() instead of as raw bytes.
     *
     * Problems are reported by printing DebugOutput() messages; a utf-8 to
     * utf-8 request prints an error and terminates the script.
     **/

    /**
     * Check that all required arguments were given.
     **/
    if ($StringToChange == "")
    {
        print $this->DebugOutput(0, 3, "\$StringToChange");
    }
    else if ($FromCharset == "")
    {
        print $this->DebugOutput(0, 3, "\$FromCharset");
    }
    else if ($ToCharset == "")
    {
        print $this->DebugOutput(0, 3, "\$ToCharset");
    }

    $NewString = "";
    $this->Entities = $TurnOnEntities;

    /**
     * For all people who like to use uppercase for charset encoding names :)
     **/
    $FromCharset = strtolower($FromCharset);
    $ToCharset = strtolower($ToCharset);

    /**
     * Converting a charset to itself is allowed (with a notice), except for
     * utf-8 to utf-8, which is rejected.
     **/
    if ($FromCharset == $ToCharset)
    {
        print $this->DebugOutput(1, 0, $FromCharset);
        if ($FromCharset == "utf-8")
        {
            print $this->DebugOutput(0, 4, $FromCharset);
            exit;
        }
    }

    /**
     * First case: the source is a single-byte charset.
     **/
    if ($FromCharset != "utf-8")
    {
        $ToUtf8 = ($ToCharset == "utf-8");
        if ($ToUtf8)
        {
            $CharsetTable = $this->MakeConvertTable ($FromCharset);
        }
        else
        {
            $CharsetTable = $this->MakeConvertTable ($FromCharset, $ToCharset);
        }
        // Fall back to empty tables so the lookups below never run on an undefined index.
        $SourceTable = isset($CharsetTable[$FromCharset]) ? $CharsetTable[$FromCharset] : array();
        $TargetTable = (!$ToUtf8 && isset($CharsetTable[$ToCharset])) ? $CharsetTable[$ToCharset] : array();

        $StringLength = strlen($StringToChange);
        for ($i = 0; $i < $StringLength; $i++)
        {
            // Two-digit upper-case hex of the byte; the leading zero matters
            // because table values are written as e.g. "0A", not "A".
            $HexChar = sprintf("%02X", ord($StringToChange[$i]));

            /**
             * gsm0338 extension characters are two bytes: 0x1B plus one more.
             * Join them into one four-digit value. The second byte is
             * zero-padded as well, and the escape is only consumed when a
             * following byte actually exists.
             **/
            if (($FromCharset == "gsm0338") && ($HexChar == '1B') && ($i + 1 < $StringLength))
            {
                $i++;
                $HexChar .= sprintf("%02X", ord($StringToChange[$i]));
            }

            // array_search() alone tells both whether and where the value exists.
            $UnicodeHexChar = array_search($HexChar, $SourceTable);
            if ($UnicodeHexChar === false)
            {
                print $this->DebugOutput(0, 2, $StringToChange[$i]);
                continue;
            }

            /**
             * One source char may map to several code points joined with a
             * "plus" sign (e.g. 007A+0142), so each part is handled on its own.
             * The cast covers all-digit keys, which PHP stores as integers.
             **/
            $UnicodeHexChars = explode("+", (string) $UnicodeHexChar);
            foreach ($UnicodeHexChars as $UnicodeHex)
            {
                if ($ToUtf8)
                {
                    $Utf8Char = $this->HexToUtf($UnicodeHex);
                    if ($this->Entities == true)
                    {
                        $NewString .= $this->UnicodeEntity($Utf8Char);
                    }
                    else
                    {
                        $NewString .= $Utf8Char;
                    }
                }
                else if (array_key_exists($UnicodeHex, $TargetTable))
                {
                    if ($this->Entities == true)
                    {
                        $NewString .= $this->UnicodeEntity($this->HexToUtf($UnicodeHex));
                    }
                    else
                    {
                        $NewString .= chr(hexdec($TargetTable[$UnicodeHex]));
                    }
                }
                else
                {
                    print $this->DebugOutput(0, 1, $StringToChange[$i]);
                }
            }
        }
    }
    /**
     * Second case: the source is utf-8 (a multibyte string).
     **/
    else
    {
        $CharsetTable = $this->MakeConvertTable ($ToCharset);
        $TargetTable = isset($CharsetTable[$ToCharset]) ? $CharsetTable[$ToCharset] : array();

        /**
         * Collect every "utf-8 sequence => replacement" pair first and apply
         * them in ONE pass with strtr(). Replacing row by row would let bytes
         * that were already written be matched again by a later row.
         **/
        $ReplacePairs = array();
        foreach ($TargetTable as $UnicodeHexChar => $HexChar)
        {
            $Utf8Char = (string) $this->HexToUtf($UnicodeHexChar);
            // An empty search string can match nothing; the first row wins on duplicates.
            if ($Utf8Char === "" || isset($ReplacePairs[$Utf8Char]))
            {
                continue;
            }
            if ($this->Entities == true)
            {
                $ReplacePairs[$Utf8Char] = $this->UnicodeEntity($Utf8Char);
            }
            else
            {
                $ReplacePairs[$Utf8Char] = chr(hexdec($HexChar));
            }
        }
        if (count($ReplacePairs) > 0)
        {
            $NewString = strtr($StringToChange, $ReplacePairs);
        }
        else
        {
            $NewString = $StringToChange;
        }
    }
    return $NewString;
}
/**
* ConvertCharset::DebugOutput()
*
* This function is not really necessary, the debug output could stay inside of
* source code but like this, it's easier to manage and translate.
* Besides I couldn't find good coment/debug class :-) Maybe I'll write one someday...
*
* All messages depend on DEBUG_MODE level, as I was writing before you can set this value to:
* - -1 - No errors or notices are shown
* - 0 - Only error messages are shown, no notices
* - 1 - Error messages and notices are shown
*
* @param int $Group Message group: error - 0, notice - 1
* @param int $Number Number of the message within its group
* @param mixed $Value This value is whatever you want; usually it's some parameter value, for better message understanding.
* @return string String with a proper message.
**/
function DebugOutput ($Group, $Number, $Value = false)
{
    /**
     * Message catalogue, indexed as [group][number]:
     * group 0 holds errors, group 1 holds notices.
     * $Value is embedded in the text where a message refers to it.
     **/
    $Messages = array(
        0 => array(
            0 => "Error, can NOT read file: {$Value}<br>",
            1 => "Error, can't find maching char \"{$Value}\" in destination encoding table!<br>",
            2 => "Error, can't find maching char \"{$Value}\" in source encoding table!<br>",
            3 => "Error, you did NOT set variable {$Value} in Convert() function.<br>",
            4 => "You can NOT convert string from {$Value} to {$Value}!<BR>"
        ),
        1 => array(
            0 => "Notice, you are trying to convert string from {$Value} to {$Value}, don't you feel it's strange? ;-)<br>",
            1 => "Notice, both charsets {$Value} are identical! Check encoding tables files.<br>",
            2 => "Notice, there is no unicode char in the string you are trying to convert.<br>"
        )
    );

    // A message is handed back only when DEBUG_MODE is at least the group number.
    if (!(DEBUG_MODE >= $Group))
    {
        return null;
    }
    return $Messages[$Group][$Number];
} // function DebugOutput
} //class ends here
?>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -