⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 unitcharsetmap.pas

📁 Delphi的另一款钢琴软件
💻 PAS
📖 第 1 页 / 共 2 页
字号:
unit unitCharsetMap;

interface

uses Windows, Classes, SysUtils, Graphics, MultiLanguage_TLB, richedit;

var
  CP_USASCII : Integer = 0;
  DefaultCodePage : Integer = 0;

function MIMECharsetNameToCodepage (const MIMECharsetName : string) : Integer;
function CharsetNameToCodepage (const CharsetName : string) : Integer;
function CodepageToMIMECharsetName (codepage : Integer) : string;
function CodepageToCharsetName (codepage : Integer) : string;
function CharsetToCodePage (FontCharset : TFontCharset) : Integer;
function CodePageToCharset (codePage : Integer) : TFontCharset;
function WideStringToString (const ws : WideString; codePage : Integer) : string;
function StringToWideString (const st : string; codePage : Integer) : WideString;
function StringToGDIString (const s : string; codePage : Integer) : string;
function GDIStringToString (const s : string; codePage : Integer) : string;
function URLSuffixToCodePage (urlSuffix : string) : Integer;
procedure GetCharsetNames (sl : TStrings);
function TrimEx(const S: string): string;
function IsWideCharAlpha (ch : WideChar) : boolean;
function IsWideCharAlnum (ch : WideChar) : boolean;
procedure FontToCharFormat(font: TFont; codePage : Integer; var Format: TCharFormatW);
function WideStringToUTF8 (const ws : WideString) : string;
function UTF8ToWideString (const st : string) : WideString;

var
  gIMultiLanguage : IMultiLanguage = Nil;
  gMultilanguageLoaded : boolean = False;
  gMultiLang : boolean = False;

implementation

uses ActiveX;

type
  TCharsetRec = record
    Name : string;
    URLSuffix : string;
    CodePage : Integer;
    CharSet : TFontCharSet;
    MIMECharsetName : string;
    CharsetName : string
  end;

var
  gCharsetMap : array [0..35] of TCharsetRec = (
    (Name:'US ASCII';                      URLSuffix:'';    CodePage: 0; Charset:0;   MIMECharsetName:'';            CharsetName:'ANSI_CHARSET'       ),
    (Name:'US ASCII';                      URLSuffix:'';    CodePage: 0; Charset:0;   MIMECharsetName:'us-ascii';    CharsetName:'ANSI_CHARSET'       ),
    (Name:'Western Europe (ISO)';          URLSuffix:'';    CodePage:28591; Charset:0;   MIMECharsetName:'iso-8859-1';        CharsetName:'ANSI_CHARSET'       ),
    (Name:'Western Europe (Windows)';      URLSuffix:'';    CodePage:01252; Charset:0;   MIMECharsetName:'windows-1252';      CharsetName:'ANSI_CHARSET'       ),
    (Name:'Latin (3)';                     URLSuffix:'';    CodePage:28593; Charset:162; MIMECharsetName:'iso-8859-3';        CharsetName:'TURKISH_CHARSET'     ),
    (Name:'Latin (9)';                     URLSuffix:'';    CodePage:28605; Charset:0;   MIMECharsetName:'iso-8859-15';       CharsetName:'ANSI_CHARSET'       ),
    (Name:'Thai';                          URLSuffix:'.th'; CodePage:00874; Charset:222; MIMECharsetName:'iso-8859-11';       CharsetName:'ARABIC_CHARSET'     ),
    (Name:'Japanese (JIS)';                URLSuffix:'.jp'; CodePage:50220; Charset:128; MIMECharsetName:'iso-2022-jp';       CharsetName:'SHIFTJIS_CHARSET'   ),
    (Name:'Japanese (EUC)';                URLSuffix:'';    CodePage:51932; Charset:128; MIMECharsetName:'euc-jp';          CharsetName:'SHIFTJIS_CHARSET'   ),
    (Name:'Japanese (EUC)';                URLSuffix:'';    CodePage:51932; Charset:128; MIMECharsetName:'x-euc-jp';          CharsetName:'SHIFTJIS_CHARSET'   ),
    (Name:'Japanese (Shift-JIS)';          URLSuffix:'';    CodePage:00932; Charset:128; MIMECharsetName:'shift-jis';    CharsetName:'SHIFTJIS_CHARSET'   ),
    (Name:'Japanese (Shift-JIS)';          URLSuffix:'';    CodePage:00932; Charset:128; MIMECharsetName:'x-shift-jis';    CharsetName:'SHIFTJIS_CHARSET'   ),
    (Name:'Chinese - Simplified (GB2312)'; URLSuffix:'';    CodePage:00936; Charset:134; MIMECharsetName:'gb2312';         CharsetName:'GB2312_CHARSET'     ),
    (Name:'Chinese - Simplified (HZ)';     URLSuffix:'';    CodePage:52936; Charset:134; MIMECharsetName:'hz-gb-2312';          CharsetName:'GB2312_CHARSET'     ),
    (Name:'Chinese - Traditional (Big5)';  URLSuffix:'.cn'; CodePage:00950; Charset:136; MIMECharsetName:'big5';              CharsetName:'CHINESEBIG5_CHARSET'),
    (Name:'Chinese - Traditional (Big5)';  URLSuffix:'';    CodePage:00950; Charset:136; MIMECharsetName:'x-big5';            CharsetName:'CHINESEBIG5_CHARSET'),
    (Name:'Korean';                        URLSuffix:'.kr'; CodePage:00949; Charset:129; MIMECharsetName:'iso-2022-kr';       CharsetName:'HANGEUL_CHARSET'    ),
    (Name:'Korean';                        URLSuffix:'';    CodePage:00949; Charset:129; MIMECharsetName:'KS_C_5601-1987';       CharsetName:'HANGEUL_CHARSET'    ),
    (Name:'Korean (EUC)';                  URLSuffix:'';    CodePage:51949; Charset:129; MIMECharsetName:'euc-kr';          CharsetName:'HANGEUL_CHARSET'    ),
    (Name:'Korean (EUC)';                  URLSuffix:'';    CodePage:51949; Charset:129; MIMECharsetName:'x-euc-kr';          CharsetName:'HANGEUL_CHARSET'    ),
    (Name:'Central European (ISO)';        URLSuffix:'';    CodePage:28592; Charset:238; MIMECharsetName:'iso-8859-2';        CharsetName:'EASTEUROPE_CHARSET' ),
    (Name:'Central European (Windows)';    URLSuffix:'';    CodePage:01250; Charset:238; MIMECharsetName:'windows-1250';        CharsetName:'EASTEUROPE_CHARSET' ),
    (Name:'Russian (KOI8-R)';              URLSuffix:'.ru'; CodePage:20878; Charset:204; MIMECharsetName:'koi8-r';            CharsetName:'RUSSIAN_CHARSET'    ),
    (Name:'Russian (KOI8-R)';              URLSuffix:'';    CodePage:20878; Charset:204; MIMECharsetName:'x-koi8-r';            CharsetName:'RUSSIAN_CHARSET'    ),
    (Name:'Russian (KOI8)';                URLSuffix:'';    CodePage:20866; Charset:204; MIMECharsetName:'koi8';            CharsetName:'RUSSIAN_CHARSET'    ),
    (Name:'Russian (KOI8)';                URLSuffix:'';    CodePage:20866; Charset:204; MIMECharsetName:'x-koi8';            CharsetName:'RUSSIAN_CHARSET'    ),
    (Name:'Russian (ISO)';                 URLSuffix:'';    CodePage:28595; Charset:204; MIMECharsetName:'iso-8859-5';        CharsetName:'RUSSIAN_CHARSET'    ),
    (Name:'Russian (Windows)';             URLSuffix:'';    CodePage:01251; Charset:204; MIMECharsetName:'windows-1251';              CharsetName:'RUSSIAN_CHARSET'    ),
    (Name:'Greek (ISO)';                   URLSuffix:'';    CodePage:28597; Charset:161; MIMECharsetName:'iso-8859-7';        CharsetName:'GREEK_CHARSET'      ),
    (Name:'Greek (Windows)';               URLSuffix:'.gr'; CodePage:01253; Charset:161; MIMECharsetName:'windows-1253';        CharsetName:'GREEK_CHARSET'      ),
    (Name:'Turkish (ISO)';                 URLSuffix:'.tr'; CodePage:28599; Charset:162; MIMECharsetName:'iso-8859-9';        CharsetName:'TURKISH_CHARSET'    ),
    (Name:'Hebrew (Windows)';              URLSuffix:'.il'; CodePage:01255; Charset:177; MIMECharsetName:'windows-1255';        CharsetName:'HEBREW_CHARSET'     ),
    (Name:'Hebrew (ISO)';                  URLSuffix:'';    CodePage:28598; Charset:177; MIMECharsetName:'iso-8859-9';        CharsetName:'HEBREW_CHARSET'     ),
    (Name:'Arabic (ISO)';                  URLSuffix:'';    CodePage:28596; Charset:178; MIMECharsetName:'iso-8859-6';        CharsetName:'ARABIC_CHARSET'     ),
    (Name:'Baltic (ISO)';                  URLSuffix:'';    CodePage:28594; Charset:186; MIMECharsetName:'iso-8859-4';        CharsetName:'BALTIC_CHARSET'     ),
    (Name:'Unicode (UTF-8)';               URLSuffix:'';    CodePage:65001; Charset:0;   MIMECharsetName:'utf-8';             CharsetName:'BALTIC_CHARSET'     )
  );

  CharsetMap : array of TCharsetRec;



procedure LoadMultilanguage;
type
  PMIMECPInfo = ^tagMIMECPInfo;
var
  enum : IEnumCodepage;
  p, info : PMIMECPInfo;
  i, j, c, ct : DWORD;
  found : boolean;
begin
  if (not gMultilanguageLoaded) or (giMultiLanguage = Nil) then
  begin
    gMultilanguageLoaded := True;
    gIMultiLanguage := CoCMultiLanguage.Create;
    gMultiLang := False;

    if Assigned (gIMultiLanguage) then
    begin
      gIMultiLanguage.EnumCodePages(MIMECONTF_MAILNEWS, enum);
      info := CoTaskMemAlloc (10* sizeof (tagMIMECPInfo));;
      try
        c := 2;
        while SUCCEEDED (enum.Next (10, info^, ct)) and (ct <> 0) do
        begin
          SetLength (CharsetMap, c + ct);
          if c = 2 then
          begin
            CharsetMap [0] := gCharsetMap [0];
            CharsetMap [1] := gCharsetMap [5]
          end;
          p := info;

          for i := 0 to ct - 1 do
          begin
            CharsetMap [i + c].name := p^.wszDescription;
            CharsetMap [i + c].CodePage := p^.uiCodePage;
            CharsetMap [i + c].CharSet  := p^.bGDICharset;
            CharsetMap [i + c].MIMECharsetName := p^.wszWebCharset;
            CharsetMap [i + c].CharsetName := '';
            Inc (p);
          end;
          Inc (c, ct);
        end;

        for i := 0 to c - 1 do
          for j := Low (gCharsetMap) to High (gCharsetMap) do
            if gCharsetMap [j].CodePage = charsetMap [i].CodePage then
            begin
              if charsetMap [i].URLSuffix = '' then
                charsetMap [i].URLSuffix := gCharsetMap [j].URLSuffix;
              if charsetMap [i].URLSuffix <> '' then
                break
            end;

        found := False;
        for i := 0 to c - 1 do
          if charsetMap [i].CodePage = DefaultCodePage then
          begin
            found := True;
            break
          end;

        if not Found then
          for i := Low (gCharsetMap) to High (gCharsetMap) do
            if gCharsetMap [i].CodePage = DefaultCodePage then
            begin
              SetLength (charsetMap, c + 1);
              charsetMap [c] := gCharsetMap [i];
              break;
            end;

      finally
        CoTaskMemFree (info)
      end;
      gMultiLang := True
    end
    else
    begin
      SetLength (CharsetMap, High (gCharsetMap) + 1);
      for i := Low (gCharsetMap) to High (gCharsetMap) do
        CharsetMap [i] := gCharsetMap [i]
    end
  end
end;

function MIMECharsetNameToCodepage (const MIMECharsetName : string) : Integer;
var
  i : Integer;
begin
  LoadMultiLanguage;

  if CompareText (MIMECharsetName, 'us-ascii') = 0 then
    result := CP_USASCII
  else
  begin
    result := 0;
    i := Pos ('-', MIMECharsetName);
    if i > 0 then
      if CompareText (Copy (MIMECharsetName, 1, i - 1), 'windows') = 0 then
        result := StrToIntDef (Copy (MIMECharsetName, i + 1, MaxInt), 1252);

    if result = 0 then
    begin
      result := CP_USASCII;
      for i := Low (CharsetMap) to High (CharsetMap) do
        if CompareText (CharsetMap [i].MIMECharsetName, MIMECharsetName) = 0 then
        begin
          result := CharsetMap [i].codepage;
          break
        end
    end
  end
end;

function CharsetNameToCodepage (const CharsetName : string) : Integer;
var
  i : Integer;
begin
  LoadMultiLanguage;
  result := CP_USASCII;
  for i := Low (CharsetMap) to High (CharsetMap) do
    if CompareText (CharsetMap [i].Name, CharsetName) = 0 then
    begin
      result := CharsetMap [i].codepage;
      break
    end
end;

function CodepageToMIMECharsetName (codepage : Integer) : string;
var
  i : Integer;
begin
  LoadMultiLanguage;
  result := '';

  for i := Low (CharsetMap) to High (CharsetMap) do
    if CharsetMap [i].Codepage = codepage then
    begin
      result := CharsetMap [i].MIMECharsetName;
      break
    end
end;

function CodepageToCharsetName (codepage : Integer) : string;
var
  i : Integer;
begin
  LoadMultiLanguage;
  result := '';

  for i := Low (CharsetMap) to High (CharsetMap) do
    if CharsetMap [i].Codepage = codepage then
    begin
      result := CharsetMap [i].Name;
      break
    end
end;

function CharsetToCodePage (FontCharset : TFontCharset) : Integer;
var
  i : Integer;
begin
  LoadMultiLanguage;
  result := CP_USASCII;
  for i := Low (CharsetMap) to High (CharsetMap) do
    if CharsetMap [i].CharSet = FontCharset then
    begin
      result := CharsetMap [i].CodePage;
      break
    end
end;

function CodePageToCharset (codePage : Integer) : TFontCharset;
var
  i : Integer;
begin
  LoadMultiLanguage;
  result := 0;
  if codepage <> 65001 then
    for i := Low (CharsetMap) to High (CharsetMap) do
      if CharsetMap [i].Codepage = codepage then
      begin
        result := CharsetMap [i].CharSet;
        break
      end
end;

function WideStringToString (const ws : WideString; codePage : Integer) : string;
var
  dlen, len : DWORD;
  mode : DWORD;
begin
  LoadMultiLanguage;

  len := Length (ws);
  dlen := len * 4;
  SetLength (result, dlen);  // Dest string may be longer than source string if it's UTF-8
  if codePage = -1 then

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -