⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 uencoding.pas

📁 uEncoding字符串UNICODE处理单元 用于处理unicode国际通用
💻 PAS
📖 第 1 页 / 共 4 页
字号:
     Inc(Z);

      if FBigEndian then
      begin
        //BigEndian
        BS[3] := Lo((U4C shl 16) shr 16);
        BS[2] := Hi((U4C shl 16) shr 16);
        BS[1] := Lo(U4C shr 16);
        BS[0] := Hi(U4C shr 16);
      end
      else
      begin
        //LittleEndian
        BS[0] := Lo((U4C shl 16) shr 16);
        BS[1] := Hi((U4C shl 16) shr 16);
        BS[2] := Lo(U4C shr 16);
        BS[3] := Hi(U4C shr 16);
      end;
      Move(BS[0], Bytes^, SizeOf(Byte));
      Inc(Bytes);
      Move(BS[1], Bytes^, SizeOf(Byte));
      Inc(Bytes);
      Move(BS[2], Bytes^, SizeOf(Byte));
      Inc(Bytes);
      Move(BS[3], Bytes^, SizeOf(Byte));
      Inc(Bytes);
  end;
end;

function TUCS4Encoding.GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar;
  CharCount: Integer): Integer;
var
  U4C: UCS4Char;
  C: WideChar;
  i: Integer;
  BS: TBytes;
  tmp:PWideChar;
  Len:Integer;
begin
  Result := CharCount;
  Len:=CharCount div 2;
  GetMem(tmp, CharCount);
  ZeroMemory(tmp,CharCount);
  SetLength(BS,4);
  for i := 0 to Len - 1 do
  begin
    BS[0] := Bytes^;
    Inc(Bytes);
    BS[1] := Bytes^;
    Inc(Bytes);
    BS[2] := Bytes^;
    Inc(Bytes);
    BS[3] := Bytes^;
    Inc(Bytes);

    if FBigEndian then
    begin
      //BigEndian
      U4C := BS[0] shl 24 + BS[1] shl 16 + BS[2] shl 8 + BS[3];
    end
    else
    begin
      //LittleEndian
      U4C := BS[3] shl 24 + BS[2] shl 16 + BS[1] shl 8 + BS[0];
    end;

    C := ConvertFromUtf32(U4C)[1];
    tmp^:=C;
    //Move(C,tmp^,SizeOf(WideChar));
    Inc(tmp);
    if U4C > $FFFF then
    begin
      C := ConvertFromUtf32(U4C)[2];
      tmp^:=C;
      //Move(C,tmp^,SizeOf(WideChar));
      Inc(tmp);
    end;
  end;
  Dec(tmp,Len);
  WideCharToMultiByte(CP_ACP, 0, tmp, -1, Chars, Result, nil, nil);
  FreeMem(tmp);
end;

{$ENDIF UNICODE}

function TUCS4Encoding.GetMaxByteCount(CharCount: Integer): Integer;
begin
  if FBOMEnable then
  begin
    Result := (CharCount + 1) * 4;
  end
  else
  begin
    Result := CharCount * 4;
  end;
end;

function TUCS4Encoding.GetMaxCharCount(ByteCount: Integer): Integer;
begin
  Result := ByteCount div 4;

  if FBOMEnable then
  begin
    Result := Result + 1;
  end;
end;

function TUCS4Encoding.GetPreamble: TBytes;
begin
  if FBOMEnable = True then
  begin
    SetLength(Result, 4);
    if FBigEndian = True then
    begin
      //BigEndian
      Result[0] := $00;
      Result[1] := $00;
      Result[2] := $FE;
      Result[3] := $FF;
    end
    else
    begin
      //LittleEndian
      Result[0] := $FF;
      Result[1] := $FE;
      Result[2] := $00;
      Result[3] := $00;
    end;
  end
  else
  begin
    SetLength(Result, 0);
  end;
end;

{$ENDIF UCS4_ENCODING_SUPPORT}


{CheckBom}
function HasUTF16LEBOM(S: TStream): Boolean;
var
  SavePos: Int64;
  Buf: array[1..2] of AnsiChar;
begin
  Result := False;
  if HasUTF32LEBOM(S) then
    Exit;
  SavePos := S.Position;
  try
    S.Seek(0, soBeginning);
    if S.Read(Buf, 2) = 2 then
      Result := (Buf[1] = UTF16LEString[1])
        and (Buf[2] = UTF16LEString[2]);
  finally
    S.Position := SavePos;
  end;
end;

function HasUTF16LEBOM(S: AnsiString): Boolean;
begin
  Result := False;
  if HasUTF32LEBOM(S) then
    Exit;
  if Length(S) < 2 then exit;
  if S[1] <> UTF16LEString[1] then exit;
  if S[2] <> UTF16LEString[2] then exit;
  Result := True;
end;

function HasUTF16BEBOM(S: TStream): Boolean;
var
  SavePos: Int64;
  Buf: array[1..2] of AnsiChar;
begin
  SavePos := S.Position;
  Result := False;
  try
    S.Seek(0, soBeginning);
    if S.Read(Buf, 2) = 2 then
      Result := (Buf[1] = UTF16BEString[1])
        and (Buf[2] = UTF16BEString[2]);
  finally
    S.Position := SavePos;
  end;
end;

function HasUTF16BEBOM(S: AnsiString): Boolean;
begin
  Result := False;
  if Length(S) < 2 then exit;
  if S[1] <> UTF16BEString[1] then exit;
  if S[2] <> UTF16BEString[2] then exit;
  Result := True;
end;

function HasUTF8BOM(S: TStream): Boolean;
var
  SavePos: Int64;
  Buf: array[1..3] of AnsiChar;
begin
  SavePos := S.Position;
  Result := False;
  try
    S.Seek(0, soBeginning);
    if S.Read(Buf, 3) = 3 then
      Result := (Buf[1] = UTF8BOMString[1])
        and (Buf[2] = UTF8BOMString[2])
        and (Buf[3] = UTF8BOMString[3]);
  finally
    S.Position := SavePos;
  end;
end;

function HasUTF8BOM(S: AnsiString): Boolean;
begin
  Result := False;
  if Length(S) < 3 then exit;
  if S[1] <> UTF8BOMString[1] then exit;
  if S[2] <> UTF8BOMString[2] then exit;
  if S[3] <> UTF8BOMString[3] then exit;
  Result := True;
end;

function HasUTF32LEBOM(S: TStream): Boolean;
var
  SavePos: Int64;
  Buf: array[1..4] of AnsiChar;
begin
  SavePos := S.Position;
  Result := False;
  try
    S.Seek(0, soBeginning);
    if S.Read(Buf, 4) = 4 then
      Result := (Buf[1] = UTF32LEString[1])
        and (Buf[2] = UTF32LEString[2])
        and (Buf[3] = UTF32LEString[3])
        and (Buf[4] = UTF32LEString[4]);
  finally
    S.Position := SavePos;
  end;
end;

function HasUTF32LEBOM(S: AnsiString): Boolean;
begin
  Result := False;
  if Length(S) < 4 then exit;
  if S[1] <> UTF32LEString[1] then exit;
  if S[2] <> UTF32LEString[2] then exit;
  if S[3] <> UTF32LEString[3] then exit;
  if S[4] <> UTF32LEString[4] then exit;
  Result := True;
end;

function HasUTF32BEBOM(S: TStream): Boolean;
var
  SavePos: Int64;
  Buf: array[1..4] of AnsiChar;
begin
  SavePos := S.Position;
  Result := False;
  try
    S.Seek(0, soBeginning);
    if S.Read(Buf, 4) = 4 then
      Result := (Buf[1] = UTF32BEString[1])
        and (Buf[2] = UTF32BEString[2])
        and (Buf[3] = UTF32BEString[3])
        and (Buf[4] = UTF32BEString[4]);
  finally
    S.Position := SavePos;
  end;
end;

function HasUTF32BEBOM(S: AnsiString): Boolean;
begin
  Result := False;
  if Length(S) < 4 then exit;
  if S[1] <> UTF32BEString[1] then exit;
  if S[2] <> UTF32BEString[2] then exit;
  if S[3] <> UTF32BEString[3] then exit;
  if S[4] <> UTF32BEString[4] then exit;
  Result := True;
end;


function GetEncodeFromStream(Stream: TStream;var CodePage:LongInt): Boolean;
begin
  Result := True;
  if HasUTF32LEBOM(Stream) then
    CodePage := 65005
  else if HasUTF32BEBOM(Stream) then
    CodePage := 65006
  else if HasUTF16LEBOM(Stream) then
    CodePage := 1200
  else if HasUTF16BEBOM(Stream) then
    CodePage := 1201
  else if HasUTF8BOM(Stream) then
    CodePage := 65001
  else Result := False;
end;

function GetCodePageFromFile(const FileName:TFileName;var DefCodePage:LongInt):Boolean;
var
  m: TMemoryStream;
  C:LongInt;
begin
  m := TMemoryStream.Create;
  try
   m.LoadFromFile(FileName);
   Result:=GetEncodeFromStream(m, C);
   if Result then
      DefCodePage:=C;
  finally
    m.Free;
  end;
end;

function GetEncoding(CodePage: Integer): TEncoding;
begin
   case CodePage of
     1200:Result := TUnicodeEncoding.Create;
     1201:Result := TBigEndianUnicodeEncoding.Create;
     65000:Result := TUTF7Encoding.Create;
     65001:Result := TUTF8Encoding.Create;
     {$IFDEF UCS4_ENCODING_SUPPORT}
     12000,65005:Result := TUCS4Encoding.Create;
     12001,65006:Result := TUCS4Encoding.Create(True);
     {$ENDIF}
     else
      Result := TMBCSEncoding.Create(CodePage);
   end;
end;

function GetEnCodingFromFile(const FileName:TFileName;DefCodePage:LongInt=0):TEncoding;
var
  m: TMemoryStream;
  C:LongInt;
begin
  m := TMemoryStream.Create;
  try
   m.LoadFromFile(FileName);
   if GetEncodeFromStream(m, C) then
      Result:=GetEncoding(C)
   else Result:=GetEncoding(DefCodePage);
  finally
    m.Free;
  end;
end;

function StrToFile(const FileName, S: String): Boolean;
begin
  Result:=StrToFile(FileName, S,nil);
end;

function StrToFile(const FileName, S: String;Encoding:TEncoding): Boolean;
var
  Stream: TStream;
begin
  Stream := TFileStream.Create(FileName, fmCreate);
  try
    StrSaveToStream(S,Stream, Encoding);
  finally
    Stream.Free;
  end;
end;

procedure StrSaveToStream(const S:string;Stream: TStream; Encoding: TEncoding);
var
  Buffer, Preamble: TBytes;
begin
  if Encoding = nil then
    Encoding := TEncoding.Default;
  Buffer := Encoding.GetBytes(S);
  Preamble := Encoding.GetPreamble;
  if Length(Preamble) > 0 then
    Stream.WriteBuffer(Preamble[0], Length(Preamble));
  Stream.WriteBuffer(Buffer[0], Length(Buffer));
end;

function FileToStr(const FileName: string):string;
begin
   Result:=FileToStr(FileName,nil);
end;

function FileToStr(const FileName: string;Encoding:TEncoding):string;
var
  Stream: TStream;
begin
  Stream := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
  try
    Result:=StrLoadFromStream(Stream, Encoding);
  finally
    Stream.Free;
  end;
end;

function FileToString(const FileName: string):RawByteString;
var
  fs: TFileStream;
  Len: Integer;
begin
  fs := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
  try
    Len := fs.Size;
    SetLength(Result, Len);
    if Len > 0 then
      fs.ReadBuffer(Result[1], Len);
  finally
    fs.Free;
  end;
 end;

function StrLoadFromStream(Stream: TStream; Encoding: TEncoding):String;
var
  Size: Integer;
  Buffer: TBytes;
begin
  try
    Size := Stream.Size - Stream.Position;
    SetLength(Buffer, Size);
    Stream.Read(Buffer[0], Size);
    Size := TEncoding.GetBufferEncoding(Buffer, Encoding);
    Result:=Encoding.GetString(Buffer, Size, Length(Buffer) - Size);
  finally

  end;
end;

{Unicode}

function UnicodeEncode(Str: string; CodePage: integer): WideString;
var
  Len: integer;
begin
  Len := Length(Str)* SizeOf(Char) + 1;
  SetLength(Result, Len);
  Len := MultiByteToWideChar(CodePage, 0, PAnsiChar(Str), -1, PWideChar(Result), Len);
  SetLength(Result, Len - 1); //end is #0
end;

function UnicodeDecode(Str: WideString; CodePage: integer): string;
var
  Len: integer;
begin
  Len := Length(Str) * 2 + 1; //one for #0
  SetLength(Result, Len);
  Len := WideCharToMultiByte(CodePage, 0, PWideChar(Str), -1, PAnsiChar(Result), Len, nil, nil);
  SetLength(Result, Len - 1);
end;

function Gb2Big5(Str: string): string;
var
  Len:integer;
begin
    {UNICODE下只进行字形转换,需要在保存的时候,指定代码页为950即可}
    SetLength(Result, Length(Str));
    LCMapString(GetUserDefaultLCID, LCMAP_TRADITIONAL_CHINESE,
    PChar(Str), Length(Str),
    PChar(Result), Length(Result));
  {$IFNDEF UNICODE}
    Result := UnicodeDecode(UnicodeEncode(Result, 936), 950);
  {$ENDIF}
end;

function Big52Gb(Str: string): string;
var
  Len:Integer;
begin
  {$IFNDEF UNICODE}
  Str := UnicodeDecode(UnicodeEncode(Str, 950), 936);
  {$ENDIF}
  SetLength(Result, Length(Str));
  LCMapString(GetUserDefaultLCID, LCMAP_SIMPLIFIED_CHINESE,
    PChar(Str), Length(Str),
    PChar(Result), Length(Result));
  {UNICODE下只进行字形转换,需要在保存的时候,指定代码页为936即可}

end;

function GBCht2Chs(const S: string): string;
var
  Len: Integer;
  pGBCHTChar: PChar;
  pGBCHSChar: PChar;
begin
  pGBCHTChar := PChar(S);
  {$IFDEF UNICODE}
   Len:=Length(S);
  {$ELSE}
   Len := MultiByteToWideChar(936, 0, pGBCHTChar, -1, nil, 0);
  {$ENDIF}
   GetMem(pGBCHSChar, Len * SizeOf(WideChar)  + 1);
   ZeroMemory(pGBCHSChar, Len * SizeOf(WideChar)  + 1);
  //GB CHS -> GB CHT
  LCMapString($804, LCMAP_SIMPLIFIED_CHINESE, pGBCHTChar, -1, pGBCHSChar, Len * SizeOf(WideChar));
  Result := string(pGBChsChar);
  FreeMem(pGBCHSChar);
end;

function GBChs2Cht(const S: string): string;
var
  Len: Integer;
  pGBCHTChar: PChar;
  pGBCHSChar: PChar;
begin
  pGBCHSChar := PChar(S);
  {$IFDEF UNICODE}
  Len:=Length(S);
  {$ELSE}
  Len := MultiByteToWideChar(936, 0, pGBCHSChar, -1, nil, 0);
  {$ENDIF}
  GetMem(pGBCHTChar, Len * SizeOf(WideChar)  + 1);
  ZeroMemory(pGBCHTChar, Len * SizeOf(WideChar)  + 1);
  //GB CHS -> GB CHT
  LCMapString($804, LCMAP_TRADITIONAL_CHINESE, pGBCHSChar, -1, pGBCHTChar, Len * SizeOf(WideChar));
  Result := string(pGBCHTChar);
  FreeMem(pGBCHTChar);
end;



end.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -