⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 uencoding.pas

📁 uEncoding字符串UNICODE处理单元 用于处理unicode国际通用
💻 PAS
📖 第 1 页 / 共 4 页
字号:
{* |<PRE>
================================================================================
* 软件名称:FcpEditor
* 单元名称:uEncoding
* 单元作者:Senfore
* 备    注:UNICODE读写支持
* 开发平台:Windows Xp Sp3 + Delphi 2007/Delphi2009
* 兼容测试:
* 本 地 化:使Delphi2007能使用Delphi2009的Encoding方式处理文本
* 单元标识:$Id: uEncoding.pas,v 1.1 2009/01/28
* 修改记录:
*           2009.01.28 V1.1
*               让Delphi2007环境下支持UTF32格式读写
*           2009.01.06 V1.0
*               创建单元,实现功能
================================================================================
|</PRE>}

unit uEncoding;

{$DEFINE UCS4_ENCODING_SUPPORT}

interface

uses Classes,Windows,SysUtils;

{$IFNDEF UNICODE}
function InterlockedCompareExchangePointer(var Destination: Pointer; Exchange: Pointer; Comperand: Pointer): Pointer;
{$ENDIF}

{$IFNDEF UNICODE}
type
   UnicodeString=WideString;
   RawByteString=AnsiString;
{$ENDIF}

 {$IFNDEF UNICODE}
type
  TCharArray = array of Char;

  EEncodingError = class(Exception);

  TEncoding = class
  strict private
    class var
      FASCIIEncoding: TEncoding;
      FBigEndianUnicodeEncoding: TEncoding;
      FDefaultEncoding: TEncoding;
      FUnicodeEncoding: TEncoding;
      FUTF7Encoding: TEncoding;
      FUTF8Encoding: TEncoding;
    class function GetASCII: TEncoding; static;
    class function GetBigEndianUnicode: TEncoding; static;
    class function GetDefault: TEncoding; static;
    class function GetUnicode: TEncoding; static;
    class function GetUTF7: TEncoding; static;
    class function GetUTF8: TEncoding; static;
  strict protected
    FIsSingleByte: Boolean;
    FMaxCharSize: Integer;
    function GetByteCount(Chars: PChar; CharCount: Integer): Integer; overload; virtual; abstract;
    function GetBytes(Chars: PChar; CharCount: Integer; Bytes: PByte; ByteCount: Integer): Integer; overload; virtual; abstract;
    function GetCharCount(Bytes: PByte; ByteCount: Integer): Integer; overload; virtual; abstract;
    function GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar; CharCount: Integer): Integer; overload; virtual; abstract;
  public
    class function Convert(Source, Destination: TEncoding; Bytes: TBytes): TBytes; overload;
    class function Convert(Source, Destination: TEncoding; Bytes: TBytes; StartIndex, Count: Integer): TBytes; overload;
    class procedure FreeEncodings;
    class function IsStandardEncoding(AEncoding: TEncoding): Boolean; static;
    class function GetBufferEncoding(const Buffer: TBytes; var AEncoding: TEncoding): Integer; static;
    function GetByteCount(const Chars: TCharArray): Integer; overload;
    function GetByteCount(const Chars: TCharArray; CharIndex, CharCount: Integer): Integer; overload;
    function GetByteCount(const S: string): Integer; overload;
    function GetByteCount(const S: string; CharIndex, CharCount: Integer): Integer; overload;
    function GetBytes(const Chars: TCharArray): TBytes; overload;
    function GetBytes(const Chars: TCharArray; CharIndex, CharCount: Integer;
      var Bytes: TBytes; ByteIndex: Integer): Integer; overload;
    function GetBytes(const S: string): TBytes; overload;
    function GetBytes(const S: string; CharIndex, CharCount: Integer;
      var Bytes: TBytes; ByteIndex: Integer): Integer; overload;
    function GetCharCount(const Bytes: TBytes): Integer; overload;
    function GetCharCount(const Bytes: TBytes; ByteIndex, ByteCount: Integer): Integer; overload;
    function GetChars(const Bytes: TBytes): TCharArray; overload;
    function GetChars(const Bytes: TBytes; ByteIndex, ByteCount: Integer): TCharArray; overload;
    function GetChars(const Bytes: TBytes; ByteIndex, ByteCount: Integer;
      var Chars: TCharArray; CharIndex: Integer): Integer; overload;
    class function GetEncoding(CodePage: Integer): TEncoding; static;
    function GetMaxByteCount(CharCount: Integer): Integer; virtual; abstract;
    function GetMaxCharCount(ByteCount: Integer): Integer; virtual; abstract;
    function GetPreamble: TBytes; virtual; abstract;
    function GetString(const Bytes: TBytes): string; overload;
    function GetString(const Bytes: TBytes; ByteIndex, ByteCount: Integer): string; overload;
    class property ASCII: TEncoding read GetASCII;
    class property BigEndianUnicode: TEncoding read GetBigEndianUnicode;
    class property Default: TEncoding read GetDefault;
    property IsSingleByte: Boolean read FIsSingleByte;
    class property Unicode: TEncoding read GetUnicode;
    class property UTF7: TEncoding read GetUTF7;
    class property UTF8: TEncoding read GetUTF8;
  end;

 TMBCSEncoding = class(TEncoding)
  private
    FCodePage: Cardinal;
    FMBToWCharFlags: Cardinal;
    FWCharToMBFlags: Cardinal;
  strict protected
    function GetByteCount(Chars: PChar; CharCount: Integer): Integer; overload; override;
    function GetBytes(Chars: PChar; CharCount: Integer; Bytes: PByte; ByteCount: Integer): Integer; overload; override;
    function GetCharCount(Bytes: PByte; ByteCount: Integer): Integer; overload; override;
    function GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar; CharCount: Integer): Integer; overload; override;
  public
    constructor Create; overload; virtual;
    constructor Create(CodePage: Integer); overload; virtual;
    constructor Create(CodePage, MBToWCharFlags, WCharToMBFlags: Integer); overload; virtual;
    function GetMaxByteCount(CharCount: Integer): Integer; override;
    function GetMaxCharCount(ByteCount: Integer): Integer; override;
    function GetPreamble: TBytes; override;
  end;

  TUTF7Encoding = class(TMBCSEncoding)
  strict protected
    function GetByteCount(Chars: PChar; CharCount: Integer): Integer; overload; override;
    function GetBytes(Chars: PChar; CharCount: Integer; Bytes: PByte; ByteCount: Integer): Integer; overload; override;
    function GetCharCount(Bytes: PByte; ByteCount: Integer): Integer; overload; override;
    function GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar; CharCount: Integer): Integer; overload; override;
  public
    constructor Create; override;
    function GetMaxByteCount(CharCount: Integer): Integer; override;
    function GetMaxCharCount(ByteCount: Integer): Integer; override;
  end;

  TUTF8Encoding = class(TUTF7Encoding)
  public
    constructor Create; override;
    function GetMaxByteCount(CharCount: Integer): Integer; override;
    function GetMaxCharCount(ByteCount: Integer): Integer; override;
    function GetPreamble: TBytes; override;
  end;

  TUnicodeEncoding = class(TEncoding)
  strict protected
    function GetByteCount(Chars: PChar; CharCount: Integer): Integer; overload; override;
    function GetBytes(Chars: PChar; CharCount: Integer; Bytes: PByte; ByteCount: Integer): Integer; overload; override;
    function GetCharCount(Bytes: PByte; ByteCount: Integer): Integer; overload; override;
    function GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar; CharCount: Integer): Integer; overload; override;
  public
    constructor Create; virtual;
    function GetMaxByteCount(CharCount: Integer): Integer; override;
    function GetMaxCharCount(ByteCount: Integer): Integer; override;
    function GetPreamble: TBytes; override;
  end;

  TBigEndianUnicodeEncoding = class(TUnicodeEncoding)
  strict protected
    function GetBytes(Chars: PChar; CharCount: Integer; Bytes: PByte; ByteCount: Integer): Integer; overload; override;
    function GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar; CharCount: Integer): Integer; overload; override;
  public
    function GetPreamble: TBytes; override;
  end;

  TStringsEx=class(TStrings)
  private

  public
    procedure LoadFromFile(const FileName: string); overload; virtual;
    procedure LoadFromFile(const FileName: string; Encoding: TEncoding); overload; virtual;
    procedure LoadFromStream(Stream: TStream); overload; virtual;
    procedure LoadFromStream(Stream: TStream; Encoding: TEncoding); overload; virtual;
    procedure SaveToFile(const FileName: string); overload; virtual;
    procedure SaveToFile(const FileName: string; Encoding: TEncoding); overload; virtual;
    procedure SaveToStream(Stream: TStream); overload; virtual;
    procedure SaveToStream(Stream: TStream; Encoding: TEncoding); overload; virtual;
  end;

{ TStringList class }
  TStringListEx = class;
  TStringListExSortCompare = function(List: TStringListEx; Index1, Index2: Integer): Integer;

  TStringListEx = class(TStringsEx)
  private
    FList: PStringItemList;
    FCount: Integer;
    FCapacity: Integer;
    FSorted: Boolean;
    FDuplicates: TDuplicates;
    FCaseSensitive: Boolean;
    FOnChange: TNotifyEvent;
    FOnChanging: TNotifyEvent;
    procedure ExchangeItems(Index1, Index2: Integer);
    procedure Grow;
    procedure QuickSort(L, R: Integer; SCompare: TStringListExSortCompare);
    procedure SetSorted(Value: Boolean);
    procedure SetCaseSensitive(const Value: Boolean);
  protected
    procedure Changed; virtual;
    procedure Changing; virtual;
    function Get(Index: Integer): string; override;
    function GetCapacity: Integer; override;
    function GetCount: Integer; override;
    function GetObject(Index: Integer): TObject; override;
    procedure Put(Index: Integer; const S: string); override;
    procedure PutObject(Index: Integer; AObject: TObject); override;
    procedure SetCapacity(NewCapacity: Integer); override;
    procedure SetUpdateState(Updating: Boolean); override;
    function CompareStrings(const S1, S2: string): Integer; override;
    procedure InsertItem(Index: Integer; const S: string; AObject: TObject); virtual;
  public
    destructor Destroy; override;
    function Add(const S: string): Integer; override;
    function AddObject(const S: string; AObject: TObject): Integer; override;
    procedure Clear; override;
    procedure Delete(Index: Integer); override;
    procedure Exchange(Index1, Index2: Integer); override;
    function Find(const S: string; var Index: Integer): Boolean; virtual;
    function IndexOf(const S: string): Integer; override;
    procedure Insert(Index: Integer; const S: string); override;
    procedure InsertObject(Index: Integer; const S: string;
      AObject: TObject); override;
    procedure Sort; virtual;
    procedure CustomSort(Compare: TStringListExSortCompare); virtual;
    property Duplicates: TDuplicates read FDuplicates write FDuplicates;
    property Sorted: Boolean read FSorted write SetSorted;
    property CaseSensitive: Boolean read FCaseSensitive write SetCaseSensitive;
    property OnChange: TNotifyEvent read FOnChange write FOnChange;
    property OnChanging: TNotifyEvent read FOnChanging write FOnChanging;
  end; 

  {$ENDIF}

{$IFDEF UCS4_ENCODING_SUPPORT}
type
  TUCS4Encoding = class(TEncoding)
  strict protected
    FBOMEnable: Boolean;
    FBigEndian: Boolean;
    function GetByteCount(Chars: PChar; CharCount: Integer): Integer; overload; override;
    function GetBytes(Chars: PChar; CharCount: Integer; Bytes: PByte; ByteCount: Integer): Integer; overload; override;
    function GetCharCount(Bytes: PByte; ByteCount: Integer): Integer; overload; override;
    function GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar; CharCount: Integer): Integer; overload; override;
  public
    constructor Create; overload; virtual;
    constructor Create(BigEndian: Boolean); overload; virtual;
    constructor Create(BigEndian, BOMEnable: Boolean); overload; virtual;
    function GetMaxByteCount(CharCount: Integer): Integer; override;
    function GetMaxCharCount(ByteCount: Integer): Integer; override;
    function GetPreamble: TBytes; override;
  end;

{$ENDIF UCS4_ENCODING_SUPPORT}

{CheckBom}
  function HasUTF16LEBOM(S: TStream): Boolean; overload;
  function HasUTF16LEBOM(S: AnsiString): Boolean; overload;
  function HasUTF16BEBOM(S: TStream): Boolean; overload;
  function HasUTF16BEBOM(S: AnsiString): Boolean; overload;
  function HasUTF8BOM(S: TStream): Boolean; overload;
  function HasUTF8BOM(S: AnsiString): Boolean; overload;
  function HasUTF32LEBOM(S: TStream): Boolean; overload;
  function HasUTF32LEBOM(S: AnsiString): Boolean; overload;
  function HasUTF32BEBOM(S: TStream): Boolean; overload;
  function HasUTF32BEBOM(S: AnsiString): Boolean; overload;
{Encoding}
  function GetEncodeFromStream(Stream: TStream;var CodePage:LongInt): Boolean;
  function GetCodePageFromFile(const FileName:TFileName;var DefCodePage:LongInt):Boolean;
  function GetEncoding(CodePage: Integer): TEncoding;
  function GetEnCodingFromFile(const FileName:TFileName;DefCodePage:LongInt=0):TEncoding;
{File<->Str}
  function StrToFile(const FileName, S: String): Boolean;overload;
  function StrToFile(const FileName, S: String;Encoding:TEncoding): Boolean;overload;
  procedure StrSaveToStream(const S:string;Stream: TStream; Encoding: TEncoding);
  function FileToStr(const FileName: string):string;overload;
  function FileToStr(const FileName: string; Encoding: TEncoding):string;overload;
  function FileToString(const FileName: string): RawByteString;
  function StrLoadFromStream(Stream: TStream; Encoding: TEncoding):string;
{Unicode}
  function UnicodeEncode(Str: string; CodePage: integer): WideString;
  function UnicodeDecode(Str: WideString; CodePage: integer): string;
  function Gb2Big5(Str: string): string;
  function Big52Gb(Str: string): string;
  function GBCHT2CHS(const S: string): string;
  function GBCHS2CHT(const S: string): string;


implementation

uses SysConst,RTLConsts,
     {$IFDEF UNICODE}
       {$IFDEF UCS4_ENCODING_SUPPORT}
       Character;
       {$ENDIF UCS4_ENCODING_SUPPORT}
     {$ELSE UNICODE}
       {$IFDEF UCS4_ENCODING_SUPPORT}
       uCharacter;
       {$ENDIF UCS4_ENCODING_SUPPORT}
     {$ENDIF UNICODE}

const
  UTF16LEString: array[1..2] of AnsiChar = (#$FF, #$FE);
  UTF16BEString: array[1..2] of AnsiChar = (#$FE, #$FF);
  UTF8BOMString: array[1..3] of AnsiChar = (#$EF, #$BB, #$BF);
  UTF32LEString: array[1..4] of AnsiChar = (#$FF, #$FE, #$00, #$00);
  UTF32BEString: array[1..4] of AnsiChar = (#$00, #$00, #$FE, #$FF);

{$IFNDEF UNICODE}
resourcestring
  // Used by TEncoding
  SInvalidSourceArray = 'Invalid source array';
  SInvalidDestinationArray = 'Invalid destination array';
  SCharIndexOutOfBounds = 'Character index out of bounds (%d)';
  SByteIndexOutOfBounds = 'Start index out of bounds (%d)';
  SInvalidCharCount = 'Invalid count (%d)';
  SInvalidDestinationIndex = 'Invalid destination index (%d)';
  SInvalidCodePage = 'Invalid code page';  
{$ENDIF}

{$IFNDEF UNICODE}
function InterlockedCompareExchangePointer(var Destination: Pointer; Exchange: Pointer; Comperand: Pointer): Pointer;
begin
  Result := Pointer(InterlockedCompareExchange(Integer(Destination), Integer(Exchange), Integer(Comperand)));
end;
{$ENDIF}

{$IFNDEF UNICODE}
{ TEncoding }

class function TEncoding.Convert(Source, Destination: TEncoding; Bytes: TBytes): TBytes;
begin
  Result := Destination.GetBytes(Source.GetChars(Bytes));
end;

class function TEncoding.Convert(Source, Destination: TEncoding; Bytes: TBytes;
  StartIndex, Count: Integer): TBytes;
begin
  Result := Destination.GetBytes(Source.GetChars(Bytes, StartIndex, Count));
end;

class procedure TEncoding.FreeEncodings;
begin
  FreeAndNil(FDefaultEncoding);
  FreeAndNil(FASCIIEncoding);
  FreeAndNil(FUTF7Encoding);
  FreeAndNil(FUTF8Encoding);
  FreeAndNil(FUnicodeEncoding);
  FreeAndNil(FBigEndianUnicodeEncoding);
end;

class function TEncoding.GetASCII: TEncoding;
var
  LEncoding: TEncoding;
begin
  if FASCIIEncoding = nil then
  begin
    LEncoding := TMBCSEncoding.Create(20127, 0, 0);
    if InterlockedCompareExchangePointer(Pointer(FASCIIEncoding), LEncoding, nil) <> nil then
      LEncoding.Free;
  end;
  Result := FASCIIEncoding;
end;

class function TEncoding.GetBigEndianUnicode: TEncoding;
var
  LEncoding: TEncoding;
begin
  if FBigEndianUnicodeEncoding = nil then
  begin
    LEncoding := TBigEndianUnicodeEncoding.Create;
    if InterlockedCompareExchangePointer(Pointer(FBigEndianUnicodeEncoding), LEncoding, nil) <> nil then
      LEncoding.Free;
  end;
  Result := FBigEndianUnicodeEncoding;
end;

class function TEncoding.GetBufferEncoding(const Buffer: TBytes; var AEncoding: TEncoding): Integer;

  function ContainsPreamble(const Buffer, Signature: TBytes): Boolean;
  var
    I: Integer;
  begin
    Result := True;
    if Length(Buffer) >= Length(Signature) then
    begin
      for I := 1 to Length(Signature) do
        if Buffer[I - 1] <> Signature [I - 1] then
        begin
          Result := False;
          Break;
        end;
    end
    else
      Result := False;
  end;

var
  Preamble: TBytes;
begin
  Result := 0;
  if AEncoding = nil then
  begin
    // Find the appropraite encoding
    if ContainsPreamble(Buffer, TEncoding.Unicode.GetPreamble) then
      AEncoding := TEncoding.Unicode
    else
      if ContainsPreamble(Buffer, TEncoding.BigEndianUnicode.GetPreamble) then
        AEncoding := TEncoding.BigEndianUnicode
      else
        if ContainsPreamble(Buffer, TEncoding.UTF8.GetPreamble) then
          AEncoding := TEncoding.UTF8
        else
          AEncoding := TEncoding.Default;
    Result := Length(AEncoding.GetPreamble);
  end else
  begin
    Preamble := AEncoding.GetPreamble;
    if ContainsPreamble(Buffer, Preamble) then
      Result := Length(Preamble);
  end;
end;

function TEncoding.GetByteCount(const Chars: TCharArray): Integer;
begin
  Result := GetByteCount(Chars, 0, Length(Chars));
end;

function TEncoding.GetByteCount(const Chars: TCharArray; CharIndex, CharCount: Integer): Integer;
begin
  if CharIndex < 0 then
    raise EEncodingError.CreateResFmt(@SCharIndexOutOfBounds, [CharIndex]);
  if CharCount < 0 then
    raise EEncodingError.CreateResFmt(@SInvalidCharCount, [CharCount]);
  if (Length(Chars) - CharIndex) < CharCount then
    raise EEncodingError.CreateResFmt(@SInvalidCharCount, [CharCount]);

  Result := GetByteCount(@Chars[CharIndex], CharCount);
end;

function TEncoding.GetByteCount(const S: string): Integer;
begin
  Result := GetByteCount(PChar(S), Length(S));
end;

function TEncoding.GetByteCount(const S: string; CharIndex, CharCount: Integer): Integer;
begin
  if CharIndex < 1 then
    raise EEncodingError.CreateResFmt(@SCharIndexOutOfBounds, [CharIndex]);
  if CharCount < 0 then
    raise EEncodingError.CreateResFmt(@SInvalidCharCount, [CharCount]);
  if (Length(S) - CharIndex + 1) < CharCount then
    raise EEncodingError.CreateResFmt(@SInvalidCharCount, [CharCount]);

  Result := GetByteCount(PChar(@S[CharIndex]), CharCount);
end;

function TEncoding.GetBytes(const Chars: TCharArray): TBytes;
var
  Len: Integer;
begin
  Len := GetByteCount(Chars);
  SetLength(Result, Len);
  GetBytes(Chars, 0, Length(Chars), Result, 0);
end;

function TEncoding.GetBytes(const Chars: TCharArray; CharIndex, CharCount: Integer;
  var Bytes: TBytes; ByteIndex: Integer): Integer;
var
  Len: Integer;
begin
  if (Chars = nil) and (CharCount <> 0) then
    raise EEncodingError.CreateRes(@SInvalidSourceArray);
  if (Bytes = nil) and (CharCount <> 0) then
    raise EEncodingError.CreateRes(@SInvalidDestinationArray);
  if CharIndex < 0 then
    raise EEncodingError.CreateResFmt(@SCharIndexOutOfBounds, [CharIndex]);
  if CharCount < 0 then
    raise EEncodingError.CreateResFmt(@SInvalidCharCount, [CharCount]);
  if (Length(Chars) - CharIndex) < CharCount then
    raise EEncodingError.CreateResFmt(@SInvalidCharCount, [CharCount]);
  Len := Length(Bytes);
  if (ByteIndex < 0) or (ByteIndex > Len) then
    raise EEncodingError.CreateResFmt(@SInvalidDestinationIndex, [ByteIndex]);
  if Len - ByteIndex < GetByteCount(Chars, CharIndex, CharCount) then
    raise EEncodingError.CreateRes(@SInvalidDestinationArray);

  Result := GetBytes(@Chars[CharIndex], CharCount, @Bytes[ByteIndex], Len - ByteIndex);
end;

function TEncoding.GetBytes(const S: string): TBytes;
var
  Len: Integer;
begin
  Len := GetByteCount(S);
  SetLength(Result, Len);
  GetBytes(S, 1, Length(S), Result, 0);
end;

function TEncoding.GetBytes(const S: string; CharIndex, CharCount: Integer;
  var Bytes: TBytes; ByteIndex: Integer): Integer;
var
  Len: Integer;
begin
  if (Bytes = nil) and (CharCount <> 0) then
    raise EEncodingError.CreateRes(@SInvalidSourceArray);
  if CharIndex < 1 then
    raise EEncodingError.CreateResFmt(@SCharIndexOutOfBounds, [CharIndex]);
  if CharCount < 0 then
    raise EEncodingError.CreateResFmt(@SInvalidCharCount, [CharCount]);
  if (Length(S) - CharIndex + 1) < CharCount then
    raise EEncodingError.CreateResFmt(@SInvalidCharCount, [CharCount]);
  Len := Length(Bytes);
  if (ByteIndex < 0) or (ByteIndex > Len) then
    raise EEncodingError.CreateResFmt(@SInvalidDestinationIndex, [ByteIndex]);
  if Len - ByteIndex < GetByteCount(S, CharIndex, CharCount) then
    raise EEncodingError.CreateRes(@SInvalidDestinationArray);

  Result := GetBytes(@S[CharIndex], CharCount, @Bytes[ByteIndex], Len - ByteIndex);
end;

function TEncoding.GetCharCount(const Bytes: TBytes): Integer;
begin
  Result := GetCharCount(Bytes, 0, Length(Bytes));
end;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -