📄 uencoding.pas
字号:
{* |<PRE>
================================================================================
* 软件名称:FcpEditor
* 单元名称:uEncoding
* 单元作者:Senfore
* 备 注:UNICODE读写支持
* 开发平台:Windows Xp Sp3 + Delphi 2007/Delphi2009
* 兼容测试:
* 本 地 化:使Delphi2007能使用Delphi2009的Encoding方式处理文本
* 单元标识:$Id: uEncoding.pas,v 1.1 2009/01/28
* 修改记录:
* 2009.01.28 V1.1
* 让Delphi2007环境下支持UTF32格式读写
* 2009.01.06 V1.0
* 创建单元,实现功能
================================================================================
|</PRE>}
unit uEncoding;
{$DEFINE UCS4_ENCODING_SUPPORT}
interface
uses Classes,Windows,SysUtils;
{$IFNDEF UNICODE}
function InterlockedCompareExchangePointer(var Destination: Pointer; Exchange: Pointer; Comperand: Pointer): Pointer;
{$ENDIF}
{$IFNDEF UNICODE}
type
UnicodeString=WideString;
RawByteString=AnsiString;
{$ENDIF}
{$IFNDEF UNICODE}
type
TCharArray = array of Char;
EEncodingError = class(Exception);
TEncoding = class
strict private
class var
FASCIIEncoding: TEncoding;
FBigEndianUnicodeEncoding: TEncoding;
FDefaultEncoding: TEncoding;
FUnicodeEncoding: TEncoding;
FUTF7Encoding: TEncoding;
FUTF8Encoding: TEncoding;
class function GetASCII: TEncoding; static;
class function GetBigEndianUnicode: TEncoding; static;
class function GetDefault: TEncoding; static;
class function GetUnicode: TEncoding; static;
class function GetUTF7: TEncoding; static;
class function GetUTF8: TEncoding; static;
strict protected
FIsSingleByte: Boolean;
FMaxCharSize: Integer;
function GetByteCount(Chars: PChar; CharCount: Integer): Integer; overload; virtual; abstract;
function GetBytes(Chars: PChar; CharCount: Integer; Bytes: PByte; ByteCount: Integer): Integer; overload; virtual; abstract;
function GetCharCount(Bytes: PByte; ByteCount: Integer): Integer; overload; virtual; abstract;
function GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar; CharCount: Integer): Integer; overload; virtual; abstract;
public
class function Convert(Source, Destination: TEncoding; Bytes: TBytes): TBytes; overload;
class function Convert(Source, Destination: TEncoding; Bytes: TBytes; StartIndex, Count: Integer): TBytes; overload;
class procedure FreeEncodings;
class function IsStandardEncoding(AEncoding: TEncoding): Boolean; static;
class function GetBufferEncoding(const Buffer: TBytes; var AEncoding: TEncoding): Integer; static;
function GetByteCount(const Chars: TCharArray): Integer; overload;
function GetByteCount(const Chars: TCharArray; CharIndex, CharCount: Integer): Integer; overload;
function GetByteCount(const S: string): Integer; overload;
function GetByteCount(const S: string; CharIndex, CharCount: Integer): Integer; overload;
function GetBytes(const Chars: TCharArray): TBytes; overload;
function GetBytes(const Chars: TCharArray; CharIndex, CharCount: Integer;
var Bytes: TBytes; ByteIndex: Integer): Integer; overload;
function GetBytes(const S: string): TBytes; overload;
function GetBytes(const S: string; CharIndex, CharCount: Integer;
var Bytes: TBytes; ByteIndex: Integer): Integer; overload;
function GetCharCount(const Bytes: TBytes): Integer; overload;
function GetCharCount(const Bytes: TBytes; ByteIndex, ByteCount: Integer): Integer; overload;
function GetChars(const Bytes: TBytes): TCharArray; overload;
function GetChars(const Bytes: TBytes; ByteIndex, ByteCount: Integer): TCharArray; overload;
function GetChars(const Bytes: TBytes; ByteIndex, ByteCount: Integer;
var Chars: TCharArray; CharIndex: Integer): Integer; overload;
class function GetEncoding(CodePage: Integer): TEncoding; static;
function GetMaxByteCount(CharCount: Integer): Integer; virtual; abstract;
function GetMaxCharCount(ByteCount: Integer): Integer; virtual; abstract;
function GetPreamble: TBytes; virtual; abstract;
function GetString(const Bytes: TBytes): string; overload;
function GetString(const Bytes: TBytes; ByteIndex, ByteCount: Integer): string; overload;
class property ASCII: TEncoding read GetASCII;
class property BigEndianUnicode: TEncoding read GetBigEndianUnicode;
class property Default: TEncoding read GetDefault;
property IsSingleByte: Boolean read FIsSingleByte;
class property Unicode: TEncoding read GetUnicode;
class property UTF7: TEncoding read GetUTF7;
class property UTF8: TEncoding read GetUTF8;
end;
TMBCSEncoding = class(TEncoding)
private
FCodePage: Cardinal;
FMBToWCharFlags: Cardinal;
FWCharToMBFlags: Cardinal;
strict protected
function GetByteCount(Chars: PChar; CharCount: Integer): Integer; overload; override;
function GetBytes(Chars: PChar; CharCount: Integer; Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetCharCount(Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar; CharCount: Integer): Integer; overload; override;
public
constructor Create; overload; virtual;
constructor Create(CodePage: Integer); overload; virtual;
constructor Create(CodePage, MBToWCharFlags, WCharToMBFlags: Integer); overload; virtual;
function GetMaxByteCount(CharCount: Integer): Integer; override;
function GetMaxCharCount(ByteCount: Integer): Integer; override;
function GetPreamble: TBytes; override;
end;
TUTF7Encoding = class(TMBCSEncoding)
strict protected
function GetByteCount(Chars: PChar; CharCount: Integer): Integer; overload; override;
function GetBytes(Chars: PChar; CharCount: Integer; Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetCharCount(Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar; CharCount: Integer): Integer; overload; override;
public
constructor Create; override;
function GetMaxByteCount(CharCount: Integer): Integer; override;
function GetMaxCharCount(ByteCount: Integer): Integer; override;
end;
TUTF8Encoding = class(TUTF7Encoding)
public
constructor Create; override;
function GetMaxByteCount(CharCount: Integer): Integer; override;
function GetMaxCharCount(ByteCount: Integer): Integer; override;
function GetPreamble: TBytes; override;
end;
TUnicodeEncoding = class(TEncoding)
strict protected
function GetByteCount(Chars: PChar; CharCount: Integer): Integer; overload; override;
function GetBytes(Chars: PChar; CharCount: Integer; Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetCharCount(Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar; CharCount: Integer): Integer; overload; override;
public
constructor Create; virtual;
function GetMaxByteCount(CharCount: Integer): Integer; override;
function GetMaxCharCount(ByteCount: Integer): Integer; override;
function GetPreamble: TBytes; override;
end;
TBigEndianUnicodeEncoding = class(TUnicodeEncoding)
strict protected
function GetBytes(Chars: PChar; CharCount: Integer; Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar; CharCount: Integer): Integer; overload; override;
public
function GetPreamble: TBytes; override;
end;
TStringsEx=class(TStrings)
private
public
procedure LoadFromFile(const FileName: string); overload; virtual;
procedure LoadFromFile(const FileName: string; Encoding: TEncoding); overload; virtual;
procedure LoadFromStream(Stream: TStream); overload; virtual;
procedure LoadFromStream(Stream: TStream; Encoding: TEncoding); overload; virtual;
procedure SaveToFile(const FileName: string); overload; virtual;
procedure SaveToFile(const FileName: string; Encoding: TEncoding); overload; virtual;
procedure SaveToStream(Stream: TStream); overload; virtual;
procedure SaveToStream(Stream: TStream; Encoding: TEncoding); overload; virtual;
end;
{ TStringList class }
TStringListEx = class;
TStringListExSortCompare = function(List: TStringListEx; Index1, Index2: Integer): Integer;
TStringListEx = class(TStringsEx)
private
FList: PStringItemList;
FCount: Integer;
FCapacity: Integer;
FSorted: Boolean;
FDuplicates: TDuplicates;
FCaseSensitive: Boolean;
FOnChange: TNotifyEvent;
FOnChanging: TNotifyEvent;
procedure ExchangeItems(Index1, Index2: Integer);
procedure Grow;
procedure QuickSort(L, R: Integer; SCompare: TStringListExSortCompare);
procedure SetSorted(Value: Boolean);
procedure SetCaseSensitive(const Value: Boolean);
protected
procedure Changed; virtual;
procedure Changing; virtual;
function Get(Index: Integer): string; override;
function GetCapacity: Integer; override;
function GetCount: Integer; override;
function GetObject(Index: Integer): TObject; override;
procedure Put(Index: Integer; const S: string); override;
procedure PutObject(Index: Integer; AObject: TObject); override;
procedure SetCapacity(NewCapacity: Integer); override;
procedure SetUpdateState(Updating: Boolean); override;
function CompareStrings(const S1, S2: string): Integer; override;
procedure InsertItem(Index: Integer; const S: string; AObject: TObject); virtual;
public
destructor Destroy; override;
function Add(const S: string): Integer; override;
function AddObject(const S: string; AObject: TObject): Integer; override;
procedure Clear; override;
procedure Delete(Index: Integer); override;
procedure Exchange(Index1, Index2: Integer); override;
function Find(const S: string; var Index: Integer): Boolean; virtual;
function IndexOf(const S: string): Integer; override;
procedure Insert(Index: Integer; const S: string); override;
procedure InsertObject(Index: Integer; const S: string;
AObject: TObject); override;
procedure Sort; virtual;
procedure CustomSort(Compare: TStringListExSortCompare); virtual;
property Duplicates: TDuplicates read FDuplicates write FDuplicates;
property Sorted: Boolean read FSorted write SetSorted;
property CaseSensitive: Boolean read FCaseSensitive write SetCaseSensitive;
property OnChange: TNotifyEvent read FOnChange write FOnChange;
property OnChanging: TNotifyEvent read FOnChanging write FOnChanging;
end;
{$ENDIF}
{$IFDEF UCS4_ENCODING_SUPPORT}
type
TUCS4Encoding = class(TEncoding)
strict protected
FBOMEnable: Boolean;
FBigEndian: Boolean;
function GetByteCount(Chars: PChar; CharCount: Integer): Integer; overload; override;
function GetBytes(Chars: PChar; CharCount: Integer; Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetCharCount(Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar; CharCount: Integer): Integer; overload; override;
public
constructor Create; overload; virtual;
constructor Create(BigEndian: Boolean); overload; virtual;
constructor Create(BigEndian, BOMEnable: Boolean); overload; virtual;
function GetMaxByteCount(CharCount: Integer): Integer; override;
function GetMaxCharCount(ByteCount: Integer): Integer; override;
function GetPreamble: TBytes; override;
end;
{$ENDIF UCS4_ENCODING_SUPPORT}
{CheckBom}
function HasUTF16LEBOM(S: TStream): Boolean; overload;
function HasUTF16LEBOM(S: AnsiString): Boolean; overload;
function HasUTF16BEBOM(S: TStream): Boolean; overload;
function HasUTF16BEBOM(S: AnsiString): Boolean; overload;
function HasUTF8BOM(S: TStream): Boolean; overload;
function HasUTF8BOM(S: AnsiString): Boolean; overload;
function HasUTF32LEBOM(S: TStream): Boolean; overload;
function HasUTF32LEBOM(S: AnsiString): Boolean; overload;
function HasUTF32BEBOM(S: TStream): Boolean; overload;
function HasUTF32BEBOM(S: AnsiString): Boolean; overload;
{Encoding}
function GetEncodeFromStream(Stream: TStream;var CodePage:LongInt): Boolean;
function GetCodePageFromFile(const FileName:TFileName;var DefCodePage:LongInt):Boolean;
function GetEncoding(CodePage: Integer): TEncoding;
function GetEnCodingFromFile(const FileName:TFileName;DefCodePage:LongInt=0):TEncoding;
{File<->Str}
function StrToFile(const FileName, S: String): Boolean;overload;
function StrToFile(const FileName, S: String;Encoding:TEncoding): Boolean;overload;
procedure StrSaveToStream(const S:string;Stream: TStream; Encoding: TEncoding);
function FileToStr(const FileName: string):string;overload;
function FileToStr(const FileName: string; Encoding: TEncoding):string;overload;
function FileToString(const FileName: string): RawByteString;
function StrLoadFromStream(Stream: TStream; Encoding: TEncoding):string;
{Unicode}
function UnicodeEncode(Str: string; CodePage: integer): WideString;
function UnicodeDecode(Str: WideString; CodePage: integer): string;
function Gb2Big5(Str: string): string;
function Big52Gb(Str: string): string;
function GBCHT2CHS(const S: string): string;
function GBCHS2CHT(const S: string): string;
implementation
uses SysConst,RTLConsts,
{$IFDEF UNICODE}
{$IFDEF UCS4_ENCODING_SUPPORT}
Character;
{$ENDIF UCS4_ENCODING_SUPPORT}
{$ELSE UNICODE}
{$IFDEF UCS4_ENCODING_SUPPORT}
uCharacter;
{$ENDIF UCS4_ENCODING_SUPPORT}
{$ENDIF UNICODE}
const
UTF16LEString: array[1..2] of AnsiChar = (#$FF, #$FE);
UTF16BEString: array[1..2] of AnsiChar = (#$FE, #$FF);
UTF8BOMString: array[1..3] of AnsiChar = (#$EF, #$BB, #$BF);
UTF32LEString: array[1..4] of AnsiChar = (#$FF, #$FE, #$00, #$00);
UTF32BEString: array[1..4] of AnsiChar = (#$00, #$00, #$FE, #$FF);
{$IFNDEF UNICODE}
resourcestring
// Used by TEncoding
SInvalidSourceArray = 'Invalid source array';
SInvalidDestinationArray = 'Invalid destination array';
SCharIndexOutOfBounds = 'Character index out of bounds (%d)';
SByteIndexOutOfBounds = 'Start index out of bounds (%d)';
SInvalidCharCount = 'Invalid count (%d)';
SInvalidDestinationIndex = 'Invalid destination index (%d)';
SInvalidCodePage = 'Invalid code page';
{$ENDIF}
{$IFNDEF UNICODE}
function InterlockedCompareExchangePointer(var Destination: Pointer; Exchange: Pointer; Comperand: Pointer): Pointer;
begin
Result := Pointer(InterlockedCompareExchange(Integer(Destination), Integer(Exchange), Integer(Comperand)));
end;
{$ENDIF}
{$IFNDEF UNICODE}
{ TEncoding }
class function TEncoding.Convert(Source, Destination: TEncoding; Bytes: TBytes): TBytes;
begin
Result := Destination.GetBytes(Source.GetChars(Bytes));
end;
class function TEncoding.Convert(Source, Destination: TEncoding; Bytes: TBytes;
StartIndex, Count: Integer): TBytes;
begin
Result := Destination.GetBytes(Source.GetChars(Bytes, StartIndex, Count));
end;
class procedure TEncoding.FreeEncodings;
begin
FreeAndNil(FDefaultEncoding);
FreeAndNil(FASCIIEncoding);
FreeAndNil(FUTF7Encoding);
FreeAndNil(FUTF8Encoding);
FreeAndNil(FUnicodeEncoding);
FreeAndNil(FBigEndianUnicodeEncoding);
end;
class function TEncoding.GetASCII: TEncoding;
var
LEncoding: TEncoding;
begin
if FASCIIEncoding = nil then
begin
LEncoding := TMBCSEncoding.Create(20127, 0, 0);
if InterlockedCompareExchangePointer(Pointer(FASCIIEncoding), LEncoding, nil) <> nil then
LEncoding.Free;
end;
Result := FASCIIEncoding;
end;
class function TEncoding.GetBigEndianUnicode: TEncoding;
var
LEncoding: TEncoding;
begin
if FBigEndianUnicodeEncoding = nil then
begin
LEncoding := TBigEndianUnicodeEncoding.Create;
if InterlockedCompareExchangePointer(Pointer(FBigEndianUnicodeEncoding), LEncoding, nil) <> nil then
LEncoding.Free;
end;
Result := FBigEndianUnicodeEncoding;
end;
class function TEncoding.GetBufferEncoding(const Buffer: TBytes; var AEncoding: TEncoding): Integer;
function ContainsPreamble(const Buffer, Signature: TBytes): Boolean;
var
I: Integer;
begin
Result := True;
if Length(Buffer) >= Length(Signature) then
begin
for I := 1 to Length(Signature) do
if Buffer[I - 1] <> Signature [I - 1] then
begin
Result := False;
Break;
end;
end
else
Result := False;
end;
var
Preamble: TBytes;
begin
Result := 0;
if AEncoding = nil then
begin
// Find the appropraite encoding
if ContainsPreamble(Buffer, TEncoding.Unicode.GetPreamble) then
AEncoding := TEncoding.Unicode
else
if ContainsPreamble(Buffer, TEncoding.BigEndianUnicode.GetPreamble) then
AEncoding := TEncoding.BigEndianUnicode
else
if ContainsPreamble(Buffer, TEncoding.UTF8.GetPreamble) then
AEncoding := TEncoding.UTF8
else
AEncoding := TEncoding.Default;
Result := Length(AEncoding.GetPreamble);
end else
begin
Preamble := AEncoding.GetPreamble;
if ContainsPreamble(Buffer, Preamble) then
Result := Length(Preamble);
end;
end;
function TEncoding.GetByteCount(const Chars: TCharArray): Integer;
begin
Result := GetByteCount(Chars, 0, Length(Chars));
end;
function TEncoding.GetByteCount(const Chars: TCharArray; CharIndex, CharCount: Integer): Integer;
begin
if CharIndex < 0 then
raise EEncodingError.CreateResFmt(@SCharIndexOutOfBounds, [CharIndex]);
if CharCount < 0 then
raise EEncodingError.CreateResFmt(@SInvalidCharCount, [CharCount]);
if (Length(Chars) - CharIndex) < CharCount then
raise EEncodingError.CreateResFmt(@SInvalidCharCount, [CharCount]);
Result := GetByteCount(@Chars[CharIndex], CharCount);
end;
function TEncoding.GetByteCount(const S: string): Integer;
begin
Result := GetByteCount(PChar(S), Length(S));
end;
function TEncoding.GetByteCount(const S: string; CharIndex, CharCount: Integer): Integer;
begin
if CharIndex < 1 then
raise EEncodingError.CreateResFmt(@SCharIndexOutOfBounds, [CharIndex]);
if CharCount < 0 then
raise EEncodingError.CreateResFmt(@SInvalidCharCount, [CharCount]);
if (Length(S) - CharIndex + 1) < CharCount then
raise EEncodingError.CreateResFmt(@SInvalidCharCount, [CharCount]);
Result := GetByteCount(PChar(@S[CharIndex]), CharCount);
end;
function TEncoding.GetBytes(const Chars: TCharArray): TBytes;
var
Len: Integer;
begin
Len := GetByteCount(Chars);
SetLength(Result, Len);
GetBytes(Chars, 0, Length(Chars), Result, 0);
end;
function TEncoding.GetBytes(const Chars: TCharArray; CharIndex, CharCount: Integer;
var Bytes: TBytes; ByteIndex: Integer): Integer;
var
Len: Integer;
begin
if (Chars = nil) and (CharCount <> 0) then
raise EEncodingError.CreateRes(@SInvalidSourceArray);
if (Bytes = nil) and (CharCount <> 0) then
raise EEncodingError.CreateRes(@SInvalidDestinationArray);
if CharIndex < 0 then
raise EEncodingError.CreateResFmt(@SCharIndexOutOfBounds, [CharIndex]);
if CharCount < 0 then
raise EEncodingError.CreateResFmt(@SInvalidCharCount, [CharCount]);
if (Length(Chars) - CharIndex) < CharCount then
raise EEncodingError.CreateResFmt(@SInvalidCharCount, [CharCount]);
Len := Length(Bytes);
if (ByteIndex < 0) or (ByteIndex > Len) then
raise EEncodingError.CreateResFmt(@SInvalidDestinationIndex, [ByteIndex]);
if Len - ByteIndex < GetByteCount(Chars, CharIndex, CharCount) then
raise EEncodingError.CreateRes(@SInvalidDestinationArray);
Result := GetBytes(@Chars[CharIndex], CharCount, @Bytes[ByteIndex], Len - ByteIndex);
end;
function TEncoding.GetBytes(const S: string): TBytes;
var
Len: Integer;
begin
Len := GetByteCount(S);
SetLength(Result, Len);
GetBytes(S, 1, Length(S), Result, 0);
end;
function TEncoding.GetBytes(const S: string; CharIndex, CharCount: Integer;
var Bytes: TBytes; ByteIndex: Integer): Integer;
var
Len: Integer;
begin
if (Bytes = nil) and (CharCount <> 0) then
raise EEncodingError.CreateRes(@SInvalidSourceArray);
if CharIndex < 1 then
raise EEncodingError.CreateResFmt(@SCharIndexOutOfBounds, [CharIndex]);
if CharCount < 0 then
raise EEncodingError.CreateResFmt(@SInvalidCharCount, [CharCount]);
if (Length(S) - CharIndex + 1) < CharCount then
raise EEncodingError.CreateResFmt(@SInvalidCharCount, [CharCount]);
Len := Length(Bytes);
if (ByteIndex < 0) or (ByteIndex > Len) then
raise EEncodingError.CreateResFmt(@SInvalidDestinationIndex, [ByteIndex]);
if Len - ByteIndex < GetByteCount(S, CharIndex, CharCount) then
raise EEncodingError.CreateRes(@SInvalidDestinationArray);
Result := GetBytes(@S[CharIndex], CharCount, @Bytes[ByteIndex], Len - ByteIndex);
end;
function TEncoding.GetCharCount(const Bytes: TBytes): Integer;
begin
Result := GetCharCount(Bytes, 0, Length(Bytes));
end;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -