📄 gptextfile.pas

📁 Advanced Data Import Component Suite for Borland Delphi and C++ Builder allows you to import your da
💻 PAS
📖 第 1 页 / 共 5 页
字号:
      diskLockTimeout: integer {$IFDEF D4plus}= 0{$ENDIF};
      diskRetryDelay: integer  {$IFDEF D4plus}= 0{$ENDIF};
      waitObject: THandle      {$IFDEF D4plus}= 0{$ENDIF};
      codePage: word           {$IFDEF D4plus}= 0{$ENDIF}): THFError;
    procedure Write(params: array of const); {$IFDEF D4plus} overload;
    procedure Write(s: AnsiString); overload; {$ENDIF D4plus}
    procedure Writeln(ln: WideString {$IFDEF D4plus}= ''{$ENDIF}); {$IFDEF D4plus} overload;
    function  Is32bit: boolean;
    procedure Writeln(params: array of const); overload; {$ENDIF D4plus}
    //:Accepted line delimiters (CR, LF or any combination).
    property AcceptedDelimiters: TLineDelimiters read tfLineDelims
      write tfLineDelims;
    {:Code page used to convert 8-bit files to Unicode and back. May be changed
      while file is open (and even partially read). If set to 0, current default
      code page will be used.
    }
    property Codepage: word read tfCodepage write SetCodepage;
    {:File flags - as decoded from file structure or as passed to the Rewrite[Safe.]
      @since   2005-12-22
    }
    property FileFlags: TCreateFlags read tfCFlags;
  end; { TGpTextFile }

  {:Wrapper for TGpTextStream that automatically creates TGpHugeFileStream for
    a specified file in the constructor and destroys it in the destructor.
  }
  TGpTextFileStream = class(TGpTextStream)
  private
    tfsStream: TGpHugeFileStream;
  protected
    function  GetFileName: WideString; virtual;
    function  GetWindowsError: DWORD; override;
    function  StreamName(param: string = ''): string; override;
  public
    constructor Create(
      const fileName: string; access: TGpHugeFileStreamAccess;
      openFlags: TOpenFlags     {$IFDEF D4plus}= []{$ENDIF};
      createFlags: TCreateFlags {$IFDEF D4plus}= []{$ENDIF};
      codePage: word            {$IFDEF D4plus}= 0{$ENDIF}
      );
    constructor CreateW(
      const fileName: WideString; access: TGpHugeFileStreamAccess;
      openFlags: TOpenFlags     {$IFDEF D4plus}= []{$ENDIF};
      createFlags: TCreateFlags {$IFDEF D4plus}= []{$ENDIF};
      codePage: word            {$IFDEF D4plus}= 0{$ENDIF}
      );
    destructor  Destroy; override;
    //:Name of underlying file.
    property  FileName: WideString read GetFileName;
  end; { TGpTextFileStream }

function  StringToWideStringNoCP(const s: AnsiString): WideString; overload;
procedure StringToWideStringNoCP(const s: AnsiString; var w: WideString); overload;
procedure StringToWideStringNoCP(const buf; bufLen: integer; var w: WideString); overload;
function  WideStringToStringNoCP(const s: WideString): AnsiString;

implementation

uses
  SysUtils,
  SysConst;

const
  {:Header for 'normal' Unicode UCS-4 stream (Intel format).
  }
  CUnicode32Normal: UCS4Char = UCS4Char($0000FEFF);

  {:Header for 'reversed' Unicode UCS-4 stream (Motorola format).
  }
  CUnicode32Reversed: UCS4Char = UCS4Char($0000FFFE);

  {:Header for big-endian (Motorola) Unicode file.
  }
  CUnicodeNormal  : WideChar = WideChar($FEFF);

  {:Header for little-endian (Intel) Unicode file.
  }
  CUnicodeReversed: WideChar = WideChar($FFFE);

  {:First two bytes of UTF-8 BOM.
  }
  CUTF8BOM12: WideChar = WideChar($BBEF);

  {:Third byte of UTF-8 BOM.
  }
  CUTF8BOM3: AnsiChar = AnsiChar($BF);

  {:Size of preallocated buffer used for 8 to 16 to 8 bit conversions in
    TGpTextFile.
  }
  CtsSmallBufSize = 2048; // 1024 WideChars

{$IFDEF D3plus}
resourcestring
{$ELSE}
const
{$ENDIF}
  sCannotAppendReversedUnicodeFile   = 'TGpTextFile(%s):Cannot append reversed Unicode file.';
  sCannotAppendReversedUnicodeStream = '%s:Cannot append reversed Unicode file.';
  sCannotConvertOddNumberOfBytes     = '%s:Cannot convert odd number of bytes: %d';
  sCannotWriteReversedUnicodeFile    = 'TGpTextFile(%s):Cannot write to reversed Unicode file.';
  sCannotWriteReversedUnicodeStream  = '%s:Cannot write to reversed Unicode file.';
  sFailedToAppendFile                = 'TGpTextFile(%s):Failed to append.';
  sFailedToResetFile                 = 'TGpTextFile(%s):Failed to reset file.';
  sFailedToRewriteFile               = 'TGpTextFile(%s):Failed to rewrite file.';
  sInvalidParameter                  = 'TGpTextFile(%s):Invalid parameter!';
  sStreamFailed                      = '%s failed. ';

{:Converts Ansi string to Unicode string without code page conversion.
  @param   s        Ansi string.
  @returns Converted wide string.
}
function StringToWideStringNoCP(const s: AnsiString): WideString; overload;
begin
  Result := '';
  StringToWideStringNoCP(s, Result);
end; { StringToWideStringNoCP }

{:Converts Ansi string to Unicode string without code page conversion.
  @param   s Ansi string.
  @returns w Wide string. New data will be appended to the original contents.
}
procedure StringToWideStringNoCP(const s: AnsiString; var w: WideString); overload;
begin
  if s <> '' then
    StringToWideStringNoCP(s[1], Length(s), w);
end; { StringToWideStringNoCP }

{:Converts buffer of ansi characters to Unicode string without code page conversion.
  @param   s   Buffer of ansi characters.
  @param   len Length of the buffer.
  @returns w   Wide string. New data will be appended to the original contents.
}
procedure StringToWideStringNoCP(const buf; bufLen: integer; var w: WideString); overload;
var
  iCh    : integer;
  lResult: integer;
  pOrig  : PByte;
  pResult: PWideChar;
begin
  if bufLen > 0 then begin
    lResult := Length(w);
    SetLength(w, lResult+bufLen);
    pOrig := @buf;
    pResult := @w[lResult+1];
    for iCh := 1 to bufLen do begin
      pResult^ := WideChar(pOrig^);
      Inc(pOrig);
      Inc(pResult);
    end;
  end;
end; { StringToWideStringNoCP }

{:Converts Unicode string to Ansi string without code page conversion.
  @param   s        Ansi string.
  @param   codePage Code page to be used in conversion.
  @returns Converted wide string.
}
function WideStringToStringNoCP(const s: WideString): AnsiString;
var
  pResult: PByte;
  pOrig: PWord;
  i, l: integer;
begin
  if s = '' then     
    Result := ''
  else begin
    l := Length(s);
    SetLength(Result, l);
    pOrig := @s[1];
    pResult := @Result[1];
    for i := 1 to l do begin
      pResult^ := pOrig^ AND $FF;
      Inc(pResult);
      Inc(pOrig);
    end;
  end;
end; { WideStringToStringNoCP }

{:Converts Ansi string to Unicode string using specified code page.
  @param   s        Ansi string.
  @param   codePage Code page to be used in conversion.
  @param   w        Resulting string. Original contents is preserved (new data
                    is appended).
  @returns Converted wide string.
}
procedure StringToWideString(const s: AnsiString; codePage: word; var w: WideString); overload;
var
  l: integer;
  lResult: integer;
begin
  if s <> '' then begin
    l := MultiByteToWideChar(codePage, MB_PRECOMPOSED, PAnsiChar(@s[1]), -1, nil, 0);
    lResult := Length(w);
    SetLength(w, lResult+l-1);
    if l > 1 then
      MultiByteToWideChar(CodePage, MB_PRECOMPOSED, PAnsiChar(@s[1]), -1, PWideChar(@w[lResult+1]), l-1);
  end;
end; { StringToWideString }

{:Converts Ansi string to Unicode string using specified code page.
  @param   s        Ansi string.
  @param   codePage Code page to be used in conversion.
  @returns Converted wide string.
}
function StringToWideString(const s: AnsiString; codePage: word): WideString; overload;
begin
  StringToWideString(s, codePage, Result);
end; { StringToWideString }

{:Converts Ansi string to Unicode string using specified code page.
  @param   buf      Buffer containing ansi characters.
  @param   bufLen   Length of the buffer.
  @param   codePage Code page to be used in conversion.
  @param   w        Resulting string. Original contents is preserved (new data
                    is appended).
  @returns Converted wide string.
}
procedure StringToWideString(const buf; bufLen: integer; codePage: word; var w: WideString); overload;
var
  l      : integer;
  lResult: integer;
  oldChar: AnsiChar;
begin
  if bufLen > 0 then begin
    oldChar := PAnsiChar(integer(@buf)+bufLen)^;
    PChar(integer(@buf)+bufLen)^ := #0;
    try
      l := MultiByteToWideChar(codePage, MB_PRECOMPOSED, PAnsiChar(@buf), -1, nil, 0);
      lResult := Length(w);
      SetLength(w, lResult+l-1);
      if l > 1 then
        MultiByteToWideChar(CodePage, MB_PRECOMPOSED, PAnsiChar(@buf), -1, PWideChar(@w[lResult+1]), l-1);
    finally PAnsiChar(integer(@buf)+bufLen)^ := oldChar; end;
  end;
end; { StringToWideString }

{:Converts Unicode string to Ansi string using specified code page.
  @param   ws       Unicode string.
  @param   codePage Code page to be used in conversion.
  @returns Converted ansi string.
}
function WideStringToString (const ws: WideString; codePage: Word): AnsiString;
var
  l: integer;
begin
  if ws = '' then
    Result := ''
  else begin
    l := WideCharToMultiByte(codePage,
           WC_COMPOSITECHECK or WC_DISCARDNS or WC_SEPCHARS or WC_DEFAULTCHAR,
           @ws[1], -1, nil, 0, nil, nil);
    SetLength(Result, l-1);
    if l > 1 then
      WideCharToMultiByte(codePage,
        WC_COMPOSITECHECK or WC_DISCARDNS or WC_SEPCHARS or WC_DEFAULTCHAR,
        @ws[1], -1, @Result[1], l-1, nil, nil);
  end;
end; { WideStringToString }

{:Convers buffer of WideChars into UTF-8 encoded form. Target buffer must be
  pre-allocated and large enough (each WideChar will use at most three bytes
  in UTF-8 encoding).                                                            <br>
  RFC 2279 (http://www.ietf.org/rfc/rfc2279.txt) describes the conversion:       <br>
  $0000..$007F => $00..$7F                                                       <br>
  $0080..$07FF => 110[bit10..bit6] 10[bit5..bit0]                                <br>
  $0800..$FFFF => 1110[bit15..bit12] 10[bit11..bit6] 10[bit5..bit0]
  @param   unicodeBuf   Buffer of WideChars.
  @param   uniByteCount Size of unicodeBuf, in bytes.
  @param   utf8Buf      Pre-allocated buffer for UTF-8 encoded result.
  @returns Number of bytes used in utf8Buf buffer.
  @since   2.01
}
function WideCharBufToUTF8Buf(const unicodeBuf; uniByteCount: integer;
  var utf8Buf): integer;
var
  iwc: integer;
  pch: PAnsiChar;
  pwc: PWideChar;
  wc : word;

  procedure AddByte(b: byte);
  begin
    pch^ := AnsiChar(b);
    Inc(pch);
  end; { AddByte }

begin { WideCharBufToUTF8Buf }
  pwc := @unicodeBuf;
  pch := @utf8Buf;
  for iwc := 1 to uniByteCount div SizeOf(WideChar) do begin
    wc := Ord(pwc^);
    Inc(pwc);
    if (wc >= $0001) and (wc <= $007F) then begin
      AddByte(wc AND $7F);
    end
    else if (wc >= $0080) and (wc <= $07FF) then begin
      AddByte($C0 OR ((wc SHR 6) AND $1F));
      AddByte($80 OR (wc AND $3F));
    end
    else begin // (wc >= $0800) and (wc <= $FFFF)
      AddByte($E0 OR ((wc SHR 12) AND $0F));
      AddByte($80 OR ((wc SHR 6) AND $3F));
      AddByte($80 OR (wc AND $3F));
    end;
  end; //for
  Result := integer(pch)-integer(@utf8Buf);
end; { WideCharBufToUTF8Buf }

{:Converts UTF-8 encoded buffer into WideChars. Target buffer must be
  pre-allocated and large enough (at most utfByteCount number of WideChars will
  be generated).                                                                 <br>
  RFC 2279 (http://www.ietf.org/rfc/rfc2279.txt) describes the conversion:       <br>
  $00..$7F => $0000..$007F                                                       <br>
  110[bit10..bit6] 10[bit5..bit0] => $0080..$07FF                                <br>
  1110[bit15..bit12] 10[bit11..bit6] 10[bit5..bit0] => $0800..$FFFF
  @param   utf8Buf      UTF-8 encoded buffer.
  @param   utfByteCount Size of utf8Buf, in bytes.
  @param   unicodeBuf   Pre-allocated buffer for WideChars.
  @param   leftUTF8     Number of bytes left in utf8Buf after conversion (0, 1,
                        or 2).
  @returns Number of bytes used in unicodeBuf buffer.
  @since   2.01
}
function UTF8BufToWideCharBuf(const utf8Buf; utfByteCount: integer;
 var unicodeBuf; var leftUTF8: integer): integer;
var
  c1 : byte;
  c2 : byte;
  ch : byte;
  pch: PAnsiChar;
  pwc: PWideChar;
begin
  pch := @utf8Buf;
  pwc := @unicodeBuf;
  leftUTF8 := utfByteCount;
  while leftUTF8 > 0 do begin
    ch := byte(pch^);
    Inc(pch);
    if (ch AND $80) = 0 then begin // 1-byte code
      word(pwc^) := ch;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -