📄 gptextstream.pas
字号:
{ $OmniXML: OmniXML/GpTextStream.pas,v 1.1.1.1 2004/04/17 11:16:33 mr Exp $ }
{$B-,H+,J+,Q-,T-,X+}
{$UNDEF D3PLUS}
{$UNDEF D4PLUS}
{$IFDEF VER100}{$DEFINE D3PLUS}{$ENDIF}
{$IFDEF VER120}{$DEFINE D3PLUS}{$DEFINE D4PLUS}{$ENDIF}
{$IFDEF VER130}{$DEFINE D3PLUS}{$DEFINE D4PLUS}{$ENDIF}
{$IFDEF VER140}{$DEFINE D3PLUS}{$DEFINE D4PLUS}{$DEFINE D6PLUS}{$ENDIF}
{$IFDEF VER150}{$DEFINE D3PLUS}{$DEFINE D4PLUS}{$DEFINE D6PLUS}{$DEFINE D7PLUS}{$ENDIF}
unit GpTextStream;
(*:Stream wrapper class that automatically converts another stream (containing
text data) into a Unicode stream. Underlying stream can contain 8-bit text
(in any codepage) or 16-bit text (in 16-bit or UTF8 encoding).
@author Primoz Gabrijelcic
@desc <pre>
This software is distributed under the BSD license.
Copyright (c) 2003, Primoz Gabrijelcic
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
- Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
- The name of the Primoz Gabrijelcic may not be used to endorse or promote
products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Author : Primoz Gabrijelcic
Creation date : 2001-07-17
Last modification: 2003-05-16
Version : 1.02
</pre>
*)(*
History:
1.02: 2003-05-16
- Compatible with Delphi 7.
1.01: 2002-04-24
- Added TGpTSCreateFlag flag tscfCompressed to keep this enum in sync
with GpTextFile.TCreateFlag.
1.0b: 2001-12-15
- Updated to compile with Delphi 6 (thanks to Artem Khassanov).
1.0a: 2001-10-06
- Fixed error in GpTextStream.Read that could cause exception to be
raised unnecessary.
1.0: 2001-07-17
- Created from GpTextFile 3.0b (thanks to Miha Remec).
- Fix UTF 8 decoding error in TGpTextStream.Read.
*)
interface
uses
Windows,
SysUtils,
Classes,
GpStreamWrapper;
// HelpContext values for all raised exceptions.
const
//:Windows error.
hcTFWindowsError = 3001;
//:Unknown Windows error.
hcTFUnknownWindowsError = 3002;
//:Cannot append reversed Unicode stream - not supported.
hcTFCannotAppendReversed = 3003;
//:Cannot write to reversed Unicode stream - not supported.
hcTFCannotWriteReversed = 3004;
//:Cannot convert odd number of bytes.
hcTFCannotConvertOdd = 3005;
const
{$IFNDEF D3plus}
CP_UTF8 = 65001; // UTF-8 pseudo-codepage, defined in Windows.pas in Delphi 3 and newer.
{$ENDIF}
CP_UNICODE = 1200; // Unicode pseudo-codepage,
ISO_8859_1 = 28591; // Western Alphabet (ISO)
ISO_8859_2 = 28592; // Central European Alphabet (ISO)
ISO_8859_3 = 28593; // Latin 3 Alphabet (ISO)
ISO_8859_4 = 28594; // Baltic Alphabet (ISO)
ISO_8859_5 = 28595; // Cyrillic Alphabet (ISO)
ISO_8859_6 = 28596; // Arabic Alphabet (ISO)
ISO_8859_7 = 28597; // Greek Alphabet (ISO)
ISO_8859_8 = 28598; // Hebrew Alphabet (ISO)
type
{:Base exception class for exceptions created in TGpTextStream.
}
EGpTextStream = class(Exception);
{:Text stream creation flags. Copied from GpTextFile.TCreateFlag. Must be kept
in sync!
@enum tscfUnicode Create Unicode stream.
@enum tscfReverseByteOrder Create unicode stream with reversed byte order
(Motorola format). Used only in Read access,
not valid in Write access.
@enum tscfUse2028 Use standard /2028/ instead of /000D/000A/ for
line delimiter (MS Notepad and MS Word do not
understand $2028 delimiter). Applies to Unicode
streams only.
@enum tscfUseLF Use /LF/ instead of /CR/LF/ for line delimiter.
Applies to 8-bit streams only.
@enum tscfWriteUTF8BOM Write UTF-8 Byte Order Mark to the beginning of
stream.
@enum tscfCompressed Will try to set the "compressed" attribute (when
running on NT and file is on NTFS drive).
}
TGpTSCreateFlag = (tscfUnicode, tscfReverseByteOrder, tscfUse2028, tscfUseLF,
tscfWriteUTF8BOM, tscfCompressed);
{:Set of all creation flags.
}
TGpTSCreateFlags = set of TGpTSCreateFlag;
{:Line delimiters.
@enum tstsldCR Carriage return (Mac style).
@enum tstsldLF Line feed (Unix style).
@enum tstsldCRLF Carriage return + Line feed (DOS style).
@enum tstsldLFCR Line feed + Carriage return (very unusual combination).
@enum tstsld2028 /2028/ Unicode delimiter.
@enum tstsld000D000A /000D/000A/ Windows-style Unicode delimiter.
}
TGpTSLineDelimiter = (tsldCR, tsldLF, tsldCRLF, tsldLFCR, tsld2028, tsld000D000A);
{:Set of all line delimiters.
}
TGpTSLineDelimiters = set of TGpTSLineDelimiter;
{:All possible ways to access TGpTextStream. Copied from GpHugeF. Must be kept
in sync!
@enum tstsaccRead Read access.
@enum tstsaccWrite Write access.
@enum tstsaccReadWrite Read and write access.
@enum tstsaccAppend Same as tsaccReadWrite, just that Position is set
immediatly after the end of stream.
}
TGpTSAccess = (tsaccRead, tsaccWrite, tsaccReadWrite, tsaccAppend);
{:Unified 8/16-bit text stream access. All strings passed as Unicode,
conversion to/from 8-bit is done automatically according to specified code
page.
}
TGpTextStream = class(TGpStreamWrapper)
private
tsAccess : TGpTSAccess;
tsCodePage : word;
tsCreateFlags : TGpTSCreateFlags;
tsLineDelims : TGpTSLineDelimiters;
tsReadlnBuf : TMemoryStream;
tsSmallBuf : pointer;
tsWindowsError: DWORD;
protected
function AllocBuffer(size: integer): pointer; virtual;
procedure FreeBuffer(var buffer: pointer); virtual;
function GetWindowsError: DWORD; virtual;
procedure PrepareStream; virtual;
procedure SetCodepage(cp: word); virtual;
function StreamName(param: string = ''): string; virtual;
procedure Win32Check(condition: boolean; method: string);
public
constructor Create(
dataStream: TStream; access: TGpTSAccess;
createFlags: TGpTSCreateFlags {$IFDEF D4plus}= []{$ENDIF};
codePage: word {$IFDEF D4plus}= 0{$ENDIF}
);
destructor Destroy; override;
function Is16bit: boolean;
function IsUnicode: boolean;
function Read(var buffer; count: longint): longint; override;
function Readln: WideString;
function Write(const buffer; count: longint): longint; override;
function Writeln(const ln: WideString{$IFDEF D4plus}= ''{$ENDIF}): boolean;
function WriteString(const ws: WideString): boolean;
{:Accepted line delimiters (CR, LF or any combination).
}
property AcceptedDelimiters: TGpTSLineDelimiters read tsLineDelims
write tsLineDelims;
{:Code page used to convert 8-bit stream to Unicode and back. May be changed
while stream is open (and even partially read). If set to 0, current
default code page will be used.
}
property Codepage: word read tsCodePage write SetCodepage;
{:Stream size. Reintroduced to override GetSize (static in TStream) with
faster version.
}
property Size: {$IFDEF D7PLUS}int64{$ELSE}longint{$ENDIF D7PLUS} read GetSize write SetSize;
{:Last Windows error code.
}
property WindowsError: DWORD read GetWindowsError;
end; { TGpTextStream }
implementation
uses
SysConst;
const
{:Header for 'normal' Unicode stream (Intel format).
}
CUnicodeNormal : WideChar = WideChar($FEFF);
{:Header for 'reversed' Unicode stream (Motorola format).
}
CUnicodeReversed: WideChar = WideChar($FFFE);
{:First two bytes of UTF-8 BOM.
}
CUTF8BOM12: WideChar = WideChar($BBEF);
{:Third byte of UTF-8 BOM.
}
CUTF8BOM3: Char = Char($BF);
{:Size of preallocated buffer used for 8 to 16 to 8 bit conversions in
TGpTextStream.
}
CtsSmallBufSize = 2048; // 1024 WideChars
{$IFDEF D3plus}
resourcestring
{$ELSE}
const
{$ENDIF}
sCannotAppendReversedUnicodeStream = '%s:Cannot append reversed Unicode stream.';
sCannotConvertOddNumberOfBytes = '%s:Cannot convert odd number of bytes: %d';
sCannotWriteReversedUnicodeStream = '%s:Cannot write to reversed Unicode stream.';
sStreamFailed = '%s failed. ';
{:Converts Ansi string to Unicode string using specified code page.
@param s Ansi string.
@param codePage Code page to be used in conversion.
@returns Converted wide string.
}
function StringToWideString(const s: AnsiString; codePage: word): WideString;
var
l: integer;
begin
if s = '' then
Result := ''
else begin
l := MultiByteToWideChar(codePage, MB_PRECOMPOSED, PChar(@s[1]), -1, nil, 0);
SetLength(Result, l-1);
if l > 1 then
MultiByteToWideChar(CodePage, MB_PRECOMPOSED, PChar(@s[1]), -1, PWideChar(@Result[1]), l-1);
end;
end; { StringToWideString }
{:Converts Unicode string to Ansi string using specified code page.
@param ws Unicode string.
@param codePage Code page to be used in conversion.
@returns Converted ansi string.
}
function WideStringToString (const ws: WideString; codePage: Word): AnsiString;
var
l: integer;
begin
if ws = '' then
Result := ''
else begin
l := WideCharToMultiByte(codePage,
WC_COMPOSITECHECK or WC_DISCARDNS or WC_SEPCHARS or WC_DEFAULTCHAR,
@ws[1], -1, nil, 0, nil, nil);
SetLength(Result, l-1);
if l > 1 then
WideCharToMultiByte(codePage,
WC_COMPOSITECHECK or WC_DISCARDNS or WC_SEPCHARS or WC_DEFAULTCHAR,
@ws[1], -1, @Result[1], l-1, nil, nil);
end;
end; { WideStringToString }
{:Convers buffer of WideChars into UTF-8 encoded form. Target buffer must be
pre-allocated and large enough (each WideChar will use at most three bytes
in UTF-8 encoding). <br>
RFC 2279 (http://www.ietf.org/rfc/rfc2279.txt) describes the conversion: <br>
$0000..$007F => $00..$7F <br>
$0080..$07FF => 110[bit10..bit6] 10[bit5..bit0] <br>
$0800..$FFFF => 1110[bit15..bit12] 10[bit11..bit6] 10[bit5..bit0]
@param unicodeBuf Buffer of WideChars.
@param uniByteCount Size of unicodeBuf, in bytes.
@param utf8Buf Pre-allocated buffer for UTF-8 encoded result.
@returns Number of bytes used in utf8Buf buffer.
@since 2.01
}
function WideCharBufToUTF8Buf(const unicodeBuf; uniByteCount: integer;
var utf8Buf): integer;
var
iwc: integer;
pch: PChar;
pwc: PWideChar;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -