📄 faststrings.pas
字号:
//==================================================
//All code herein is copyrighted by
//Peter Morris
//-----
//No copying, alteration, or use is permitted without
//prior permission from myself.
//------
//Do not alter / remove this copyright notice
//Email me at : support@droopyeyes.com
//
//The homepage for this library is http://www.droopyeyes.com
//
// CURRENT VERSION V3.1
//
//(Check out www.HowToDoThings.com for Delphi articles !)
//(Check out www.stuckindoors.com if you need a free events page on your site !)
//==================================================
//Ps
//Permission can be obtained very easily, and there is no ## involved,
//please email me for permission, this way you can be included on the
//email list to be notififed of updates / fixes etc.
//(It just includes sending my kids a postcard, nothing more !)
//Modifications
//==============================================================================
//Date : 17 Dec, 1999
//Found : VRP (on #Delphi EFNET)
//Fixed : VRP
//Change: Added SmartPos. This will allow people to easily change POS to SmartPos
// as the parameters are in the same order. Clever use of default params
// means that the extra functionality of FastStrings may be used by passing
// some extra params.
//==============================================================================
//Date : 17 Dec, 1999
//Found : Bob Richardson
//Fixed : Pete M
//Change: Oops a daisy. FastPosBack (and NoCase) were not setting SearchLen
// if a valid StartPos was passed.
//==============================================================================
//Date : 10 Jan, 2000
//Found : Pete M
//Fixed : Pete M
//Change: Moved TFastPosProc into the interface section, so other routines
// can use the same technique that I do in FastReplace
//==============================================================================
//Date : 15 Jan, 2000
//Found : Pete M
//Fixed : Pete M
//Change: Created a FastCharPos and FastCharPosNoCase, if the code knows that
// the FindString is only 1 char, it can use faster methods.
//==============================================================================
//Date : 1 Mar, 2000
//Found : Pete M
//Fixed : Pete M
//Change: Changed the name of MyMove to FastCharMove, and added it to the
// interface section.
//==============================================================================
//Date : 5 Mar, 2000
//Found : Pete M
//Fixed : Pete M
//Change: Changed FastPosNoCase to implement the above changes AND to use a
// lookup table for UpCase characters.
//==============================================================================
//Date : 5 Mar, 2000
//Found : Pete M
//Fixed : Pete M
//Change: Realised that I was moving [EDI] into ah before comparing
// with al, when I could have just compared al, [EDI]. doh !
// Fastpos is now about 28% faster
//==============================================================================
//Date : 12 Apr, 2000
//Found : hans gulo <hans@sangar.dhs.org>
//Fixed : Pete M
//Change: I was constantly converting to/from character indexes/pointers.
// Considering we need pointers for MOVing data this was pointless +
// Hans managed to write a quicker FastReplace in pure Object Pascal. (Nice job Hans)
// Now I use pointers instead, this results in a much faster replace.
// As I have always said, never assuming you have the fastest code :-)
//==============================================================================
//Date : 02 May, 2000
//Found : hans gulo (again)
//Fixed : Pete M
//Change: In some (odd) circumstances FastMemPos(NC) would return a true result
// for a substring that did not exist.
//==============================================================================
//Date : 19 May, 2000
//Found : Dave Datta
//Fixed : Pete M
//Change: If the SOURCE was very small, and the REPLACE was very large, this
// causes either an integer overflow or OutOfMemory. In this case we
// estimate the result size a lot lower and resize the result whenever
// required (still not as often as StringReplace). See the const
// cDeltaSize !!
// You *may* still run out of memory, but that is a memory issue.
//==============================================================================
//Date : 16 September, 2000
//Found : Lorenz Graf
//Fixed : Pete M
//Change: FastReplace had some EXIT statements before RESULT had been set.
// I thought this would result in a Result of "", but it resulted in an
// undetermined result (usually the same as the last valid result)
// Set Result := '' in the first line of the code.
//==============================================================================
//Date : 21 September, 2000
//Found : Chris Baldwin (TCrispy)
//Fixed : Pete M
//Change: NoCase routines were not working correctly with non-alphabetical
// characters. eg, ) and #9 were thought to be the same
// (Due to the UpCase routine simple ANDing the value eith $df)
// Had to add lookup tables, which probably slows it down a little.
//==============================================================================
//Date : 21 September, 2000
//Found : Pete M
//Fixed : Pete M
//Change: Forward searching routines could return errors if 0 was passed as the
// StartPos.
// This is actually an invalid value (1 is the first character)
// So I inlcluded assert() statments.
// Was *NOT* implemented in FastMEMPos as this is MEMORY and not a string
//==============================================================================
//Date : 25 September, 2000
//Found : Lorenz Graf
//Fixed : Pete M
//Change: Incorrect value returned from FastMemPos if the SourceString and
// FindString were the same values.
// Also incorrect value returned from FastReplace if SourceString was ''
//==============================================================================
//Date : 01 October, 2000
//Found : DJ (#delphi undernet)
//Fixed : Pete M
//Change: Uppercase table was incorrect for international alphabets.
//==============================================================================
//Date : 23 November, 2000
//Found : DJ (#delphi undernet)
//Fixed : Pete M
//Change: CharUpperBuff(@GUpcaseTable[1], 256); should have been
// CharUpperBuff(@GUpcaseTable[0], 256);
//==============================================================================
//Date : 23 June, 2001
//Found : Lawrence Cheung <yllcheung@yahoo.com>
//Fixed : Pete M
//Change: FastPosBack ('bacdefga', 'a', 8, 1, 7);
// The above example should return 2 but was returning 8
//==============================================================================
//Date : 24 Aug, 2001
//Found : New development
//Fixed : Pete M
//Change: Removed FastMemPos, FastMemPosNoCase and replaced with BMPos and
// BMPosNoCase.
// These routines use my interpretation of a Boyer-Moore search routine.
// If you call these routines directly you must first call
// MakeBMTable or MakeBMTableNoCase, and you MUST call the correct routine !
// Maybe I will create Boyer-Moore routines for backwards searching too.
//==============================================================================
//Date : 06 Sept, 2001
//Found : Tim Frost <tim@roundhill.co.uk>
//Fixed : Pete M
//Change: Tim pointed out that using a global variable meant that the routines
// were no longer thread safe. I have had to change all POS type routines
// so that they accept a JumpTable as an additional variable. Sorry if
// anyone calls these routines directly.
//==============================================================================
//Date : 11 Sept, 2001
//Found : Misc
//Fixed : Pete M
//Change: MakeBMTable...... was not functioning correctly
//==============================================================================
//Date : 10 January, 2002
//Found : Pete M
//Fixed : Pete M
//Change: A hideously small possibility that copying the remainder of the source
// string to the end of Result when reaching the end of FastReplace
// would run over the end of our buffer has been fixed. (No cases reported)
//==============================================================================
//Date : 19 July, 2002
//Found : Robert Croshere <croshere@cns.nl>
//Fixed : Pete M
//Change: A bug when replacing a string with '' has been fixed.
//==============================================================================
//Date : 14 August, 2002
//Found : Mark Derricutt <mark@talios.com>
//Fixed : Mark Derricutt <mark@talios.com>
//Change: Made compatible with Linux
//==============================================================================
//Date : 23 October, 2002
//Fixed : Marc Bir <marc@delphihome.com>
//Change: Made compatible with Linux
//==============================================================================
//Date : 02 November, 2002
//Fixed : Pete M
//Change: Added FastAnsiReplace. Parameter compatible with StringReplace but
// works with Multi-byte character sets (Japan, Korea, etc).
//==============================================================================
//Date : 26 January, 2003
//Fixed : Pete M
//Change: Added FastTagReplace. Lets you specify a TagStart and TagEnd, each
// time text is encountered with these tags surrounding them, eg
// <!UserName!> a callback procedure will be executed allowing you to
// replace the tag with some specific text.
//==============================================================================
//Date : 12 Febuary, 2003
//Fixed : Pete M
//Change: Added UserData: Integer to TFastTagReplaceProc so that a callback can
// pass user data (such as an object instance)
//==============================================================================
//Date : 15 Febuary, 2003
//Fixed : Pete M
//Change: It was possible for the procedure AddBuffer embedded within
// FastTagReplace to not allocate a large enough buffer.
unit FastStrings;
interface
uses
{$IFNDEF LINUX}
Windows,
{$ENDIF}
SysUtils;
//This TYPE declaration will become apparent later
type
TBMJumpTable = array[0..255] of Integer;
TFastPosProc = function (const aSource, aFind: Pointer; const aSourceLen, aFindLen: Integer; var JumpTable: TBMJumpTable): Pointer;
TFastPosIndexProc = function (const aSourceString, aFindString: string; const aSourceLen, aFindLen, StartPos: Integer; var JumpTable: TBMJumpTable): Integer;
TFastTagReplaceProc = procedure (var Tag: string; const UserData: Integer);
//Boyer-Moore routines
procedure MakeBMTable(Buffer: PChar; BufferLen: Integer; var JumpTable: TBMJumpTable);
procedure MakeBMTableNoCase(Buffer: PChar; BufferLen: Integer; var JumpTable: TBMJumpTable);
function BMPos(const aSource, aFind: Pointer; const aSourceLen, aFindLen: Integer; var JumpTable: TBMJumpTable): Pointer;
function BMPosNoCase(const aSource, aFind: Pointer; const aSourceLen, aFindLen: Integer; var JumpTable: TBMJumpTable): Pointer;
function FastAnsiReplace(const S, OldPattern, NewPattern: string; Flags: TReplaceFlags): string;
procedure FastCharMove(const Source; var Dest; Count : Integer);
function FastCharPos(const aSource : string; const C: Char; StartPos : Integer): Integer;
function FastCharPosNoCase(const aSource : string; C: Char; StartPos : Integer): Integer;
function FastPos(const aSourceString, aFindString : string; const aSourceLen, aFindLen, StartPos : Integer) : Integer;
function FastPosNoCase(const aSourceString, aFindString : string; const aSourceLen, aFindLen, StartPos : Integer) : Integer;
function FastPosBack(const aSourceString, aFindString : string; const aSourceLen, aFindLen, StartPos : Integer) : Integer;
function FastPosBackNoCase(const aSourceString, aFindString : string; const aSourceLen, aFindLen, StartPos : Integer) : Integer;
function FastReplace(const aSourceString : string; const aFindString, aReplaceString : string;
CaseSensitive : Boolean = False) : string;
function FastTagReplace(const SourceString, TagStart, TagEnd: string;
FastTagReplaceProc: TFastTagReplaceProc; const UserData: Integer): string;
function SmartPos(const SearchStr,SourceStr : string;
const CaseSensitive : Boolean = TRUE;
const StartPos : Integer = 1;
const ForwardSearch : Boolean = TRUE) : Integer;
implementation
const
cDeltaSize = 1.5;
var
GUpcaseTable : array[0..255] of char;
GUpcaseLUT: Pointer;
//MakeBMJumpTable takes a FindString and makes a JumpTable
procedure MakeBMTable(Buffer: PChar; BufferLen: Integer; var JumpTable: TBMJumpTable);
begin
if BufferLen = 0 then raise Exception.Create('BufferLen is 0');
asm
push EDI
push ESI
mov EDI, JumpTable
mov EAX, BufferLen
mov ECX, $100
REPNE STOSD
mov ECX, BufferLen
mov EDI, JumpTable
mov ESI, Buffer
dec ECX
xor EAX, EAX
@@loop:
mov AL, [ESI]
lea ESI, ESI + 1
mov [EDI + EAX * 4], ECX
dec ECX
jg @@loop
pop ESI
pop EDI
end;
end;
procedure MakeBMTableNoCase(Buffer: PChar; BufferLen: Integer; var JumpTable: TBMJumpTable);
begin
if BufferLen = 0 then raise Exception.Create('BufferLen is 0');
asm
push EDI
push ESI
mov EDI, JumpTable
mov EAX, BufferLen
mov ECX, $100
REPNE STOSD
mov EDX, GUpcaseLUT
mov ECX, BufferLen
mov EDI, JumpTable
mov ESI, Buffer
dec ECX
xor EAX, EAX
@@loop:
mov AL, [ESI]
lea ESI, ESI + 1
mov AL, [EDX + EAX]
mov [EDI + EAX * 4], ECX
dec ECX
jg @@loop
pop ESI
pop EDI
end;
end;
function BMPos(const aSource, aFind: Pointer; const aSourceLen, aFindLen: Integer; var JumpTable: TBMJumpTable): Pointer;
var
LastPos: Pointer;
begin
LastPos := Pointer(Integer(aSource) + aSourceLen - 1);
asm
push ESI
push EDI
push EBX
mov EAX, aFindLen
mov ESI, aSource
lea ESI, ESI + EAX - 1
std
mov EBX, JumpTable
@@comparetext:
cmp ESI, LastPos
jg @@NotFound
mov EAX, aFindLen
mov EDI, aFind
mov ECX, EAX
push ESI //Remember where we are
lea EDI, EDI + EAX - 1
xor EAX, EAX
@@CompareNext:
mov al, [ESI]
cmp al, [EDI]
jne @@LookAhead
lea ESI, ESI - 1
lea EDI, EDI - 1
dec ECX
jz @@Found
jmp @@CompareNext
@@LookAhead:
//Look up the char in our Jump Table
pop ESI
mov al, [ESI]
mov EAX, [EBX + EAX * 4]
lea ESI, ESI + EAX
jmp @@CompareText
@@NotFound:
mov Result, 0
jmp @@TheEnd
@@Found:
pop EDI //We are just popping, we don't need the value
inc ESI
mov Result, ESI
@@TheEnd:
cld
pop EBX
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -