📄 fastcodefillchar.pas
字号:
unit FastCodeFillChar;
(* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Fastcode
*
* The Initial Developer of the Original Code is Fastcode
*
* Portions created by the Initial Developer are Copyright (C) 2002-2004
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Charalabos Michael <chmichael@creationpower.com>
* John O'Harrow <john@elmcrest.demon.co.uk>
*
* ***** END LICENSE BLOCK ***** *)
interface
{$I FastCode.inc}
type
{ Procedural type describing the signature used by the FillChar routines
  declared in this unit: an untyped var destination, an Integer count and
  the Char value to fill with. The per-target constants declared further
  down in this interface section are of this type. }
FastCodeFillCharFunction = procedure(var Dest; count: Integer; Value: Char);
{Functions shared between Targets}
{ Both routines below are referenced by the per-target constants in this
  interface section. }
procedure FastCodeFillCharRTL (var Dest; count: Integer; Value: Char);
procedure FastCodeFillCharP4 (var Dest; count: Integer; Value: Char);
{Functions not shared between Targets}
{ NOTE(review): the bodies of the four routines below are outside the
  visible part of this file; they share the same parameter list as the
  routines above. What distinguishes each variant should be confirmed
  against its implementation. }
procedure FastcodeFillCharBlended(var Dest; count: Integer; Value: Char);
procedure FastcodeFillCharP4N (var Dest; count: Integer; Value: Char);
procedure FastcodeFillCharXP (var Dest; count: Integer; Value: Char);
procedure FastcodeFillCharPascal (var Dest; count: Integer; Value: Char);
const
{ Version tag declared by this unit. }
Version = '0.3';
{ Per-target selections. Each typed constant holds the implementation chosen
  for one target: PMD uses FastCodeFillCharRTL, while P4P, PMB and AMD64 all
  use FastCodeFillCharP4.
  NOTE(review): the suffixes presumably name CPU targets defined by the
  Fastcode project (see FastCode.inc) - confirm their exact meaning there. }
FastCodeFillCharPMD : FastCodeFillCharFunction = FastCodeFillCharRTL;
FastCodeFillCharP4P : FastCodeFillCharFunction = FastCodeFillCharP4;
FastCodeFillCharPMB : FastCodeFillCharFunction = FastCodeFillCharP4;
FastCodeFillCharAMD64: FastCodeFillCharFunction = FastCodeFillCharP4;
{ NOTE(review): FillCharStub takes no parameters and its body is outside the
  visible part of this file; presumably it is the entry point that forwards
  to the selected implementation - confirm against the implementation. }
procedure FillCharStub;
implementation
uses
SysUtils;
//Author: Dennis Christensen
//Optimized for: Intel P4 Prescott
//Instructionset(s): IA32
//Original name: FillCharDKC_SSE2_10
procedure FastCodeFillCharRTL(var Dest; count: Integer; Value: Char);
{ Fills count bytes starting at Dest with the low byte of Value.

  Register usage on entry (Delphi register calling convention):
    eax = address of Dest, edx = count, cl = Value.

  Behaviour:
    count <= 0   nothing is written.
    count < 32   the last byte is stored on its own, then a computed jump
                 enters an unrolled run of 16-bit stores that covers the
                 remaining even number of bytes.
    count >= 32  the first 8 bytes are written with 16-bit stores, loaded
                 into the x87 stack as one 8-byte pattern, and that pattern
                 is stored over the last 16 bytes and then, 16 bytes per
                 iteration, over the 8-byte-aligned middle of the buffer.
                 Stores may overlap bytes that are already filled but never
                 reach outside Dest .. Dest + count - 1.

  The x87 register pushed by fld is popped again before returning, so the
  FPU stack depth and stack-top pointer are the same on exit as on entry.

  NOTE(review): the header comment above this procedure names an SSE2
  routine, but this body uses only integer and x87 instructions - confirm
  the attribution. }
asm {Size = 153 Bytes}
        cmp   edx, 32
        mov   ch, cl                 {Copy Value into both Bytes of CX}
        jl    @@Small                {Signed compare: negative counts also go to @@Small}
        mov   [eax  ], cx            {Fill First 8 Bytes}
        mov   [eax+2], cx
        mov   [eax+4], cx
        mov   [eax+6], cx
        sub   edx, 16                {edx = offset of the last 16 bytes}
        fld   qword ptr [eax]        {st(0) = 8-byte fill pattern}
        fst   qword ptr [eax+edx]    {Fill Last 16 Bytes}
        fst   qword ptr [eax+edx+8]
        mov   ecx, eax
        and   ecx, 7                 {8-Byte Align Writes}
        sub   ecx, 8                 {ecx = -(bytes up to the next 8-byte boundary), -8..-1}
        sub   eax, ecx               {eax = first 8-byte aligned address after Dest}
        add   edx, ecx               {edx = bytes between that address and the last 16}
        add   eax, edx               {eax = start of the last 16 bytes}
        neg   edx                    {edx = negative offset, counts up towards zero}
@@Loop:
        fst   qword ptr [eax+edx]    {Fill 16 Bytes per Loop}
        fst   qword ptr [eax+edx+8]
        add   edx, 16
        jl    @@Loop
        fstp  st(0)                  {Pop the pattern: frees the register AND restores}
                                     {the stack-top pointer. ffree st(0) alone only}
                                     {tags the register empty and leaves the stack}
                                     {rotated by one slot for the caller.}
        ret
        nop                          {Padding so that @@Small keeps its alignment;}
        nop                          {fstp st(0) has the same size as the ffree it}
        nop                          {replaces, so the padding is unchanged}
@@Small:
        test  edx, edx
        jle   @@Done                 {count <= 0: nothing to do}
        mov   [eax+edx-1], cl        {Fill Last Byte}
        and   edx, -2                {No. of Words to Fill}
        neg   edx
        lea   edx, [@@SmallFill + 60 + edx * 2]
        jmp   edx                    {Enter the unrolled stores part-way through}
        nop                          {Align Jump Destinations}
        nop
@@SmallFill:
        mov   [eax+28], cx
        mov   [eax+26], cx
        mov   [eax+24], cx
        mov   [eax+22], cx
        mov   [eax+20], cx
        mov   [eax+18], cx
        mov   [eax+16], cx
        mov   [eax+14], cx
        mov   [eax+12], cx
        mov   [eax+10], cx
        mov   [eax+ 8], cx
        mov   [eax+ 6], cx
        mov   [eax+ 4], cx
        mov   [eax+ 2], cx
        mov   [eax   ], cx
        ret                          {DO NOT REMOVE - This is for Alignment: the computed}
                                     {jump above relies on @@SmallFill + 60 being the}
                                     {address immediately after this ret}
@@Done:
end;
//Author: John O'Harrow
//Optimized for: Intel P4 Northwood
//Instructionset(s): IA32
//Original name: FillCharJOH_SSE2
procedure FastCodeFillCharP4(var Dest; count: Integer; Value: Char);
asm
test edx,edx
jle @Exit2
//case Count of
cmp edx,31
jnbe @CaseElse
jmp dword ptr [edx*4+@Case1JmpTable]
@CaseCount0 :
ret
@CaseCount1 :
mov [eax],cl
ret
@CaseCount2 :
mov ch,cl
mov [eax],cx
ret
@CaseCount3 :
mov ch,cl
mov [eax],cx
mov [eax+2],cl
ret
@CaseCount4 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
ret
@CaseCount5 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cl
ret
@CaseCount6 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
ret
@CaseCount7 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cl
ret
@CaseCount8 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
ret
@CaseCount9 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cl
ret
@CaseCount10 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
ret
@CaseCount11 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cl
ret
@CaseCount12 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cx
ret
@CaseCount13 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cx
mov [eax+12],cl
ret
@CaseCount14 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cx
mov [eax+12],cx
ret
@CaseCount15 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cx
mov [eax+12],cx
mov [eax+14],cl
ret
@CaseCount16 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cx
mov [eax+12],cx
mov [eax+14],cx
ret
@CaseCount17 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cx
mov [eax+12],cx
mov [eax+14],cx
mov [eax+16],cl
ret
@CaseCount18 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cx
mov [eax+12],cx
mov [eax+14],cx
mov [eax+16],cx
ret
@CaseCount19 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cx
mov [eax+12],cx
mov [eax+14],cx
mov [eax+16],cx
mov [eax+18],cl
ret
@CaseCount20 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cx
mov [eax+12],cx
mov [eax+14],cx
mov [eax+16],cx
mov [eax+18],cx
ret
@CaseCount21 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cx
mov [eax+12],cx
mov [eax+14],cx
mov [eax+16],cx
mov [eax+18],cx
mov [eax+20],cl
ret
@CaseCount22 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cx
mov [eax+12],cx
mov [eax+14],cx
mov [eax+16],cx
mov [eax+18],cx
mov [eax+20],cx
ret
@CaseCount23 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cx
mov [eax+12],cx
mov [eax+14],cx
mov [eax+16],cx
mov [eax+18],cx
mov [eax+20],cx
mov [eax+22],cl
ret
@CaseCount24 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cx
mov [eax+12],cx
mov [eax+14],cx
mov [eax+16],cx
mov [eax+18],cx
mov [eax+20],cx
mov [eax+22],cx
ret
@CaseCount25 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cx
mov [eax+12],cx
mov [eax+14],cx
mov [eax+16],cx
mov [eax+18],cx
mov [eax+20],cx
mov [eax+22],cx
mov [eax+24],cl
ret
@CaseCount26 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cx
mov [eax+12],cx
mov [eax+14],cx
mov [eax+16],cx
mov [eax+18],cx
mov [eax+20],cx
mov [eax+22],cx
mov [eax+24],cx
ret
@CaseCount27 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cx
mov [eax+12],cx
mov [eax+14],cx
mov [eax+16],cx
mov [eax+18],cx
mov [eax+20],cx
mov [eax+22],cx
mov [eax+24],cx
mov [eax+26],cl
ret
@CaseCount28 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cx
mov [eax+12],cx
mov [eax+14],cx
mov [eax+16],cx
mov [eax+18],cx
mov [eax+20],cx
mov [eax+22],cx
mov [eax+24],cx
mov [eax+26],cx
ret
@CaseCount29 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cx
mov [eax+12],cx
mov [eax+14],cx
mov [eax+16],cx
mov [eax+18],cx
mov [eax+20],cx
mov [eax+22],cx
mov [eax+24],cx
mov [eax+26],cx
mov [eax+28],cl
ret
@CaseCount30 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cx
mov [eax+12],cx
mov [eax+14],cx
mov [eax+16],cx
mov [eax+18],cx
mov [eax+20],cx
mov [eax+22],cx
mov [eax+24],cx
mov [eax+26],cx
mov [eax+28],cx
ret
@CaseCount31 :
mov ch,cl
mov [eax],cx
mov [eax+2],cx
mov [eax+4],cx
mov [eax+6],cx
mov [eax+8],cx
mov [eax+10],cx
mov [eax+12],cx
mov [eax+14],cx
mov [eax+16],cx
mov [eax+18],cx
mov [eax+20],cx
mov [eax+22],cx
mov [eax+24],cx
mov [eax+26],cx
mov [eax+28],cx
mov [eax+30],cl
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -