📄 stsort.pas
字号:
(* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is TurboPower SysTools
*
* The Initial Developer of the Original Code is
* TurboPower Software
*
* Portions created by the Initial Developer are Copyright (C) 1996-2002
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* ***** END LICENSE BLOCK ***** *)
{*********************************************************}
{* SysTools: StSort.pas 4.03 *}
{*********************************************************}
{* SysTools: General purpose sorting class using *}
{* merge sort algorithm *}
{*********************************************************}
{$I StDefine.inc}
{Notes:
The sequence to sort data is this:
Sorter := TStSorter.Create(MaxHeap, RecLen);
Sorter.Compare := ACompareFunction;
repeat
... obtain ADataRecord from somewhere ...
Sorter.Put(ADataRecord);
until NoMoreData;
while Sorter.Get(ADataRecord) do
... do something with ADataRecord ...
Sorter.Free;
While Put is called, the sorter buffers as many records as it can fit in
MaxHeap. When that space is filled, it sorts the buffer and stores that
buffer to a temporary merge file. When Get is called, the sorter sorts the
last remaining buffer and starts either returning the records from the
buffer (if all records fit into memory) or merging the files and returning
the records from there.
The Compare function can be used as a place to display status and to abort
the sort. It is not possible to accurately predict the total number of
times Compare will be called, but it is called very frequently throughout
the sort. To abort a sort from the Compare function, just raise an
exception there.
The Reset method can be called to sort another set of data of the same
record length. Once Get has been called, Put cannot be called again unless
Reset is called first.
There is no default Compare function. One must be assigned after creating
a TStSorter and before calling Put. Otherwise an exception is raised the
first time a Compare function is needed.
If Create cannot allocate MaxHeap bytes for a work buffer, it
repeatedly divides MaxHeap by two until it can successfully allocate that
much space. After finding a block it can allocate, it does not attempt to
allocate larger blocks that might still fit.
Unlike MSORTP, STSORT always swaps full records. It does not use pointer
swapping for large records. If this is desirable, the application should
pass pointers to previously allocated records into the TStSorter class.
The OptimumHeapToUse, MinimumHeapToUse, and MergeInfo functions can be used
to optimize the buffer size before starting a sort.
By default, temporary merge files are saved in the current directory with
names of the form SORnnnnn.TMP, where nnnnn is a sequential file number.
You can supply a different merge name function via the MergeName property
to put the files in a different location or use a different form for the
names.
The sorter is thread-aware and uses critical sections to protect the Put,
Get, and Reset methods. Be sure that one thread does not call Put after
another thread has already called Get.
}
unit StSort;
interface
uses
Windows,
SysUtils, STConst, STBase;
const
{.Z+}
MinRecsPerRun = 4; {Minimum number of records in run buffer}
MergeOrder = 5; {Input files used at a time during merge, >=2, <=10}
MedianThreshold = 16; {Threshold for using median-of-three quicksort}
{.Z-}
type
TMergeNameFunc = function (MergeNum : Integer) : string;
TMergeInfo = record {Record returned by MergeInfo}
SortStatus : Integer; {Predicted status of sort, assuming disk ok}
MergeFiles : Integer; {Total number of merge files created}
MergeHandles : Integer; {Maximum file handles used}
MergePhases : Integer; {Number of merge phases}
MaxDiskSpace : LongInt; {Maximum peak disk space used}
HeapUsed : LongInt; {Heap space actually used}
end;
{.Z+}
TMergeIntArray = array[1..MergeOrder] of Integer;
TMergeLongArray = array[1..MergeOrder] of LongInt;
TMergePtrArray = array[1..MergeOrder] of Pointer;
{.Z-}
TStSorter = class(TObject)
{.Z+}
protected
{property instance variables}
FCount : LongInt; {Number of records put to sort}
FRecLen : Cardinal; {Size of each record}
FCompare : TUntypedCompareFunc; {Compare function}
FMergeName : TMergeNameFunc; {Merge file naming function}
{private instance variables}
sorRunCapacity : LongInt; {Capacity (in records) of run buffer}
sorRunCount : LongInt; {Current number of records in run buffer}
sorGetIndex : LongInt; {Last run element passed back to user}
sorPivotPtr : Pointer; {Pointer to pivot record}
sorSwapPtr : Pointer; {Pointer to swap record}
sorState : Integer; {0 = empty, 1 = adding, 2 = getting}
sorMergeFileCount : Integer; {Number of merge files created}
sorMergeFileMerged : Integer; {Index of last merge file merged}
sorMergeOpenCount : Integer; {Count of open merge files}
sorMergeBufSize : LongInt; {Usable bytes in merge buffer}
sorMergeFileNumber : TMergeIntArray; {File number of each open merge file}
sorMergeFiles : TMergeIntArray; {File handles for merge files}
sorMergeBytesLoaded: TMergeLongArray;{Count of bytes in each merge buffer}
sorMergeBytesUsed : TMergeLongArray; {Bytes used in each merge buffer}
sorMergeBases : TMergePtrArray; {Base index for each merge buffer}
sorMergePtrs : TMergePtrArray; {Current head elements in each merge buffer}
sorOutFile : Integer; {Output file handle}
sorOutPtr : Pointer; {Pointer for output buffer}
sorOutBytesUsed : LongInt; {Number of bytes in output buffer}
{$IFDEF ThreadSafe}
sorThreadSafe : TRTLCriticalSection;{Windows critical section record}
{$ENDIF}
sorBuffer : Pointer; {Pointer to global buffer}
{protected undocumented methods}
procedure sorAllocBuffer(MaxHeap : LongInt);
procedure sorCreateNewMergeFile(var Handle : Integer);
procedure sorDeleteMergeFiles;
function sorElementPtr(Index : LongInt) : Pointer;
procedure sorFlushOutBuffer;
procedure sorFreeBuffer;
procedure sorGetMergeElementPtr(M : Integer);
function sorGetNextElementIndex : Integer;
procedure sorMergeFileGroup;
procedure sorMoveElement(Src, Dest : Pointer);
procedure sorOpenMergeFiles;
procedure sorPrimaryMerge;
procedure sorRunSort(L, R : LongInt);
procedure sorStoreElement(Src : Pointer);
procedure sorStoreNewMergeFile;
procedure sorSwapElements(L, R : LongInt);
procedure sorSetCompare(Comp : TUntypedCompareFunc);
{protected documented methods}
procedure EnterCS;
{-Enter critical section for this instance}
procedure LeaveCS;
{-Leave critical section}
{.Z-}
public
constructor Create(MaxHeap : LongInt; RecLen : Cardinal); virtual;
{-Initialize a sorter}
destructor Destroy; override;
{-Destroy a sorter}
procedure Put(const X);
{-Add an element to the sort system}
function Get(var X) : Boolean;
{-Return next sorted element from the sort system}
procedure Reset;
{-Reset sorter before starting another sort}
property Count : LongInt
{-Return the number of elements in the sorter}
read FCount;
property Compare : TUntypedCompareFunc
{-Set or read the element comparison function}
read FCompare
write sorSetCompare;
property MergeName : TMergeNameFunc
{-Set or read the merge filename function}
read FMergeName
write FMergeName;
property RecLen : Cardinal
{-Return the size of each record}
read FRecLen;
end;
function OptimumHeapToUse(RecLen : Cardinal; NumRecs : LongInt) : LongInt;
{-Returns the optimum amount of heap space to sort NumRecs records
of RecLen bytes each. Less heap space causes merging; more heap
space is partially unused.}
function MinimumHeapToUse(RecLen : Cardinal) : LongInt;
{-Returns the absolute minimum heap that allows MergeSort to succeed}
function MergeInfo(MaxHeap : LongInt; RecLen : Cardinal;
NumRecs : LongInt) : TMergeInfo;
{-Predicts status and resource usage of a merge sort}
function DefaultMergeName(MergeNum : Integer) : string;
{-Default function used for returning merge file names}
procedure ArraySort(var A; RecLen, NumRecs : Cardinal;
Compare : TUntypedCompareFunc);
{-Sort a normal Delphi array (A) in place}
{======================================================================}
implementation
const
ecOutOfMemory = 8;
procedure RaiseError(Code : longint);
var
E : ESTSortError;
begin
if Code = ecOutOfMemory then
OutOfMemoryError
else begin
E := ESTSortError.CreateResTP(Code, 0);
E.ErrorCode := Code;
raise E;
end;
end;
function DefaultMergeName(MergeNum : Integer) : string;
begin
Result := 'SOR'+IntToStr(MergeNum)+'.TMP';
end;
function MergeInfo(MaxHeap : LongInt; RecLen : Cardinal;
NumRecs : LongInt) : TMergeInfo;
type
MergeFileSizeArray = array[1..(StMaxBlockSize div SizeOf(LongInt))] of LongInt;
var
MFileMerged, MOpenCount, MFileCount : Integer;
SizeBufSize, DiskSpace, OutputSpace, PeakDiskSpace : LongInt;
AllocRecs, RunCapacity, RecordsLeft, RecordsInFile : LongInt;
MFileSizeP : ^MergeFileSizeArray;
begin
{Set defaults for the result}
FillChar(Result, SizeOf(TMergeInfo), 0);
{Validate input parameters}
if (RecLen = 0) or (MaxHeap <= 0) or (NumRecs <= 0) then begin
Result.SortStatus := stscBadSize;
Exit;
end;
AllocRecs := MaxHeap div LongInt(RecLen);
if AllocRecs < MergeOrder+1 then begin
Result.SortStatus := stscBadSize;
Exit;
end;
RunCapacity := AllocRecs-2;
if RunCapacity < MinRecsPerRun then begin
Result.SortStatus := stscBadSize;
Exit;
end;
{Compute amount of memory used}
Result.HeapUsed := AllocRecs*LongInt(RecLen);
if RunCapacity >= NumRecs then
{All the records fit into memory}
Exit;
{Compute initial number of merge files and disk space}
MFileCount := NumRecs div (AllocRecs-2);
if NumRecs mod (AllocRecs-2) <> 0 then
inc(MFileCount);
{if MFileCount > MaxInt then begin }
{ Result.SortStatus := stscTooManyFiles;}
{ Exit; }
{end; }
DiskSpace := NumRecs*LongInt(RecLen);
{At least one merge phase required}
Result.MergePhases := 1;
if MFileCount <= MergeOrder then begin
{Only one merge phase, direct to user}
Result.MergeFiles := MFileCount;
Result.MergeHandles := MFileCount;
Result.MaxDiskSpace := DiskSpace;
Exit;
end;
{Compute total number of merge files and merge phases}
MFileMerged := 0;
while MFileCount-MFileMerged > MergeOrder do begin
inc(Result.MergePhases);
MOpenCount := 0;
while (MOpenCount < MergeOrder) and (MFileMerged < MFileCount) do begin
inc(MOpenCount);
inc(MFileMerged);
end;
inc(MFileCount);
end;
{Store the information we already know}
Result.MergeFiles := MFileCount;
Result.MergeHandles := MergeOrder+1; {MergeOrder input files, 1 output file}
{Determine whether the disk space analysis can proceed}
Result.MaxDiskSpace := -1;
if MFileCount > (StMaxBlockSize div SizeOf(LongInt)) then
Exit;
SizeBufSize := MFileCount*SizeOf(LongInt);
try
GetMem(MFileSizeP, SizeBufSize);
except
Exit;
end;
{Compute size of initial merge files}
RecordsLeft := NumRecs;
MFileCount := 0;
while RecordsLeft > 0 do begin
inc(MFileCount);
if RecordsLeft >= RunCapacity then
RecordsInFile := RunCapacity
else
RecordsInFile := RecordsLeft;
MFileSizeP^[MFileCount] := RecordsInFile*LongInt(RecLen);
dec(RecordsLeft, RecordsInFile);
end;
{Carry sizes forward to get disk space used}
PeakDiskSpace := DiskSpace;
MFileMerged := 0;
while MFileCount-MFileMerged > MergeOrder do begin
MOpenCount := 0;
OutputSpace := 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -