📄 abdfinw.pas
字号:
(* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is TurboPower Abbrevia * * The Initial Developer of the Original Code is * TurboPower Software * * Portions created by the Initial Developer are Copyright (C) 1997-2002 * the Initial Developer. All Rights Reserved. * * Contributor(s): * * ***** END LICENSE BLOCK ***** *){*********************************************************}{* ABBREVIA: AbDfInW.pas 3.05 *}{*********************************************************}{* Deflate input sliding window unit *}{*********************************************************}unit AbDfInW;{$I AbDefine.inc}interfaceuses SysUtils, Classes, AbDfBase;{Notes: TdfInputWindow implements a sliding window on data for the LZ77 dictionary encoding. The stream passed to the class is automatically read when required to keep the internal buffer fully loaded. }type TAbDfMatch = record maLen : integer; maDist : integer; maLit : AnsiChar; end;type TAbDfInputWindow = class private FAdvanceStart : boolean; FBuffer : PAnsiChar; FBufferEnd : PAnsiChar; FBytesUsed : longint; FChainLen : integer; FHashChains : PPointerList; FHashHeads : PPointerList; FHashIndex : integer; FChecksum : longint; FCurrent : PAnsiChar; FLookAheadEnd : PAnsiChar; FMaxMatchLen : integer; FMustSlide : boolean; FOnProgress : TAbProgressStep; FSlidePoint : PAnsiChar; FStart : PAnsiChar; FStartOffset : longint; FStream : TStream; FStreamSize : longint; FUseCRC32 : boolean; FUseDeflate64 : boolean; FWinMask : integer; FWinSize : integer; protected function iwGetChecksum : longint; procedure iwReadFromStream; procedure iwSetCapacity(aValue : longint); procedure iwSlide; public constructor Create(aStream : TStream; aStreamSize : longint; aWinSize : integer; aChainLength : integer; aUseDeflate64 : boolean; aUseCRC32 : boolean); destructor Destroy; override; procedure Advance(aCount : integer; aHashCount : integer); procedure AdvanceByOne; function FindLongestMatch(aAmpleLength : integer; var aMatch : TAbDfMatch; const aPrevMatch : TAbDfMatch) : boolean; function GetNextChar : AnsiChar; function GetNextKeyLength : integer; function Position : longint; procedure ReadBuffer(var aBuffer; aCount : longint; aOffset : longint); property ChainLen : integer read FChainLen write FChainLen; property Checksum : longint read iwGetChecksum; property OnProgress : TAbProgressStep read FOnProgress write FOnProgress; end;implementation{Notes: Meaning of the internal pointers: |----------+===================+==+--------------------------| | | | | | FBuffer FStart FCurrent FLookAheadEnd FBufferEnd FCurrent is the current match position. The valid data that can be matched is between FStart and FLookAheadEnd, The data between FStart and FCurrent has already been seen; the data between FCurrent and FLookAheadEnd can be used for matching. The buffer size depends on the requested window size (a multiple of 1KB, up to 32KB for deflate, up to 64KB for deflate64) and the lookahead size (up to 258 bytes for deflate and 64KB for deflate64.) The window of data continuously slides to the right, and is slid back to FBuffer whenever FStart reaches a point 16KB away, this point being given by FSlidePoint. The hash table: This is a chained hash table with some peculiarities. First the table itself, FHashHeads. It contains pointers to strings in the window buffer, not to chains. The chains are held is a separate structure, FHashChains. The hash function on the three-character keys is a Rabin-Karp function: ((((Ch1 shl 5) xor Ch2) shl 5) xor Ch3) and $3FFF designed so that a running hash value can be kept and calculated per character. The hash table is $4000 elements long (obviously, given the hash function). On insertion, the previous pointer in the hash table at the calculated index is saved and replaced by the new pointer. The old pointer is saved in the chains array. This has the same number of elements as the sliding window has characters. The pointer is placed at (Ptr and (WindowsSize-1)) overwriting the value that's already there. In this fashion the individual chains in the standard hash table are interwoven with each other in this hash table, like a skein of threads. }const c_HashCount = $4000; {the number of hash entries} c_HashMask = c_HashCount - 1; {a mask for the hash function} c_HashShift = 5; {shift value for the hash function}{===TAbDfInputWindow=================================================}constructor TAbDfInputWindow.Create(aStream : TStream; aStreamSize : longint; aWinSize : integer; aChainLength : integer; aUseDeflate64 : boolean; aUseCRC32 : boolean);begin {create the ancestor} inherited Create; {save parameters} FStreamSize := aStreamSize; FWinSize := aWinSize; FWinMask := aWinSize - 1; FStream := aStream; FChainLen := aChainLength; FUseDeflate64 := aUseDeflate64; FUseCRC32 := aUseCRC32; if aUseCRC32 then FChecksum := -1 { CRC32 starts off with all bits set } else FCheckSum := 1; { Adler32 starts off with a value of 1 } {set capacity of sliding window} iwSetCapacity(aWinSize); {create the hash table, first the hash table itself (and set all entries to nil)} FHashHeads := AllocMem(c_HashCount * sizeof(pointer)); {..now the chains (there's no need to set the entries to nil, since the chain entries get fed from the head entries before searching)} GetMem(FHashChains, aWinSize * sizeof(pointer)); {read the first chunk of data from the stream} FMustSlide := true; iwReadFromStream; {if there are at least two bytes, prime the hash index} if ((FLookAheadEnd - FBuffer) >= 2) then FHashIndex := ((longint(FBuffer[0]) shl c_HashShift) xor longint(FBuffer[1])) and c_HashMask;end;{--------}destructor TAbDfInputWindow.Destroy;begin {free the hash table} FreeMem(FHashHeads); FreeMem(FHashChains); {free the buffer} FreeMem(FBuffer); {destroy the ancestor} inherited Destroy;end;{--------}procedure TAbDfInputWindow.Advance(aCount : integer; aHashCount : integer);var i : integer; ByteCount : integer; Percent : integer; HashChains: PPointerList; HashHeads : PPointerList; HashInx : integer; CurPos : PAnsiChar;begin Assert((FLookAheadEnd - FCurrent) >= aCount, 'TAbDfInputWindow.Advance: seem to be advancing into the unknown'); Assert((aHashCount = aCount) or (aHashCount = pred(aCount)), 'TAbDfInputWindow.Advance: the parameters are plain wrong'); {use local var for speed} CurPos := FCurrent; {advance the current pointer if needed} if (aCount > aHashCount) then inc(CurPos); {make sure we update the hash table; remember that the string[3] at the current position has already been added to the hash table (for notes on updating the hash table, see FindLongestMatch} {use local vars for speed} HashChains := FHashChains; HashHeads := FHashHeads; HashInx := FHashIndex; {update the hash table} for i := 0 to pred(aHashCount) do begin HashInx := ((HashInx shl c_HashShift) xor longint(CurPos[2])) and c_HashMask; HashChains^[longint(CurPos) and FWinMask] := HashHeads^[HashInx]; HashHeads^[HashInx] := CurPos; inc(CurPos); end; {replace old values} FHashChains := HashChains; FHashHeads := HashHeads; FHashIndex := HashInx; FCurrent := CurPos; {if we've seen at least FWinSize bytes...} if FAdvanceStart then begin {advance the start of the sliding window} inc(FStart, aCount); inc(FStartOffset, aCount); {check to see if we have advanced into the slide zone} if FMustSlide and (FStart >= FSlidePoint) then iwSlide; end {otherwise check to see if we've seen at least FWinSize bytes} else if ((CurPos - FStart) >= FWinSize) then begin FAdvanceStart := true; {note: we can't advance automatically aCount bytes here, we need to calculate the actual count} ByteCount := (CurPos - FWinSize) - FStart; inc(FStart, ByteCount); inc(FStartOffset, ByteCount); end; {show progress} if Assigned(FOnProgress) then begin inc(FBytesUsed, aCount); if ((FBytesUsed and $FFF) = 0) then begin Percent := Round((100.0 * FBytesUsed) / FStreamSize); FOnProgress(Percent); end; end; {check to see if we have advanced into the slide zone} if (FStart >= FSlidePoint) then iwSlide;end;{--------}procedure TAbDfInputWindow.AdvanceByOne;var Percent : integer;begin {advance the current pointer} inc(FCurrent); {if we've seen at least FWinSize bytes...} if FAdvanceStart then begin {advance the start of the sliding window} inc(FStart, 1); inc(FStartOffset, 1); {check to see if we have advanced into the slide zone} if FMustSlide and (FStart >= FSlidePoint) then iwSlide; end {otherwise check to see if we've seen FWinSize bytes} else if ((FCurrent - FStart) = FWinSize) then FAdvanceStart := true; {show progress} if Assigned(FOnProgress) then begin inc(FBytesUsed, 1); if ((FBytesUsed and $FFF) = 0) then begin Percent := Round((100.0 * FBytesUsed) / FStreamSize); FOnProgress(Percent); end; end;end;{--------}function TAbDfInputWindow.FindLongestMatch(aAmpleLength : integer; var aMatch : TAbDfMatch; const aPrevMatch : TAbDfMatch) : boolean;{Note: this routine implements a greedy algorithm and is by far the time sink for compression. There are two versions, one written in Pascal for understanding, one in assembler for speed. Activate one and only one of the following compiler defines.}{$DEFINE UseGreedyAsm}{.$DEFINE UseGreedyPascal}{Check to see that all is correct}{$IFDEF UseGreedyAsm} {$IFDEF UseGreedyPascal} !! Compile Error: only one of the greedy compiler defines can be used {$ENDIF}{$ELSE} {$IFNDEF UseGreedyPascal} !! Compile Error: one of the greedy compiler defines must be used {$ENDIF}{$ENDIF}type PLongint = ^longint; PWord = ^word;var MaxLen : longint; MaxDist : longint; MaxMatch : integer; ChainLen : integer; PrevStrPos : PAnsiChar; CurPos : PAnsiChar; {$IFDEF UseGreedyAsm} CurWord : word; MaxWord : word; {$ENDIF} {$IFDEF UseGreedyPascal} Len : longint; MatchStr : PAnsiChar; CurrentCh : PAnsiChar; CurCh : char;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -